From a5a479fdc2cbc769215136684926ac6639f4a2a1 Mon Sep 17 00:00:00 2001
From: Ivano Donadi
Date: Wed, 28 Apr 2021 16:50:05 +0200
Subject: [PATCH] Fit trait modification and cross validation proposal (#122)

* change fit signature cross validation POC
* fmt
* fix merge issues
* concat to from_shape_vec
* with labels tests
* Move linfa-pls to new Lapack bound (#3)
* Move linfa-pls to new Lapack bound
* More cleanups
* Playing around with `cross_validation`
* Make generic over dimension
* Run rustfmt
* Add simple test for multi target cv
* Run rustfmt
* Rename cross validation multi target to `cross_validate_multi`
* Run rustfmt
* docs
* update table of contents
* fix pls segmentation fault
* update contribution guide
* snippet

Co-authored-by: Lorenz Schmidt
---
 CONTRIBUTE.md | 29 +-
 Cargo.toml | 3 +-
 README.md | 1 +
 algorithms/linfa-bayes/src/gaussian_nb.rs | 10 +-
 algorithms/linfa-clustering/Cargo.toml | 2 +-
 .../src/appx_dbscan/hyperparameters.rs | 10 +-
 .../src/gaussian_mixture/algorithm.rs | 14 +-
 .../src/gaussian_mixture/errors.rs | 61 ++--
 .../linfa-clustering/src/k_means/algorithm.rs | 8 +-
 .../linfa-clustering/src/k_means/errors.rs | 22 +-
 .../examples/elasticnet_cv.rs | 26 ++
 algorithms/linfa-elasticnet/src/algorithm.rs | 6 +-
 algorithms/linfa-ica/Cargo.toml | 3 +-
 algorithms/linfa-ica/src/error.rs | 36 +-
 algorithms/linfa-ica/src/fast_ica.rs | 46 ++-
 algorithms/linfa-linear/src/error.rs | 6 +
 algorithms/linfa-linear/src/glm.rs | 8 +-
 .../linfa-linear/src/glm/distribution.rs | 91 +++---
 algorithms/linfa-linear/src/glm/link.rs | 24 +-
 algorithms/linfa-linear/src/ols.rs | 29 +-
 algorithms/linfa-logistic/Cargo.toml | 1 +
 .../linfa-logistic/examples/logistic_cv.rs | 34 ++
 algorithms/linfa-logistic/src/error.rs | 22 ++
 algorithms/linfa-logistic/src/lib.rs | 180 +++++-----
 algorithms/linfa-pls/Cargo.toml | 4 +-
 algorithms/linfa-pls/src/errors.rs | 39 +--
 algorithms/linfa-pls/src/lib.rs | 6 +-
 algorithms/linfa-pls/src/pls_generic.rs | 47 ++-
 algorithms/linfa-pls/src/pls_svd.rs | 17 +-
 algorithms/linfa-pls/src/utils.rs | 34 +-
 .../examples/count_vectorization.rs | 15 +-
 .../examples/tfidf_vectorization.rs | 15 +-
 .../src/count_vectorization.rs | 2 +
 algorithms/linfa-preprocessing/src/error.rs | 2 +
 .../linfa-preprocessing/src/linear_scaling.rs | 7 +-
 .../linfa-preprocessing/src/whitening.rs | 7 +-
 algorithms/linfa-reduction/Cargo.toml | 1 +
 algorithms/linfa-reduction/examples/pca.rs | 2 +-
 algorithms/linfa-reduction/src/error.rs | 12 +
 algorithms/linfa-reduction/src/lib.rs | 1 +
 algorithms/linfa-reduction/src/pca.rs | 32 +-
 algorithms/linfa-svm/src/classification.rs | 30 +-
 algorithms/linfa-svm/src/regression.rs | 16 +-
 .../src/decision_trees/algorithm.rs | 16 +-
 .../linfa-trees/src/decision_trees/tikz.rs | 3 +-
 algorithms/linfa-tsne/examples/tsne.rs | 2 +-
 build.rs | 9 +
 .../content/snippets/cross-validation.md | 37 +--
 docs/website/content/snippets/k-folding.md | 23 ++
 src/dataset/impl_dataset.rs | 309 +++++++++++++++---
 src/dataset/impl_targets.rs | 30 +-
 src/dataset/mod.rs | 295 ++++++++++++++++-
 src/error.rs | 2 +
 src/lib.rs | 3 +
 src/metrics_classification.rs | 199 ++++++-----
 src/traits.rs | 7 +-
 56 files changed, 1234 insertions(+), 662 deletions(-)
 create mode 100644 algorithms/linfa-elasticnet/examples/elasticnet_cv.rs
 create mode 100644 algorithms/linfa-logistic/examples/logistic_cv.rs
 create mode 100644 algorithms/linfa-logistic/src/error.rs
 create mode 100644 algorithms/linfa-reduction/src/error.rs
 create mode 100644 build.rs
 create mode 100644 docs/website/content/snippets/k-folding.md
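Before diving into the per-file changes, a quick orientation: the two user-visible pieces of this patch are the reworked `Fit` trait, which now carries an error type and returns a `Result`, and the new `cross_validate` method on datasets. The sketch below condenses the calling convention from the `elasticnet_cv` example added further down; the penalty and ratio values here are arbitrary, while the diabetes loader, `cross_validate`, and `r2` all appear in the patch itself:

```rust
use linfa::prelude::*;
use linfa_elasticnet::{ElasticNet, Result};

fn main() -> Result<()> {
    // the dataset is mutable to allow fast k-folding, as in the example below
    let mut dataset = linfa_datasets::diabetes();

    // one candidate model per hyperparameter value under comparison
    let models: Vec<_> = vec![0.1, 0.5, 1.0]
        .into_iter()
        .map(|ratio| ElasticNet::params().penalty(0.3).l1_ratio(ratio))
        .collect();

    // 5-fold cross validation: the closure scores each (prediction, truth)
    // pair, and the scores are averaged across the folds, one mean per model
    let r2_values = dataset.cross_validate(5, &models, |pred, truth| pred.r2(&truth))?;
    println!("{:?}", r2_values);

    Ok(())
}
```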
diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
index 4cda44a1b..06d5621fa 100644
--- a/CONTRIBUTE.md
+++ b/CONTRIBUTE.md
@@ -6,38 +6,19 @@ This document should be used as a reference when contributing to Linfa. It descr
 
 An important part of the Linfa ecosystem is how to organize data for the training and estimation process. A [Dataset](src/dataset/mod.rs) serves this purpose. It is a small wrapper of data and targets types and should be used as argument for the [Fit](src/traits.rs) trait. Its parametrization is generic, with [Records](src/dataset/mod.rs) representing input data (atm only implemented for `ndarray::ArrayBase`) and [Targets](src/dataset/mod.rs) for targets.
 
-You can find traits for different classes of algorithms [here](src/traits.rs). For example, to implement a fittable algorithm, which takes an `Array2` as input data and boolean array as targets:
+You can find traits for different classes of algorithms [here](src/traits.rs). For example, to implement a fittable algorithm, which takes an `Array2` as input data and boolean array as targets and could fail with an `Error` struct:
 ```rust
-impl<'a, F: Float> Fit<'a, Array2<F>, Array1<bool>> for SvmParams<F, Pr> {
+impl<F: Float> Fit<Array2<F>, Array1<bool>, Error> for SvmParams<F, Pr> {
     type Object = Svm<F, Pr>;
 
-    fn fit(&self, dataset: &Dataset<Array2<F>, Array1<bool>>) -> Self::Object {
+    fn fit(&self, dataset: &Dataset<Array2<F>, Array1<bool>>) -> Result<Self::Object> {
         ...
     }
 }
 ```
-the type of the dataset is `&Dataset<Array2<F>, Array1<bool>>`, and lifetime `'a` is the required lifetime for the fitted state. It produces a fitted state, called `Svm<F, Pr>` with probability type `Pr`.
+where the type of the input dataset is `&Dataset<Array2<F>, Array1<bool>>`. It produces a result with a fitted state, called `Svm<F, Pr>` with probability type `Pr`, or an error of type `Error` in case of failure.
 
-The [Predict](src/traits.rs) should be implemented with dataset arguments, as well as arrays. If a dataset is provided, then predict takes its ownership and returns a new dataset with predicted targets. For an array, predict takes a reference and returns predicted targets. In the same context, SVM implemented predict like this:
-```rust
-impl<F: Float, T: Targets> Predict<Dataset<Array2<F>, T>, Dataset<Array2<F>, Vec<Pr>>>
-    for Svm<F, Pr>
-{
-    fn predict(&self, data: Dataset<Array2<F>, T>) -> Dataset<Array2<F>, Vec<Pr>> {
-        ...
-    }
-}
-```
-and
-```rust
-impl<F: Float, D: Data<Elem = F>> Predict<ArrayBase<D, Ix2>, Vec<Pr>> for Svm<F, Pr> {
-    fn predict(&self, data: ArrayBase<D, Ix2>) -> Vec<Pr> {
-        ...
-    }
-}
-```
-
-For an example of a `Transformer` please look into the [linfa-kernel](linfa-kernel/src/lib.rs) implementation.
+The [Predict](src/traits.rs) trait has its own section later in this document, while for an example of a `Transformer` please look into the [linfa-kernel](linfa-kernel/src/lib.rs) implementation.
 
 ## Parameters and builder
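To make the contribution-guide snippet above concrete, here is a self-contained sketch of a toy estimator written against the new fallible `Fit`. Only the trait shape and the `linfa::Error::Parameters` variant are taken from the patch; `MeanParams` and `MeanModel` are invented placeholder names:

```rust
use linfa::dataset::DatasetBase;
use linfa::traits::Fit;
use linfa::Float;
use ndarray::{Array1, ArrayBase, Axis, Data, Ix2};

/// Hypothetical estimator that learns the per-feature mean of the records
struct MeanParams;
struct MeanModel<F> {
    means: Array1<F>,
}

impl<F: Float, D: Data<Elem = F>, T> Fit<ArrayBase<D, Ix2>, T, linfa::Error> for MeanParams {
    type Object = MeanModel<F>;

    // Fitting now returns a `Result`, so bad input surfaces as an error
    // value instead of a panic inside the algorithm
    fn fit(
        &self,
        dataset: &DatasetBase<ArrayBase<D, Ix2>, T>,
    ) -> Result<Self::Object, linfa::Error> {
        let means = dataset
            .records()
            .mean_axis(Axis(0))
            .ok_or_else(|| linfa::Error::Parameters("empty dataset".to_string()))?;

        Ok(MeanModel { means })
    }
}
```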
diff --git a/Cargo.toml b/Cargo.toml
index 3331dae85..de8686d3a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,14 +54,13 @@ features = ["cblas"]
 default-features = false
 
 [dependencies.openblas-src]
-version = "0.9.0"
+version = "0.10.4"
 optional = true
 default-features = false
 features = ["cblas"]
 
 [dev-dependencies]
 ndarray-rand = "0.13"
-
 linfa-datasets = { path = "datasets", features = ["winequality", "iris", "diabetes"] }
 
 [workspace]
diff --git a/README.md b/README.md
index add7af611..4e2a180a3 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ Where does `linfa` stand right now? [Are we learning yet?](http://www.arewelearningyet.com)
 | [ica](algorithms/linfa-ica/) | Independent component analysis | Tested | Unsupervised learning | Contains FastICA implementation |
 | [pls](algorithms/linfa-pls/) | Partial Least Squares | Tested | Supervised learning | Contains PLS estimators for dimensionality reduction and regression |
 | [tsne](algorithms/linfa-tsne/) | Dimensionality reduction| Tested | Unsupervised learning | Contains exact solution and Barnes-Hut approximation t-SNE |
+| [preprocessing](algorithms/linfa-preprocessing/) |Normalization & Vectorization| Tested | Pre-processing | Contains data normalization/whitening and count vectorization/tf-idf |
 
 We believe that only a significant community effort can nurture, build, and sustain a machine learning ecosystem in Rust - there is no other way forward.
diff --git a/algorithms/linfa-bayes/src/gaussian_nb.rs b/algorithms/linfa-bayes/src/gaussian_nb.rs
index b7c4b39e4..304df69e6 100644
--- a/algorithms/linfa-bayes/src/gaussian_nb.rs
+++ b/algorithms/linfa-bayes/src/gaussian_nb.rs
@@ -2,7 +2,7 @@ use ndarray::{s, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Axis, Data,
 use ndarray_stats::QuantileExt;
 use std::collections::HashMap;
 
-use crate::error::Result;
+use crate::error::{BayesError, Result};
 use linfa::dataset::{AsTargets, DatasetBase, Labels};
 use linfa::traits::{Fit, IncrementalFit, PredictRef};
 use linfa::Float;
@@ -40,13 +40,13 @@ impl GaussianNbParams {
     }
 }
 
-impl<F, D, L> Fit<'_, ArrayBase<D, Ix2>, L> for GaussianNbParams
+impl<F, D, L> Fit<ArrayBase<D, Ix2>, L, BayesError> for GaussianNbParams
 where
     F: Float,
     D: Data<Elem = F>,
     L: AsTargets<Elem = usize> + Labels<Elem = usize>,
 {
-    type Object = Result<GaussianNb<F>>;
+    type Object = GaussianNb<F>;
 
     /// Fit the model
     ///
     /// # Example
     ///
     /// # Ok(())
     /// # }
     /// ```
-    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, L>) -> Self::Object {
+    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, L>) -> Result<Self::Object> {
         // We extract the unique classes in sorted order
         let mut unique_classes = dataset.targets.labels();
         unique_classes.sort_unstable();
@@ -303,7 +303,7 @@
     ///
     /// __Panics__ if the input is empty or if pairwise orderings are undefined
     /// (this occurs in presence of NaN values)
-    fn predict_ref<'a>(&'a self, x: &ArrayBase<D, Ix2>) -> Array1<usize> {
+    fn predict_ref(&self, x: &ArrayBase<D, Ix2>) -> Array1<usize> {
         let joint_log_likelihood = self.joint_log_likelihood(x.view());
 
         // We store the classes and likelihood info in a vec and matrix
diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
index d12006dc3..bd8d38005 100644
--- a/algorithms/linfa-clustering/Cargo.toml
+++ b/algorithms/linfa-clustering/Cargo.toml
@@ -34,8 +34,8 @@ ndarray-rand = "0.13"
 ndarray-stats = "0.4"
 num-traits = "0.2"
 rand_isaac = "0.3"
+thiserror = "1"
 partitions = "0.2.4"
-
 linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
 
 [dev-dependencies]
diff --git a/algorithms/linfa-clustering/src/appx_dbscan/hyperparameters.rs b/algorithms/linfa-clustering/src/appx_dbscan/hyperparameters.rs
index c9476ae5e..4b624bf6a 100644
--- a/algorithms/linfa-clustering/src/appx_dbscan/hyperparameters.rs
+++ b/algorithms/linfa-clustering/src/appx_dbscan/hyperparameters.rs
@@ -99,7 +99,7 @@ impl<F: Float> AppxDbscanHyperParams<F> {
     }
 
     fn build(tolerance: F, min_points: usize, slack: F) -> Self {
-        if tolerance <= F::cast(0.) {
+        if tolerance <= F::zero() {
             panic!("`tolerance` must be greater than 0!");
         }
         // There is always at least one neighbor to a point (itself)
         if min_points <= 1 {
             panic!("`min_points` must be greater than 1!");
         }
-        if slack <= F::cast(0.) {
+        if slack <= F::zero() {
             panic!("`slack` must be greater than 0!");
         }
         Self {
-            tolerance: tolerance,
-            min_points: min_points,
-            slack: slack,
+            tolerance,
+            min_points,
+            slack,
             appx_tolerance: tolerance * (F::one() + slack),
         }
     }
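The gaussian_mixture and k_means error types that follow (and the ica, pls, and logistic ones later) are all migrated with the same recipe: derive `thiserror::Error`, move the `Display` strings into `#[error(...)]` attributes, and add `#[from]` conversions so fallible calls such as `nk.min()?` can bubble up through `Fit::fit`. A minimal sketch of the recipe, using a placeholder `MyError` whose variants mirror those in the diff:

```rust
use thiserror::Error;

// Placeholder enum mirroring the shape of the migrated GmmError/KMeansError
#[derive(Error, Debug)]
pub enum MyError {
    // the display string lives in the attribute, replacing a manual `Display` impl
    #[error("Invalid value encountered: {0}")]
    InvalidValue(String),
    // `#[from]` generates the `From` impl that makes `?` work on upstream errors
    #[error(transparent)]
    LinfaError(#[from] linfa::error::Error),
    #[error(transparent)]
    MinMaxError(#[from] ndarray_stats::errors::MinMaxError),
}

pub type Result<T> = std::result::Result<T, MyError>;
```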
diff --git a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
index 70e6802ed..2388d3006 100644
--- a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
+++ b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
@@ -215,10 +215,10 @@ impl<F: Float> GaussianMixtureModel<F> {
         reg_covar: F,
     ) -> Result<(Array1<F>, Array2<F>, Array3<F>)> {
         let nk = resp.sum_axis(Axis(0));
-        if nk.min().unwrap() < &(F::cast(10.) * F::epsilon()) {
+        if nk.min()? < &(F::cast(10.) * F::epsilon()) {
             return Err(GmmError::EmptyCluster(format!(
                 "Cluster #{} has no more point. Consider decreasing number of clusters or change initialization.",
-                nk.argmin().unwrap() + 1
+                nk.argmin()? + 1
             )));
         }
@@ -400,12 +400,12 @@ impl<F: Float> GaussianMixtureModel<F> {
     }
 }
 
-impl<'a, F: Float, R: Rng + SeedableRng + Clone, D: Data<Elem = F>, T> Fit<'a, ArrayBase<D, Ix2>, T>
-    for GmmHyperParams<F, R>
+impl<F: Float, R: Rng + SeedableRng + Clone, D: Data<Elem = F>, T>
+    Fit<ArrayBase<D, Ix2>, T, GmmError> for GmmHyperParams<F, R>
 {
-    type Object = Result<GaussianMixtureModel<F>>;
+    type Object = GaussianMixtureModel<F>;
 
-    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Self::Object {
+    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
         self.validate()?;
         let observations = dataset.records().view();
         let mut gmm = GaussianMixtureModel::<F>::new(self, dataset, self.rng())?;
@@ -488,7 +488,7 @@ mod tests {
     }
     impl MultivariateNormal {
         pub fn new(mean: &ArrayView1<f64>, covariance: &ArrayView2<f64>) -> LAResult<Self> {
-            let lower = covariance.cholesky(UPLO::Lower).unwrap();
+            let lower = covariance.cholesky(UPLO::Lower)?;
             Ok(MultivariateNormal {
                 mean: mean.to_owned(),
                 covariance: covariance.to_owned(),
diff --git a/algorithms/linfa-clustering/src/gaussian_mixture/errors.rs b/algorithms/linfa-clustering/src/gaussian_mixture/errors.rs
index 0ae0859f8..9b62101b1 100644
--- a/algorithms/linfa-clustering/src/gaussian_mixture/errors.rs
+++ b/algorithms/linfa-clustering/src/gaussian_mixture/errors.rs
@@ -1,58 +1,37 @@
 use crate::k_means::KMeansError;
 use ndarray_linalg::error::LinalgError;
-use std::error::Error;
-use std::fmt::{self, Display};
-
+use thiserror::Error;
 pub type Result<T> = std::result::Result<T, GmmError>;
 
 /// An error when modeling a GMM algorithm
-#[derive(Debug)]
+#[derive(Error, Debug)]
 pub enum GmmError {
     /// When any of the hyperparameters are set the wrong value
+    #[error("Invalid value encountered: {0}")]
     InvalidValue(String),
     /// Errors encountered during linear algebra operations
-    LinalgError(LinalgError),
+    #[error(
+        "Linalg Error: \
+    Fitting the mixture model failed because some components have \
+    ill-defined empirical covariance (for instance caused by singleton \
+    or collapsed samples). Try to decrease the number of components, \
+    or increase reg_covar. 
Error: {0}" + )] + LinalgError(#[from] LinalgError), /// When a cluster has no more data point while fitting GMM + #[error("Fitting failed: {0}")] EmptyCluster(String), /// When lower bound computation fails + #[error("Fitting failed: {0}")] LowerBoundError(String), /// When fitting EM algorithm does not converge + #[error("Fitting failed: {0}")] NotConverged(String), /// When initial KMeans fails - KMeansError(String), -} - -impl Display for GmmError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::InvalidValue(message) => write!(f, "Invalid value encountered: {}", message), - Self::LinalgError(error) => write!( - f, - "Linalg Error: \ - Fitting the mixture model failed because some components have \ - ill-defined empirical covariance (for instance caused by singleton \ - or collapsed samples). Try to decrease the number of components, \ - or increase reg_covar. Error: {}", - error - ), - Self::EmptyCluster(message) => write!(f, "Fitting failed: {}", message), - Self::LowerBoundError(message) => write!(f, "Fitting failed: {}", message), - Self::NotConverged(message) => write!(f, "Fitting failed: {}", message), - Self::KMeansError(message) => write!(f, "Initial KMeans failed: {}", message), - } - } -} - -impl Error for GmmError {} - -impl From for GmmError { - fn from(error: LinalgError) -> GmmError { - GmmError::LinalgError(error) - } -} - -impl From for GmmError { - fn from(error: KMeansError) -> GmmError { - GmmError::KMeansError(error.to_string()) - } + #[error("Initial KMeans failed: {0}")] + KMeansError(#[from] KMeansError), + #[error(transparent)] + LinfaError(#[from] linfa::error::Error), + #[error(transparent)] + MinMaxError(#[from] ndarray_stats::errors::MinMaxError), } diff --git a/algorithms/linfa-clustering/src/k_means/algorithm.rs b/algorithms/linfa-clustering/src/k_means/algorithm.rs index 8f5aabea0..aefa62c9b 100644 --- a/algorithms/linfa-clustering/src/k_means/algorithm.rs +++ b/algorithms/linfa-clustering/src/k_means/algorithm.rs @@ -215,17 +215,17 @@ impl KMeans { } } -impl<'a, F: Float, R: Rng + Clone + SeedableRng, D: Data, T> Fit<'a, ArrayBase, T> - for KMeansHyperParams +impl, T> + Fit, T, KMeansError> for KMeansHyperParams { - type Object = Result>; + type Object = KMeans; /// Given an input matrix `observations`, with shape `(n_observations, n_features)`, /// `fit` identifies `n_clusters` centroids based on the training data distribution. /// /// An instance of `KMeans` is returned. 
/// - fn fit(&self, dataset: &DatasetBase, T>) -> Self::Object { + fn fit(&self, dataset: &DatasetBase, T>) -> Result { let mut rng = self.rng(); let observations = dataset.records().view(); let n_samples = dataset.nsamples(); diff --git a/algorithms/linfa-clustering/src/k_means/errors.rs b/algorithms/linfa-clustering/src/k_means/errors.rs index cad2f8548..f63474296 100644 --- a/algorithms/linfa-clustering/src/k_means/errors.rs +++ b/algorithms/linfa-clustering/src/k_means/errors.rs @@ -1,27 +1,19 @@ -use std::error::Error; -use std::fmt::{self, Display}; +use thiserror::Error; pub type Result = std::result::Result; /// An error when modeling a KMeans algorithm -#[derive(Debug)] +#[derive(Error, Debug)] pub enum KMeansError { /// When any of the hyperparameters are set the wrong value + #[error("Invalid value encountered: {0}")] InvalidValue(String), /// When inertia computation fails + #[error("Fitting failed: {0}")] InertiaError(String), /// When fitting algorithm does not converge + #[error("Fitting failed: {0}")] NotConverged(String), + #[error(transparent)] + LinfaError(#[from] linfa::error::Error), } - -impl Display for KMeansError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::InvalidValue(message) => write!(f, "Invalid value encountered: {}", message), - Self::InertiaError(message) => write!(f, "Fitting failed: {}", message), - Self::NotConverged(message) => write!(f, "Fitting failed: {}", message), - } - } -} - -impl Error for KMeansError {} diff --git a/algorithms/linfa-elasticnet/examples/elasticnet_cv.rs b/algorithms/linfa-elasticnet/examples/elasticnet_cv.rs new file mode 100644 index 000000000..751ff9529 --- /dev/null +++ b/algorithms/linfa-elasticnet/examples/elasticnet_cv.rs @@ -0,0 +1,26 @@ +use linfa::prelude::*; +use linfa_elasticnet::{ElasticNet, Result}; + +fn main() -> Result<()> { + // load Diabetes dataset (mutable to allow fast k-folding) + let mut dataset = linfa_datasets::diabetes(); + + // parameters to compare + let ratios = vec![0.1, 0.2, 0.5, 0.7, 1.0]; + + // create a model for each parameter + let models = ratios + .iter() + .map(|ratio| ElasticNet::params().penalty(0.3).l1_ratio(*ratio)) + .collect::>(); + + // get the mean r2 validation score across all folds for each model + let r2_values = + dataset.cross_validate(5, &models, |prediction, truth| prediction.r2(&truth))?; + + for (ratio, r2) in ratios.iter().zip(r2_values.iter()) { + println!("L1 ratio: {}, r2 score: {}", ratio, r2); + } + + Ok(()) +} diff --git a/algorithms/linfa-elasticnet/src/algorithm.rs b/algorithms/linfa-elasticnet/src/algorithm.rs index 8316fea22..a63ef1ee7 100644 --- a/algorithms/linfa-elasticnet/src/algorithm.rs +++ b/algorithms/linfa-elasticnet/src/algorithm.rs @@ -10,13 +10,13 @@ use linfa::{ use super::{ElasticNet, ElasticNetParams, Error, Result}; -impl<'a, F, D, T> Fit<'a, ArrayBase, T> for ElasticNetParams +impl Fit, T, crate::error::Error> for ElasticNetParams where F: Float + Lapack, D: Data, T: AsTargets, { - type Object = Result>; + type Object = ElasticNet; /// Fit an elastic net model given a feature matrix `x` and a target /// variable `y`. @@ -28,7 +28,7 @@ where /// Returns a `FittedElasticNet` object which contains the fitted /// parameters and can be used to `predict` values of the target variable /// for new feature values. 
- fn fit(&self, dataset: &DatasetBase, T>) -> Result> { + fn fit(&self, dataset: &DatasetBase, T>) -> Result { self.validate_params()?; let target = dataset.try_single_target()?; diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml index 91ca5330f..4eb0f7290 100644 --- a/algorithms/linfa-ica/Cargo.toml +++ b/algorithms/linfa-ica/Cargo.toml @@ -30,8 +30,9 @@ ndarray-rand = "0.13" ndarray-stats = "0.4" num-traits = "0.2" rand_isaac = "0.3" +thiserror = "1" -linfa = { version = "0.3.1", path = "../.." } +linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] } [dev-dependencies] ndarray-npy = { version = "0.7", default-features = false } diff --git a/algorithms/linfa-ica/src/error.rs b/algorithms/linfa-ica/src/error.rs index 3f14bd73c..7d8c614ce 100644 --- a/algorithms/linfa-ica/src/error.rs +++ b/algorithms/linfa-ica/src/error.rs @@ -1,38 +1,24 @@ use ndarray_linalg::error::LinalgError; -use std::error::Error; -use std::fmt::{self, Display}; +use thiserror::Error; pub type Result = std::result::Result; /// An error when modeling FastICA algorithm -#[derive(Debug)] +#[derive(Error, Debug)] pub enum FastIcaError { + /// When there are no samples in the provided dataset + #[error("Dataset must contain at least one sample")] + NotEnoughSamples, /// When any of the hyperparameters are set the wrong value + #[error("Invalid value encountered: {0}")] InvalidValue(String), /// If we fail to compute any components of the SVD decomposition /// due to an Ill-Conditioned matrix + #[error("SVD Decomposition failed, X could be an Ill-Conditioned matrix")] SvdDecomposition, /// Errors encountered during linear algebra operations - Linalg(LinalgError), -} - -impl Display for FastIcaError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::InvalidValue(message) => write!(f, "Invalid value encountered: {}", message), - Self::SvdDecomposition => write!( - f, - "SVD Decomposition failed, X could be an Ill-Conditioned matrix", - ), - Self::Linalg(error) => write!(f, "Linalg Error: {}", error), - } - } -} - -impl Error for FastIcaError {} - -impl From for FastIcaError { - fn from(error: LinalgError) -> FastIcaError { - FastIcaError::Linalg(error) - } + #[error("Linalg Error: {0}")] + Linalg(#[from] LinalgError), + #[error(transparent)] + LinfaError(#[from] linfa::error::Error), } diff --git a/algorithms/linfa-ica/src/fast_ica.rs b/algorithms/linfa-ica/src/fast_ica.rs index 657823bf3..839693e41 100644 --- a/algorithms/linfa-ica/src/fast_ica.rs +++ b/algorithms/linfa-ica/src/fast_ica.rs @@ -1,8 +1,12 @@ //! 
Fast algorithm for Independent Component Analysis (ICA) -use linfa::{dataset::DatasetBase, traits::*, Float}; +use linfa::{ + dataset::{DatasetBase, Records, WithLapack, WithoutLapack}, + traits::*, + Float, +}; use ndarray::{Array, Array1, Array2, ArrayBase, Axis, Data, Ix2}; -use ndarray_linalg::{eigh::Eigh, solveh::UPLO, svd::SVD, Lapack}; +use ndarray_linalg::{eigh::Eigh, solveh::UPLO, svd::SVD}; use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform, RandomExt}; use ndarray_stats::QuantileExt; use rand_isaac::Isaac64Rng; @@ -75,8 +79,8 @@ impl FastIca { } } -impl<'a, F: Float + Lapack, D: Data, T> Fit<'a, ArrayBase, T> for FastIca { - type Object = Result>; +impl, T> Fit, T, FastIcaError> for FastIca { + type Object = FittedFastIca; /// Fit the model /// @@ -87,9 +91,12 @@ impl<'a, F: Float + Lapack, D: Data, T> Fit<'a, ArrayBase, T> /// /// If the `alpha` value set for [`GFunc::Logcosh`] is not between 1 and 2 /// inclusive - fn fit(&self, dataset: &DatasetBase, T>) -> Result> { + fn fit(&self, dataset: &DatasetBase, T>) -> Result { let x = &dataset.records; - let (nsamples, nfeatures) = (x.nrows(), x.ncols()); + let (nsamples, nfeatures) = (x.nsamples(), x.nfeatures()); + if dataset.nsamples() == 0 { + return Err(FastIcaError::NotEnoughSamples); + } // If the number of components is not set, we take the minimum of // the number of rows and columns @@ -105,6 +112,7 @@ impl<'a, F: Float + Lapack, D: Data, T> Fit<'a, ArrayBase, T> } // We center the input by subtracting the mean of its features + // safe unwrap because we already returned an error on zero samples let xmean = x.mean_axis(Axis(0)).unwrap(); let mut xcentered = x - &xmean.view().insert_axis(Axis(0)); @@ -113,20 +121,23 @@ impl<'a, F: Float + Lapack, D: Data, T> Fit<'a, ArrayBase, T> // We whiten the matrix to remove any potential correlation between // the components + let xcentered = xcentered.with_lapack(); let k = match xcentered.svd(true, false)? 
{ (Some(u), s, _) => { - let s = s.mapv(|x| F::cast(x)); - (u.slice(s![.., ..nsamples.min(nfeatures)]).to_owned() / s) + let s = s.mapv(|x| F::Lapack::cast(x)); + (u.slice_move(s![.., ..nsamples.min(nfeatures)]) / s) .t() .slice(s![..ncomponents, ..]) .to_owned() } _ => return Err(FastIcaError::SvdDecomposition), }; - let mut xwhitened = k.dot(&xcentered); + + let mut xwhitened = k.dot(&xcentered).without_lapack(); + let k = k.without_lapack(); // We multiply the matrix with root of the number of records - let nsamples_sqrt = F::cast((nsamples as f64).sqrt()); + let nsamples_sqrt = F::cast(nsamples).sqrt(); xwhitened.mapv_inplace(|x| x * nsamples_sqrt); // We initialize the de-mixing matrix with a uniform distribution @@ -152,7 +163,7 @@ impl<'a, F: Float + Lapack, D: Data, T> Fit<'a, ArrayBase, T> } } -impl FastIca { +impl FastIca { // Parallel FastICA, Optimization step fn ica_parallel(&self, x: &Array2, w: &Array2) -> Result> { let mut w = Self::sym_decorrelation(&w)?; @@ -173,9 +184,9 @@ impl FastIca { .zip(w.outer_iter()) .map(|(a, b)| a.dot(&b)) .collect::>() - .mapv(num_traits::Float::abs) + .mapv(|x| x.abs()) .mapv(|x| x - F::cast(1.)) - .mapv(num_traits::Float::abs) + .mapv(|x| x.abs()) .max() .unwrap(); @@ -193,17 +204,18 @@ impl FastIca { // // W <- (W * W.T)^{-1/2} * W fn sym_decorrelation(w: &Array2) -> Result> { - let (eig_val, eig_vec) = w.dot(&w.t()).eigh(UPLO::Upper)?; + let (eig_val, eig_vec) = w.dot(&w.t()).with_lapack().eigh(UPLO::Upper)?; let eig_val = eig_val.mapv(|x| F::cast(x)); + let eig_vec = eig_vec.without_lapack(); let tmp = &eig_vec - * &(eig_val.mapv(num_traits::Float::sqrt).mapv(|x| { + * &(eig_val.mapv(|x| x.sqrt()).mapv(|x| { // We lower bound the float value at 1e-7 when taking the reciprocal let lower_bound = F::cast(1e-7); if x < lower_bound { - return num_traits::Float::recip(lower_bound); + return lower_bound.recip(); } - num_traits::Float::recip(x) + x.recip() })) .insert_axis(Axis(0)); diff --git a/algorithms/linfa-linear/src/error.rs b/algorithms/linfa-linear/src/error.rs index 45dd2a51c..b996943e8 100644 --- a/algorithms/linfa-linear/src/error.rs +++ b/algorithms/linfa-linear/src/error.rs @@ -11,4 +11,10 @@ pub enum LinearError { Argmin(#[from] argmin::core::Error), #[error(transparent)] BaseCrate(#[from] linfa::Error), + #[error("At least one sample needed")] + NotEnoughSamples, + #[error("At least one target needed")] + NotEnoughTargets, + #[error(transparent)] + LinalgError(#[from] ndarray_linalg::error::LinalgError), } diff --git a/algorithms/linfa-linear/src/glm.rs b/algorithms/linfa-linear/src/glm.rs index c51c29c7c..a7009f283 100644 --- a/algorithms/linfa-linear/src/glm.rs +++ b/algorithms/linfa-linear/src/glm.rs @@ -3,7 +3,7 @@ mod distribution; mod link; -use crate::error::Result; +use crate::error::{LinearError, Result}; use crate::float::{ArgminParam, Float}; use distribution::TweedieDistribution; pub use link::Link; @@ -119,12 +119,12 @@ impl TweedieRegressor { } } -impl, T: AsTargets> Fit<'_, ArrayBase, T> +impl, T: AsTargets> Fit, T, LinearError> for TweedieRegressor { - type Object = Result>; + type Object = FittedTweedieRegressor; - fn fit(&self, ds: &DatasetBase, T>) -> Result> { + fn fit(&self, ds: &DatasetBase, T>) -> Result { let (x, y) = (ds.records(), ds.try_single_target()?); let dist = TweedieDistribution::new(self.power)?; diff --git a/algorithms/linfa-linear/src/glm/distribution.rs b/algorithms/linfa-linear/src/glm/distribution.rs index d91a8cc8c..c16554d3a 100644 --- a/algorithms/linfa-linear/src/glm/distribution.rs +++ 
b/algorithms/linfa-linear/src/glm/distribution.rs @@ -1,4 +1,4 @@ -use crate::float::Float; +use linfa::Float; use ndarray::Zip; use ndarray::{Array1, ArrayView1}; @@ -42,48 +42,43 @@ impl TweedieDistribution { } // Returns `true` if y is in the valid range - pub fn in_range(&self, y: &ArrayView1) -> bool { + pub fn in_range(&self, y: &ArrayView1) -> bool { if self.inclusive { - return y.iter().all(|&x| x >= A::from(self.lower_bound).unwrap()); + return y.iter().all(|&x| x >= F::cast(self.lower_bound)); } - y.iter().all(|&x| x > A::from(self.lower_bound).unwrap()) + y.iter().all(|&x| x > F::cast(self.lower_bound)) } - fn unit_variance(&self, ypred: ArrayView1) -> Array1 { + fn unit_variance(&self, ypred: ArrayView1) -> Array1 { // ypred ^ power - ypred.mapv(|x| num_traits::Float::powf(x, A::from(self.power).unwrap())) + ypred.mapv(|x| x.powf(F::cast(self.power))) } - fn unit_deviance(&self, y: ArrayView1, ypred: ArrayView1) -> Result> { + fn unit_deviance(&self, y: ArrayView1, ypred: ArrayView1) -> Result> { match self.power { power if power < 0. => { let mut left = y.mapv(|x| { - if x < A::from(0.).unwrap() { - return A::from(0.).unwrap(); + if x.is_negative() { + return F::zero(); } x }); left.mapv_inplace(|x| { - num_traits::Float::powf(x, A::from(2. - self.power).unwrap()) - / A::from((1. - self.power) * (2. - self.power)).unwrap() + x.powf(F::cast(2. - self.power)) + / F::cast((1. - self.power) * (2. - self.power)) }); - let middle = &y - * &ypred.mapv(|x| { - num_traits::Float::powf(x, A::from(1. - self.power).unwrap()) - / A::from(1. - power).unwrap() - }); + let middle = + &y * &ypred.mapv(|x| x.powf(F::cast(1. - self.power)) / F::cast(1. - power)); - let right = ypred.mapv(|x| { - num_traits::Float::powf(x, A::from(2. - self.power).unwrap()) - / A::from(2. - self.power).unwrap() - }); + let right = + ypred.mapv(|x| x.powf(F::cast(2. - self.power)) / F::cast(2. - self.power)); - Ok((left - middle + right).mapv(|x| A::from(2.).unwrap() * x)) + Ok((left - middle + right).mapv(|x| F::cast(2.) * x)) } // Normal distribution // (y - ypred)^2 - power if power == 0. => Ok((&y - &ypred).mapv(|x| num_traits::Float::powi(x, 2))), + power if power == 0. => Ok((&y - &ypred).mapv(|x| x * x)), power if power < 1. => Err(linfa::Error::Parameters(format!( "Power value cannot be between 0 and 1, got: {}", power @@ -93,10 +88,10 @@ impl TweedieDistribution { power if (power - 1.).abs() < 1e-6 => { let mut div = &y / &ypred; Zip::from(&mut div).and(y).apply(|y, &x| { - if x == A::from(0.).unwrap() { - *y = A::from(0.).unwrap(); + if x == F::zero() { + *y = F::zero(); } else { - *y = A::from(2.).unwrap() * (x * num_traits::Float::ln(*y)); + *y = F::cast(2.) * (x * y.ln()); } }); Ok(div - y + ypred) @@ -104,49 +99,41 @@ impl TweedieDistribution { // Gamma distribution // 2 * (log(ypred / y) + (y / ypred) - 1) power if (power - 2.).abs() < 1e-6 => { - let mut temp = (&ypred / &y).mapv(num_traits::Float::ln) + (&y / &ypred); - temp.mapv_inplace(|x| x - A::from(1.).unwrap()); - Ok(temp.mapv(|x| A::from(2.).unwrap() * x)) + let mut temp = (&ypred / &y).mapv(|x| x.ln()) + (&y / &ypred); + temp.mapv_inplace(|x| x - F::one()); + Ok(temp.mapv(|x| F::cast(2.) * x)) } power => { - let left = y.mapv(|x| { - num_traits::Float::powf(x, A::from(2. - power).unwrap()) - / A::from((1. - power) * (2. - power)).unwrap() - }); + let left = + y.mapv(|x| x.powf(F::cast(2. - power)) / F::cast((1. - power) * (2. - power))); - let middle = &y - * &ypred.mapv(|x| { - num_traits::Float::powf(x, A::from(1. 
- power).unwrap()) - / A::from(1. - power).unwrap() - }); + let middle = + &y * &ypred.mapv(|x| x.powf(F::cast(1. - power)) / F::cast(1. - power)); - let right = ypred.mapv(|x| { - num_traits::Float::powf(x, A::from(2. - power).unwrap()) - / A::from(2. - power).unwrap() - }); + let right = ypred.mapv(|x| x.powf(F::cast(2. - power)) / F::cast(2. - power)); - Ok((left - middle + right).mapv(|x| A::from(2.).unwrap() * x)) + Ok((left - middle + right).mapv(|x| F::cast(2.) * x)) } } } - fn unit_deviance_derivative( + fn unit_deviance_derivative( &self, - y: ArrayView1, - ypred: ArrayView1, - ) -> Array1 { - ((&y - &ypred) / &self.unit_variance(ypred)).mapv(|x| A::from(-2.).unwrap() * x) + y: ArrayView1, + ypred: ArrayView1, + ) -> Array1 { + ((&y - &ypred) / &self.unit_variance(ypred)).mapv(|x| F::cast(-2.) * x) } - pub fn deviance(&self, y: ArrayView1, ypred: ArrayView1) -> Result { + pub fn deviance(&self, y: ArrayView1, ypred: ArrayView1) -> Result { Ok(self.unit_deviance(y, ypred)?.sum()) } - pub fn deviance_derivative( + pub fn deviance_derivative( &self, - y: ArrayView1, - ypred: ArrayView1, - ) -> Array1 { + y: ArrayView1, + ypred: ArrayView1, + ) -> Array1 { self.unit_deviance_derivative(y, ypred) } } diff --git a/algorithms/linfa-linear/src/glm/link.rs b/algorithms/linfa-linear/src/glm/link.rs index ebde1a8b5..4184fd5b0 100644 --- a/algorithms/linfa-linear/src/glm/link.rs +++ b/algorithms/linfa-linear/src/glm/link.rs @@ -89,9 +89,9 @@ impl LinkFn for IdentityLink { struct LogLink; -impl LinkFn for LogLink { +impl LinkFn for LogLink { fn link(ypred: &Array1) -> Array1 { - ypred.mapv(|x| num_traits::Float::ln(x)) + ypred.mapv(|x| x.ln()) } fn link_derivative(ypred: &Array1) -> Array1 { @@ -106,40 +106,36 @@ impl LinkFn for LogLink { } fn inverse(lin_pred: &Array1) -> Array1 { - lin_pred.mapv(|x| num_traits::Float::exp(x)) + lin_pred.mapv(|x| x.exp()) } fn inverse_derivative(lin_pred: &Array1) -> Array1 { - lin_pred.mapv(|x| num_traits::Float::exp(x)) + lin_pred.mapv(|x| x.exp()) } } struct LogitLink; -impl LinkFn for LogitLink { +impl LinkFn for LogitLink { fn link(ypred: &Array1) -> Array1 { // logit(ypred) - ypred.mapv(|x| num_traits::Float::ln(x / (A::from(1.).unwrap() - x))) + ypred.mapv(|x| (x / (A::one() - x)).ln()) } fn link_derivative(ypred: &Array1) -> Array1 { // 1 / (ypred * (1-ypred) - ypred.mapv(|x| A::from(1.).unwrap() / (x * (A::from(1.).unwrap() - x))) + ypred.mapv(|x| A::one() / (x * (A::one() - x))) } fn inverse(lin_pred: &Array1) -> Array1 { // expit(lin_pred) - lin_pred.mapv(|x| { - A::from(1.).unwrap() / (A::from(1.).unwrap() + num_traits::Float::exp(x.neg())) - }) + lin_pred.mapv(|x| A::one() / (A::one() + x.neg().exp())) } fn inverse_derivative(lin_pred: &Array1) -> Array1 { // expit(lin_pred) * (1 - expit(lin_pred)) - let expit = lin_pred.mapv(|x| { - A::from(1.).unwrap() / (A::from(1.).unwrap() + num_traits::Float::exp(x.neg())) - }); - let one_minus_expit = expit.mapv(|x| A::from(1.).unwrap() - x); + let expit = lin_pred.mapv(|x| A::one() / (A::one() + x.neg().exp())); + let one_minus_expit = expit.mapv(|x| A::one() - x); expit * one_minus_expit } } diff --git a/algorithms/linfa-linear/src/ols.rs b/algorithms/linfa-linear/src/ols.rs index 00190c182..de3cb6cc4 100644 --- a/algorithms/linfa-linear/src/ols.rs +++ b/algorithms/linfa-linear/src/ols.rs @@ -1,5 +1,6 @@ //! 
Ordinary Least Squares #![allow(non_snake_case)] +use crate::error::{LinearError, Result}; use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2}; use ndarray_linalg::{Lapack, Scalar, Solve}; use ndarray_stats::SummaryStatisticsExt; @@ -117,10 +118,10 @@ impl LinearRegression { } } -impl<'a, F: Float, D: Data, T: AsTargets> Fit<'a, ArrayBase, T> +impl, T: AsTargets> Fit, T, LinearError> for LinearRegression { - type Object = Result, String>; + type Object = FittedLinearRegression; /// Fit a linear regression model given a feature matrix `X` and a target /// variable `y`. @@ -132,12 +133,9 @@ impl<'a, F: Float, D: Data, T: AsTargets> Fit<'a, ArrayBase< /// Returns a `FittedLinearRegression` object which contains the fitted /// parameters and can be used to `predict` values of the target variable /// for new feature values. - fn fit( - &self, - dataset: &DatasetBase, T>, - ) -> Result, String> { + fn fit(&self, dataset: &DatasetBase, T>) -> Result { let X = dataset.records(); - let y = dataset.try_single_target().unwrap(); + let y = dataset.try_single_target()?; let (n_samples, _) = X.dim(); @@ -151,11 +149,9 @@ impl<'a, F: Float, D: Data, T: AsTargets> Fit<'a, ArrayBase< // to the X_offset and y_offset let X_offset: Array1 = X .mean_axis(Axis(0)) - .ok_or_else(|| String::from("cannot compute mean of X"))?; + .ok_or_else(|| LinearError::NotEnoughSamples)?; let X_centered: Array2 = X - &X_offset; - let y_offset: F = y - .mean() - .ok_or_else(|| String::from("cannot compute mean of y"))?; + let y_offset: F = y.mean().ok_or_else(|| LinearError::NotEnoughTargets)?; let y_centered: Array1 = &y - y_offset; let params: Array1 = compute_params(&X_centered, &y_centered, self.options.should_normalize())?; @@ -176,7 +172,7 @@ fn compute_params( X: &ArrayBase, y: &ArrayBase, normalize: bool, -) -> Result, String> +) -> Result> where F: Float, B: Data, @@ -196,10 +192,7 @@ where /// Solve the overconstrained model Xb = y by solving X^T X b = X^t y, /// this is (mathematically, not numerically) equivalent to computing /// the solution with the Moore-Penrose pseudo-inverse. -fn solve_normal_equation( - X: &ArrayBase, - y: &ArrayBase, -) -> Result, String> +fn solve_normal_equation(X: &ArrayBase, y: &ArrayBase) -> Result> where F: Float, B: Data, @@ -207,9 +200,7 @@ where { let rhs = X.t().dot(y); let linear_operator = X.t().dot(X); - linear_operator - .solve_into(rhs) - .map_err(|err| format! {"{}", err}) + linear_operator.solve_into(rhs).map_err(|err| err.into()) } /// View the fitted parameters and make predictions with a fitted diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml index 144c0261a..495533637 100644 --- a/algorithms/linfa-logistic/Cargo.toml +++ b/algorithms/linfa-logistic/Cargo.toml @@ -19,6 +19,7 @@ ndarray-linalg = "0.13" num-traits = "0.2" argmin = { version = "0.4", features = ["ndarrayl"] } serde = "1.0" +thiserror = "1" linfa = { version = "0.3.1", path = "../.." } diff --git a/algorithms/linfa-logistic/examples/logistic_cv.rs b/algorithms/linfa-logistic/examples/logistic_cv.rs new file mode 100644 index 000000000..0219ead31 --- /dev/null +++ b/algorithms/linfa-logistic/examples/logistic_cv.rs @@ -0,0 +1,34 @@ +use linfa::prelude::*; +use linfa_logistic::error::Result; +use linfa_logistic::LogisticRegression; + +fn main() -> Result<()> { + // Load dataset. 
Mutability is needed for fast cross validation + let mut dataset = + linfa_datasets::winequality().map_targets(|x| if *x > 6 { "good" } else { "bad" }); + + // define a sequence of models to compare. In this case the + // models will differ by the amount of l2 regularization + let alphas = vec![0.1, 1., 10.]; + let models: Vec<_> = alphas + .iter() + .map(|alpha| { + LogisticRegression::default() + .alpha(*alpha) + .max_iterations(150) + }) + .collect(); + + // use cross validation to compute the validation accuracy of each model. The + // accuracy of each model will be averaged across the folds, 5 in this case + let accuracies = dataset.cross_validate(5, &models, |prediction, truth| { + Ok(prediction.confusion_matrix(truth)?.accuracy()) + })?; + + // display the accuracy of the models along with their regularization coefficient + for (alpha, accuracy) in alphas.iter().zip(accuracies.iter()) { + println!("Alpha: {}, accuracy: {} ", alpha, accuracy); + } + + Ok(()) +} diff --git a/algorithms/linfa-logistic/src/error.rs b/algorithms/linfa-logistic/src/error.rs new file mode 100644 index 000000000..54ae7c5a6 --- /dev/null +++ b/algorithms/linfa-logistic/src/error.rs @@ -0,0 +1,22 @@ +use thiserror::Error; +pub type Result = std::result::Result; + +#[derive(Error, Debug)] +pub enum Error { + #[error(transparent)] + LinfaError(#[from] linfa::Error), + #[error("Expected exactly two classes for logistic regression")] + WrongNumberOfClasses, + #[error(transparent)] + ArgMinError(#[from] argmin::core::Error), + #[error("Expected `x` and `y` to have same number of rows, got {0} != {1}")] + MismatchedShapes(usize, usize), + #[error("Values must be finite and not `Inf`, `-Inf` or `NaN`")] + InvalidValues, + #[error("gradient_tolerance must be a positive, finite number")] + InvalidGradientTolerance, + #[error("Size of initial parameter guess must be the same as the number of columns in the feature matrix `x`")] + InvalidInitialParametersGuessSize, + #[error("Initial parameter guess must be finite")] + InvalidInitialParametersGuess, +} diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index cb299bbf5..5882fa5de 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -14,7 +14,11 @@ //! ```bash //! $ cargo run --example winequality //! ``` +//! + +pub mod error; +use crate::error::{Error, Result}; use argmin::prelude::*; use argmin::solver::linesearch::MoreThuenteLineSearch; use argmin::solver::quasinewton::lbfgs::LBFGS; @@ -138,11 +142,7 @@ impl LogisticRegression { /// i.e. any values are `Inf` or `NaN`, `y` doesn't have as many items as /// `x` has rows, or if other parameters (gradient_tolerance, alpha) have /// been set to inalid values. - fn fit( - &self, - x: &ArrayBase, - y: T, - ) -> Result, String> + fn fit(&self, x: &ArrayBase, y: T) -> Result> where A: Data, T: AsTargets, @@ -159,45 +159,38 @@ impl LogisticRegression { /// Ensure that `x` and `y` have the right shape and that all data and /// configuration parameters are finite. 
- fn validate_data( - &self, - x: &ArrayBase, - y: &ArrayBase, - ) -> Result<(), String> + fn validate_data(&self, x: &ArrayBase, y: &ArrayBase) -> Result<()> where A: Data, B: Data, { if x.shape()[0] != y.len() { - return Err( - "Incompatible shapes of data, expected `x` and `y` to have same number of rows" - .to_string(), - ); + return Err(Error::MismatchedShapes(x.shape()[0], y.len())); } if x.iter().any(|x| !x.is_finite()) || y.iter().any(|y| !y.is_finite()) || !self.alpha.is_finite() { - return Err("Values must be finite and not `Inf`, `-Inf` or `NaN`".to_string()); + return Err(Error::InvalidValues); } if !self.gradient_tolerance.is_finite() || self.gradient_tolerance <= F::zero() { - return Err("gradient_tolerance must be a positive, finite number".to_string()); + return Err(Error::InvalidGradientTolerance); } self.validate_init_params(x)?; Ok(()) } - fn validate_init_params(&self, x: &ArrayBase) -> Result<(), String> + fn validate_init_params(&self, x: &ArrayBase) -> Result<()> where A: Data, { if let Some((params, intercept)) = self.initial_params.as_ref() { let (_, n_features) = x.dim(); if n_features != params.dim() { - return Err("Size of initial parameter guess must be the same as the number of columns in the feature matrix `x`".to_string()); + return Err(Error::InvalidInitialParametersGuessSize); } if params.iter().any(|p| !p.is_finite()) || !intercept.is_finite() { - return Err("Initial parameter guess must be finite".to_string()); + return Err(Error::InvalidInitialParametersGuess); } } Ok(()) @@ -254,14 +247,14 @@ impl LogisticRegression { problem: LogisticRegressionProblem<'a, F, A>, solver: LBFGSType, init_params: Array1, - ) -> Result>, String> + ) -> Result>> where A: Data, { Executor::new(problem, solver, ArgminParam(init_params)) .max_iters(self.max_iterations) .run() - .map_err(|err| format!("Error running solver: {}", err)) + .map_err(|err| err.into()) } /// Take an ArgminResult and return a FittedLogisticRegression. 
@@ -269,7 +262,7 @@ impl LogisticRegression { &self, labels: ClassLabels, result: &ArgminResult>, - ) -> Result, String> + ) -> Result> where A: Data, C: PartialOrd + Clone, @@ -285,9 +278,9 @@ impl LogisticRegression { } impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets> - Fit<'a, ArrayBase, T> for LogisticRegression + Fit, T, Error> for LogisticRegression { - type Object = Result, String>; + type Object = FittedLogisticRegression; /// Given a 2-dimensional feature matrix array `x` with shape /// (n_samples, n_features) and an array of target classes to predict, @@ -305,10 +298,7 @@ impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets, T>, - ) -> Result, String> { + fn fit(&self, dataset: &DatasetBase, T>) -> Result { self.fit(dataset.records(), dataset.targets()) } } @@ -319,61 +309,57 @@ impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets(y: T) -> Result<(ClassLabels, Array1), String> +fn label_classes(y: T) -> Result<(ClassLabels, Array1)> where F: Float, T: AsTargets, C: PartialOrd + Clone, { - match y.try_single_target() { - Err(_) => Err("Expected single target dataset".to_string()), - Ok(y_single_target) => { - let mut classes: Vec<&C> = vec![]; - let mut target_vec = vec![]; - let mut use_negative_label: bool = true; - for item in y_single_target { - if let Some(last_item) = classes.last() { - if *last_item != item { - use_negative_label = !use_negative_label; - } - } - if !classes.contains(&item) { - classes.push(item); - } - target_vec.push(if use_negative_label { - F::NEGATIVE_LABEL - } else { - F::POSITIVE_LABEL - }); - } - if classes.len() != 2 { - return Err("Expected exactly two classes for logistic regression".to_string()); + let y_single_target = y.try_single_target()?; + let mut classes: Vec<&C> = vec![]; + let mut target_vec = vec![]; + let mut use_negative_label: bool = true; + for item in y_single_target { + if let Some(last_item) = classes.last() { + if *last_item != item { + use_negative_label = !use_negative_label; } - let mut target_array = Array1::from(target_vec); - let labels = if classes[0] < classes[1] { - (F::NEGATIVE_LABEL, F::POSITIVE_LABEL) - } else { - // If we found the larger class first, flip the sign in the target - // vector, so that -1.0 is always the label for the smaller class - // and 1.0 the label for the larger class - target_array *= -F::one(); - (F::POSITIVE_LABEL, F::NEGATIVE_LABEL) - }; - Ok(( - vec![ - ClassLabel { - class: classes[0].clone(), - label: labels.0, - }, - ClassLabel { - class: classes[1].clone(), - label: labels.1, - }, - ], - target_array, - )) } + if !classes.contains(&item) { + classes.push(item); + } + target_vec.push(if use_negative_label { + F::NEGATIVE_LABEL + } else { + F::POSITIVE_LABEL + }); + } + if classes.len() != 2 { + return Err(Error::WrongNumberOfClasses); } + let mut target_array = Array1::from(target_vec); + let labels = if classes[0] < classes[1] { + (F::NEGATIVE_LABEL, F::POSITIVE_LABEL) + } else { + // If we found the larger class first, flip the sign in the target + // vector, so that -1.0 is always the label for the smaller class + // and 1.0 the label for the larger class + target_array *= -F::one(); + (F::POSITIVE_LABEL, F::NEGATIVE_LABEL) + }; + Ok(( + vec![ + ClassLabel { + class: classes[0].clone(), + label: labels.0, + }, + ClassLabel { + class: classes[1].clone(), + label: labels.1, + }, + ], + target_array, + )) } /// Conditionally split the feature vector `w` into parameter vector and @@ -394,8 +380,8 @@ fn convert_params(n_features: usize, 
w: &Array1) -> (Array1, F) } /// The logistic function -fn logistic(x: F) -> F { - F::one() / (F::one() + num_traits::Float::exp(-x)) +fn logistic(x: F) -> F { + F::one() / (F::one() + (-x).exp()) } /// A numerically stable version of the log of the logistic function. @@ -405,11 +391,11 @@ fn logistic(x: F) -> F { /// /// See the blog post describing this implementation: /// http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/ -fn log_logistic(x: F) -> F { +fn log_logistic(x: F) -> F { if x > F::zero() { - -num_traits::Float::ln(F::one() + num_traits::Float::exp(-x)) + -(F::one() + (-x).exp()).ln() } else { - x - num_traits::Float::ln(F::one() + num_traits::Float::exp(x)) + x - (F::one() + x.exp()).ln() } } @@ -572,13 +558,13 @@ impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem<'a, type Float = F; /// Apply the cost function to a parameter `p` - fn apply(&self, p: &Self::Param) -> Result { + fn apply(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); Ok(logistic_loss(self.x, &self.target, self.alpha, w)) } /// Compute the gradient at parameter `p`. - fn gradient(&self, p: &Self::Param) -> Result { + fn gradient(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); Ok(ArgminParam(logistic_grad( self.x, @@ -773,7 +759,10 @@ mod test { let x = array![[0.01], [1.0], [-1.0], [-0.01]]; let y = array![[0, 0], [0, 0], [0, 0], [0, 0]]; let res = log_reg.fit(&x, &y); - assert_eq!(res, Err("Expected single target dataset".to_string())); + assert_eq!( + res.unwrap_err().to_string(), + "multiple targets not supported".to_string() + ); } #[test] @@ -783,11 +772,8 @@ mod test { let y = array![0.0, 0.0, 1.0, 1.0]; let res = log_reg.fit(&x, &y); assert_eq!( - res, - Err( - "Incompatible shapes of data, expected `x` and `y` to have same number of rows" - .to_string() - ) + res.unwrap_err().to_string(), + "Expected `x` and `y` to have same number of rows, got 3 != 4".to_string() ); } @@ -798,15 +784,15 @@ mod test { let log_reg = LogisticRegression::default(); let normal_x = array![[-1.0], [1.0]]; let y = array![0.0, 1.0]; - let expected = Err("Values must be finite and not `Inf`, `-Inf` or `NaN`".to_string()); + let expected = "Values must be finite and not `Inf`, `-Inf` or `NaN`".to_string(); for inf_x in &inf_xs { let res = log_reg.fit(inf_x, &y); - assert_eq!(res, expected); + assert_eq!(res.unwrap_err().to_string(), expected); } for inf in &infs { let log_reg = LogisticRegression::default().alpha(*inf); let res = log_reg.fit(&normal_x, &y); - assert_eq!(res, expected); + assert_eq!(res.unwrap_err().to_string(), expected); } let mut non_positives = infs.clone(); non_positives.push(-1.0); @@ -815,8 +801,8 @@ mod test { let log_reg = LogisticRegression::default().gradient_tolerance(*inf); let res = log_reg.fit(&normal_x, &y); assert_eq!( - res, - Err("gradient_tolerance must be a positive, finite number".to_string()) + res.unwrap_err().to_string(), + "gradient_tolerance must be a positive, finite number" ); } } @@ -826,21 +812,21 @@ mod test { let infs = vec![std::f64::INFINITY, std::f64::NEG_INFINITY, std::f64::NAN]; let normal_x = array![[-1.0], [1.0]]; let normal_y = array![0.0, 1.0]; - let expected = Err("Initial parameter guess must be finite".to_string()); + let expected = "Initial parameter guess must be finite".to_string(); for inf in &infs { let log_reg = LogisticRegression::default().initial_params(array![*inf], 0.0); let res = log_reg.fit(&normal_x, &normal_y); - assert_eq!(res, expected); + 
assert_eq!(res.unwrap_err().to_string(), expected); } for inf in &infs { let log_reg = LogisticRegression::default().initial_params(array![0.0], *inf); let res = log_reg.fit(&normal_x, &normal_y); - assert_eq!(res, expected); + assert_eq!(res.unwrap_err().to_string(), expected); } { let log_reg = LogisticRegression::default().initial_params(array![0.0, 0.0], 0.0); let res = log_reg.fit(&normal_x, &normal_y); - assert_eq!(res, Err("Size of initial parameter guess must be the same as the number of columns in the feature matrix `x`".to_string())); + assert_eq!(res.unwrap_err().to_string(), "Size of initial parameter guess must be the same as the number of columns in the feature matrix `x`".to_string()); } } diff --git a/algorithms/linfa-pls/Cargo.toml b/algorithms/linfa-pls/Cargo.toml index af8e9cec8..1221010c1 100644 --- a/algorithms/linfa-pls/Cargo.toml +++ b/algorithms/linfa-pls/Cargo.toml @@ -31,8 +31,8 @@ ndarray-rand = "0.13" rand_isaac = "0.3" num-traits = "0.2" paste = "1.0" - -linfa = { version = "0.3.1", path = "../.." } +thiserror = "1" +linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] } [dev-dependencies] linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["linnerud"] } diff --git a/algorithms/linfa-pls/src/errors.rs b/algorithms/linfa-pls/src/errors.rs index a39c568bb..6986050bf 100644 --- a/algorithms/linfa-pls/src/errors.rs +++ b/algorithms/linfa-pls/src/errors.rs @@ -1,36 +1,19 @@ use ndarray_linalg::error::LinalgError; -use std::error::Error; -use std::fmt::{self, Display}; - +use thiserror::Error; pub type Result = std::result::Result; -#[derive(Debug)] +#[derive(Error, Debug)] pub enum PlsError { + #[error("Not enough samples: {0}")] NotEnoughSamplesError(String), + #[error("Bad component number: {0}")] BadComponentNumberError(String), + #[error("Power method not converged: {0}")] PowerMethodNotConvergedError(String), - LinalgError(String), -} - -impl Display for PlsError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::NotEnoughSamplesError(message) => write!(f, "Not enough samples: {}", message), - Self::BadComponentNumberError(message) => { - write!(f, "Bad component number: {}", message) - } - Self::PowerMethodNotConvergedError(message) => { - write!(f, "Power method not converged: {}", message) - } - Self::LinalgError(message) => write!(f, "Linear algebra error: {}", message), - } - } -} - -impl Error for PlsError {} - -impl From for PlsError { - fn from(error: LinalgError) -> PlsError { - PlsError::LinalgError(error.to_string()) - } + #[error(transparent)] + LinalgError(#[from] LinalgError), + #[error(transparent)] + LinfaError(#[from] linfa::error::Error), + #[error(transparent)] + MinMaxError(#[from] ndarray_stats::errors::MinMaxError), } diff --git a/algorithms/linfa-pls/src/lib.rs b/algorithms/linfa-pls/src/lib.rs index 9e2ed11d5..93dfa8df4 100644 --- a/algorithms/linfa-pls/src/lib.rs +++ b/algorithms/linfa-pls/src/lib.rs @@ -130,14 +130,14 @@ macro_rules! 
pls_algo { ($name:ident) => { } } - impl> Fit<'_, ArrayBase, ArrayBase> + impl> Fit, ArrayBase, PlsError> for [] { - type Object = Result<[]>; + type Object = []; fn fit( &self, dataset: &DatasetBase, ArrayBase>, - ) -> Result<[]> { + ) -> Result { let pls = self.0.fit(dataset)?; Ok([](pls)) } diff --git a/algorithms/linfa-pls/src/pls_generic.rs b/algorithms/linfa-pls/src/pls_generic.rs index 177aa1746..a3f33d526 100644 --- a/algorithms/linfa-pls/src/pls_generic.rs +++ b/algorithms/linfa-pls/src/pls_generic.rs @@ -1,8 +1,12 @@ use crate::errors::{PlsError, Result}; -use crate::{utils, Float}; +use crate::utils; use linfa::{ - dataset::Records, traits::Fit, traits::PredictRef, traits::Transformer, Dataset, DatasetBase, + dataset::{Records, WithLapack, WithoutLapack}, + traits::Fit, + traits::PredictRef, + traits::Transformer, + Dataset, DatasetBase, Float, }; use ndarray::{Array1, Array2, ArrayBase, Data, Ix2}; use ndarray_linalg::svd::*; @@ -197,10 +201,15 @@ impl PlsParams { } } -impl> Fit<'_, ArrayBase, ArrayBase> for PlsParams { - type Object = Result>; +impl> Fit, ArrayBase, PlsError> + for PlsParams +{ + type Object = Pls; - fn fit(&self, dataset: &DatasetBase, ArrayBase>) -> Result> { + fn fit( + &self, + dataset: &DatasetBase, ArrayBase>, + ) -> Result { let records = dataset.records(); let targets = dataset.targets(); @@ -259,9 +268,7 @@ impl> Fit<'_, ArrayBase, ArrayBase> Algorithm::Nipals => { // Replace columns that are all close to zero with zeros for mut yj in yk.gencolumns_mut() { - if *(yj.mapv(|y| num_traits::float::Float::abs(y)).max().unwrap()) - < F::cast(10.) * eps - { + if *(yj.mapv(|y| y.abs()).max()?) < F::cast(10.) * eps { yj.assign(&Array1::zeros(yj.len())); } } @@ -278,7 +285,7 @@ impl> Fit<'_, ArrayBase, ArrayBase> // compute scores, i.e. the projections of x and Y let x_scores_k = xk.dot(&x_weights_k); let y_ss = if norm_y_weights { - F::cast(1.) + F::one() } else { y_weights_k.dot(&y_weights_k) }; @@ -316,8 +323,8 @@ impl> Fit<'_, ArrayBase, ArrayBase> // Similiarly, Y was approximated as Omega . Delta.T + Y_(R+1) // Compute transformation matrices (rotations_). See User Guide. - let x_rotations = x_weights.dot(&utils::pinv2(&x_loadings.t().dot(&x_weights), None)); - let y_rotations = y_weights.dot(&utils::pinv2(&y_loadings.t().dot(&y_weights), None)); + let x_rotations = x_weights.dot(&utils::pinv2(x_loadings.t().dot(&x_weights).view(), None)); + let y_rotations = y_weights.dot(&utils::pinv2(y_loadings.t().dot(&y_weights).view(), None)); let mut coefficients = x_rotations.dot(&y_loadings.t()); coefficients *= &y_std; @@ -354,7 +361,7 @@ impl PlsParams { let mut y_score = Array1::ones(y.ncols()); for col in y.t().genrows() { - if *col.mapv(|v| num_traits::Float::abs(v)).max().unwrap() > eps { + if *col.mapv(|v| v.abs()).max().unwrap() > eps { y_score = col.to_owned(); break; } @@ -363,8 +370,8 @@ impl PlsParams { let mut x_pinv = None; let mut y_pinv = None; if self.mode == Mode::B { - x_pinv = Some(utils::pinv2(&x, Some(F::cast(10.) * eps))); - y_pinv = Some(utils::pinv2(&y, Some(F::cast(10.) * eps))); + x_pinv = Some(utils::pinv2(x.view(), Some(F::cast(10.) * eps))); + y_pinv = Some(utils::pinv2(y.view(), Some(F::cast(10.) 
* eps))); } // init to big value for first convergence check @@ -379,7 +386,7 @@ impl PlsParams { Mode::A => x.t().dot(&y_score) / y_score.dot(&y_score), Mode::B => x_pinv.to_owned().unwrap().dot(&y_score), }; - x_weights /= num_traits::Float::sqrt(x_weights.dot(&x_weights)) + eps; + x_weights /= x_weights.dot(&x_weights).sqrt() + eps; let x_score = x.dot(&x_weights); y_weights = match self.mode { @@ -388,7 +395,7 @@ impl PlsParams { }; if norm_y_weights { - y_weights /= num_traits::Float::sqrt(y_weights.dot(&y_weights)) + eps + y_weights /= y_weights.dot(&y_weights).sqrt() + eps } let ya = y.dot(&y_weights); @@ -420,9 +427,13 @@ impl PlsParams { y: &ArrayBase, Ix2>, ) -> Result<(Array1, Array1)> { let c = x.t().dot(y); + + let c = c.with_lapack(); let (u, _, vt) = c.svd(true, true)?; - let u = u.unwrap().column(0).to_owned(); - let vt = vt.unwrap().row(0).to_owned(); + // safe unwrap because both parameters are set to true in above call + let u = u.unwrap().column(0).to_owned().without_lapack(); + let vt = vt.unwrap().row(0).to_owned().without_lapack(); + Ok((u, vt)) } } diff --git a/algorithms/linfa-pls/src/pls_svd.rs b/algorithms/linfa-pls/src/pls_svd.rs index a314456ea..05913f179 100644 --- a/algorithms/linfa-pls/src/pls_svd.rs +++ b/algorithms/linfa-pls/src/pls_svd.rs @@ -36,13 +36,15 @@ impl Default for PlsSvdParams { } #[allow(clippy::many_single_char_names)] -impl> Fit<'_, ArrayBase, ArrayBase> for PlsSvdParams { - type Object = Result>; +impl> Fit, ArrayBase, PlsError> + for PlsSvdParams +{ + type Object = PlsSvd; fn fit( &self, dataset: &DatasetBase, ArrayBase>, - ) -> Result> { + ) -> Result { if dataset.nsamples() < 2 { return Err(PlsError::NotEnoughSamplesError(format!( "should be greater than 1, got {}", @@ -68,10 +70,11 @@ impl> Fit<'_, ArrayBase, ArrayBase> // Compute SVD of cross-covariance matrix let c = x.t().dot(&y); - let (u, _, vt) = c.svd(true, true).unwrap(); - let u = u.unwrap().slice(s![.., ..self.n_components]).to_owned(); - let vt = vt.unwrap().slice(s![..self.n_components, ..]).to_owned(); - let (u, vt) = utils::svd_flip(&u, &vt); + let (u, _, vt) = c.svd(true, true)?; + // safe unwraps because both parameters are set to true in above call + let u = u.unwrap().slice_move(s![.., ..self.n_components]); + let vt = vt.unwrap().slice_move(s![..self.n_components, ..]); + let (u, vt) = utils::svd_flip(u, vt); let v = vt.reversed_axes(); let x_weights = u; diff --git a/algorithms/linfa-pls/src/utils.rs b/algorithms/linfa-pls/src/utils.rs index 71331b5fa..1b780e050 100644 --- a/algorithms/linfa-pls/src/utils.rs +++ b/algorithms/linfa-pls/src/utils.rs @@ -1,6 +1,10 @@ -use linfa::{DatasetBase, Float}; -use ndarray::{s, Array1, Array2, ArrayBase, Axis, Data, DataMut, Ix1, Ix2, Zip}; +use linfa::{ + dataset::{WithLapack, WithoutLapack}, + DatasetBase, Float, +}; +use ndarray::{s, Array1, Array2, ArrayBase, ArrayView2, Axis, Data, DataMut, Ix1, Ix2, Zip}; use ndarray_linalg::svd::*; +use ndarray_linalg::Scalar; use ndarray_stats::QuantileExt; pub fn outer( @@ -15,10 +19,8 @@ pub fn outer( } /// Calculates the pseudo inverse of a matrix -pub fn pinv2>( - x: &ArrayBase, - cond: Option, -) -> Array2 { +pub fn pinv2(x: ArrayView2, cond: Option) -> Array2 { + let x = x.with_lapack(); let (opt_u, s, opt_vh) = x.svd(true, true).unwrap(); let u = opt_u.unwrap(); let vh = opt_vh.unwrap(); @@ -33,12 +35,14 @@ pub fn pinv2>( acc }); - let mut ucut = u.slice(s![.., ..rank]).to_owned(); - ucut /= &s.slice(s![..rank]).mapv(|v| F::cast(v)); - ucut.dot(&vh.slice(s![..rank, ..])) - 
.mapv(|v| v.conj()) + let mut ucut = u.slice_move(s![.., ..rank]); + ucut /= &s.slice(s![..rank]).mapv(|v| F::Lapack::cast(v)); + + vh.slice(s![..rank, ..]) .t() - .to_owned() + .dot(&ucut.t()) + .mapv(|v| v.conj()) + .without_lapack() } #[allow(clippy::type_complexity)] @@ -85,8 +89,8 @@ pub fn svd_flip_1d( } pub fn svd_flip( - u: &ArrayBase, Ix2>, - v: &ArrayBase, Ix2>, + u: ArrayBase, Ix2>, + v: ArrayBase, Ix2>, ) -> (Array2, Array2) { // columns of u, rows of v let abs_u = u.mapv(|v| v.abs()); @@ -97,7 +101,7 @@ pub fn svd_flip( .and(&max_abs_val_indices) .and(&range) .apply(|s, &i, &j| *s = u[[i, j]].signum()); - (u * &signs, v * &signs.insert_axis(Axis(1))) + (&u * &signs, &v * &signs.insert_axis(Axis(1))) } #[cfg(test)] @@ -117,7 +121,7 @@ mod tests { #[test] fn test_pinv2() { let a = array![[1., 2., 3.], [4., 5., 6.], [7., 8., 10.]]; - let a_pinv2 = pinv2(&a, None); + let a_pinv2 = pinv2(a.view(), None); assert_abs_diff_eq!(a.dot(&a_pinv2), Array2::eye(3), epsilon = 1e-6) } } diff --git a/algorithms/linfa-preprocessing/examples/count_vectorization.rs b/algorithms/linfa-preprocessing/examples/count_vectorization.rs index 749262e9b..1164b466b 100644 --- a/algorithms/linfa-preprocessing/examples/count_vectorization.rs +++ b/algorithms/linfa-preprocessing/examples/count_vectorization.rs @@ -4,9 +4,10 @@ use encoding::DecoderTrap::Strict; use flate2::read::GzDecoder; use linfa::metrics::ToConfusionMatrix; use linfa::traits::{Fit, Predict}; +use linfa::Dataset; use linfa_bayes::GaussianNbParams; use linfa_preprocessing::count_vectorization::CountVectorizer; -use ndarray::Array1; +use ndarray::Array2; use std::collections::HashSet; use std::path::Path; use tar::Archive; @@ -34,7 +35,7 @@ fn download_20news_bydate() { fn load_set( path: &'static str, desired_targets: &[&str], -) -> Result<(Vec, Array1, usize), std::io::Error> { +) -> Result<(Vec, Array2, usize), std::io::Error> { let mut file_paths = Vec::new(); let mut targets = Vec::new(); let desired_targets: HashSet = desired_targets.iter().map(|s| s.to_string()).collect(); @@ -59,19 +60,19 @@ fn load_set( ntargets = ntargets + 1; } } - let targets = Array1::from_shape_vec(targets.len(), targets).unwrap(); + let targets = Array2::from_shape_vec((targets.len(), 1), targets).unwrap(); Ok((file_paths, targets, ntargets)) } fn load_train_set( desired_targets: &[&str], -) -> Result<(Vec, Array1, usize), std::io::Error> { +) -> Result<(Vec, Array2, usize), std::io::Error> { load_set("./20news/20news-bydate-train", desired_targets) } fn load_test_set( desired_targets: &[&str], -) -> Result<(Vec, Array1, usize), std::io::Error> { +) -> Result<(Vec, Array2, usize), std::io::Error> { load_set("./20news/20news-bydate-test", desired_targets) } @@ -165,9 +166,9 @@ fn main() { .transform_files(&test_filenames, ISO_8859_1, Strict) .to_dense(); let test_records = test_records.mapv(|c| c as f32); - let test_dataset = (test_records, test_targets).into(); + let test_dataset: Dataset = (test_records, test_targets).into(); // Let's predict the test data targets - let test_prediction: Array1 = model.predict(&test_dataset); + let test_prediction = model.predict(&test_dataset); let cm = test_prediction.confusion_matrix(&test_dataset).unwrap(); // 0.9523 let accuracy = cm.f1_score(); diff --git a/algorithms/linfa-preprocessing/examples/tfidf_vectorization.rs b/algorithms/linfa-preprocessing/examples/tfidf_vectorization.rs index 5ca00a92d..18ae10c8b 100644 --- a/algorithms/linfa-preprocessing/examples/tfidf_vectorization.rs +++ 
b/algorithms/linfa-preprocessing/examples/tfidf_vectorization.rs @@ -4,9 +4,10 @@ use encoding::DecoderTrap::Strict; use flate2::read::GzDecoder; use linfa::metrics::ToConfusionMatrix; use linfa::traits::{Fit, Predict}; +use linfa::Dataset; use linfa_bayes::GaussianNbParams; use linfa_preprocessing::tf_idf_vectorization::TfIdfVectorizer; -use ndarray::Array1; +use ndarray::Array2; use std::collections::HashSet; use std::path::Path; use tar::Archive; @@ -34,7 +35,7 @@ fn download_20news_bydate() { fn load_set( path: &'static str, desired_targets: &[&str], -) -> Result<(Vec, Array1, usize), std::io::Error> { +) -> Result<(Vec, Array2, usize), std::io::Error> { let mut file_paths = Vec::new(); let mut targets = Vec::new(); let desired_targets: HashSet = desired_targets.iter().map(|s| s.to_string()).collect(); @@ -59,19 +60,19 @@ fn load_set( ntargets = ntargets + 1; } } - let targets = Array1::from_shape_vec(targets.len(), targets).unwrap(); + let targets = Array2::from_shape_vec((targets.len(), 1), targets).unwrap(); Ok((file_paths, targets, ntargets)) } fn load_train_set( desired_targets: &[&str], -) -> Result<(Vec, Array1, usize), std::io::Error> { +) -> Result<(Vec, Array2, usize), std::io::Error> { load_set("./20news/20news-bydate-train", desired_targets) } fn load_test_set( desired_targets: &[&str], -) -> Result<(Vec, Array1, usize), std::io::Error> { +) -> Result<(Vec, Array2, usize), std::io::Error> { load_set("./20news/20news-bydate-test", desired_targets) } @@ -162,9 +163,9 @@ fn main() { let test_records = vectorizer .transform_files(&test_filenames, ISO_8859_1, Strict) .to_dense(); - let test_dataset = (test_records, test_targets).into(); + let test_dataset: Dataset = (test_records, test_targets).into(); // Let's predict the test data targets - let test_prediction: Array1 = model.predict(&test_dataset); + let test_prediction = model.predict(&test_dataset); let cm = test_prediction.confusion_matrix(&test_dataset).unwrap(); // 0.8402 let accuracy = cm.f1_score(); diff --git a/algorithms/linfa-preprocessing/src/count_vectorization.rs b/algorithms/linfa-preprocessing/src/count_vectorization.rs index 7228ef7c9..5dee08093 100644 --- a/algorithms/linfa-preprocessing/src/count_vectorization.rs +++ b/algorithms/linfa-preprocessing/src/count_vectorization.rs @@ -165,9 +165,11 @@ impl CountVectorizer { let mut document_bytes = Vec::new(); file.read_to_end(&mut document_bytes)?; let document = encoding::decode(&document_bytes, trap, encoding).0; + // encoding error contains a cow string, can't just use ?, must go through the unwrap if document.is_err() { return Err(crate::error::Error::EncodingError(document.err().unwrap())); } + // safe unwrap now that error has been handled let document = transform_string(document.unwrap(), &self.properties); self.read_document_into_vocabulary(document, ®ex, &mut vocabulary); } diff --git a/algorithms/linfa-preprocessing/src/error.rs b/algorithms/linfa-preprocessing/src/error.rs index 87a4cbb76..e6cc85bec 100644 --- a/algorithms/linfa-preprocessing/src/error.rs +++ b/algorithms/linfa-preprocessing/src/error.rs @@ -32,4 +32,6 @@ pub enum Error { LinalgError(#[from] ndarray_linalg::error::LinalgError), #[error(transparent)] NdarrayStatsEmptyError(#[from] ndarray_stats::errors::EmptyInput), + #[error(transparent)] + LinfaError(#[from] linfa::error::Error), } diff --git a/algorithms/linfa-preprocessing/src/linear_scaling.rs b/algorithms/linfa-preprocessing/src/linear_scaling.rs index 43497c498..17d435c85 100644 --- 
a/algorithms/linfa-preprocessing/src/linear_scaling.rs +++ b/algorithms/linfa-preprocessing/src/linear_scaling.rs @@ -113,14 +113,14 @@ impl LinearScaler { } } -impl<'a, F: Float, D: Data, T: AsTargets> Fit<'a, ArrayBase, T> +impl, T: AsTargets> Fit, T, Error> for LinearScaler { - type Object = Result>; + type Object = FittedLinearScaler; /// Fits the input dataset accordng to the scaler [method](enum.ScalingMethod.html). Will return an error /// if the dataset does not contain any samples or (in the case of MinMax scaling) if the specified range is not valid. - fn fit(&self, x: &DatasetBase, T>) -> Self::Object { + fn fit(&self, x: &DatasetBase, T>) -> Result { match &self.method { ScalingMethod::Standard(with_mean, with_std) => { FittedLinearScaler::standard(x.records(), *with_mean, *with_std) @@ -149,6 +149,7 @@ impl FittedLinearScaler { if records.dim().0 == 0 { return Err(Error::NotEnoughSamples); } + // safe unwrap because of above zero records check let means = records.mean_axis(Axis(0)).unwrap(); let std_devs = if with_std { records.std_axis(Axis(0), F::zero()).mapv(|s| { diff --git a/algorithms/linfa-preprocessing/src/whitening.rs b/algorithms/linfa-preprocessing/src/whitening.rs index d7b1c8b89..6c607d21b 100644 --- a/algorithms/linfa-preprocessing/src/whitening.rs +++ b/algorithms/linfa-preprocessing/src/whitening.rs @@ -55,13 +55,14 @@ impl Whitener { } } -impl<'a, F: Float, D: Data, T: AsTargets> Fit<'a, ArrayBase, T> for Whitener { - type Object = Result>; +impl, T: AsTargets> Fit, T, Error> for Whitener { + type Object = FittedWhitener; - fn fit(&self, x: &DatasetBase, T>) -> Self::Object { + fn fit(&self, x: &DatasetBase, T>) -> Result { if x.nsamples() == 0 { return Err(Error::NotEnoughSamples); } + // safe because of above zero samples check let mean = x.records().mean_axis(Axis(0)).unwrap(); let sigma = x.records() - &mean; diff --git a/algorithms/linfa-reduction/Cargo.toml b/algorithms/linfa-reduction/Cargo.toml index 7a6e8491b..16e8f0a0d 100644 --- a/algorithms/linfa-reduction/Cargo.toml +++ b/algorithms/linfa-reduction/Cargo.toml @@ -29,6 +29,7 @@ ndarray = { version = "0.14", default-features = false, features = ["approx"] } ndarray-linalg = "0.13" ndarray-rand = "0.13" num-traits = "0.2" +thiserror = "1" linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] } linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" } diff --git a/algorithms/linfa-reduction/examples/pca.rs b/algorithms/linfa-reduction/examples/pca.rs index 873b038cf..54f6cb8b1 100644 --- a/algorithms/linfa-reduction/examples/pca.rs +++ b/algorithms/linfa-reduction/examples/pca.rs @@ -16,7 +16,7 @@ fn main() { let n = 10; let dataset = Dataset::from(generate_blobs(n, &expected_centroids, &mut rng)); - let embedding: Pca = Pca::params(1).fit(&dataset); + let embedding: Pca = Pca::params(1).fit(&dataset).unwrap(); let embedding = embedding.predict(&dataset); dbg!(&embedding); diff --git a/algorithms/linfa-reduction/src/error.rs b/algorithms/linfa-reduction/src/error.rs new file mode 100644 index 000000000..bf3279ca5 --- /dev/null +++ b/algorithms/linfa-reduction/src/error.rs @@ -0,0 +1,12 @@ +use thiserror::Error; +pub type Result = std::result::Result; + +#[derive(Error, Debug)] +pub enum Error { + #[error("At least 1 sample needed")] + NotEnoughSamples, + #[error(transparent)] + LinalgError(#[from] ndarray_linalg::error::LinalgError), + #[error(transparent)] + LinfaError(#[from] linfa::error::Error), +} diff --git a/algorithms/linfa-reduction/src/lib.rs 
b/algorithms/linfa-reduction/src/lib.rs index d8fb94c82..d799abe28 100644 --- a/algorithms/linfa-reduction/src/lib.rs +++ b/algorithms/linfa-reduction/src/lib.rs @@ -12,6 +12,7 @@ extern crate ndarray; pub mod diffusion_map; +pub mod error; pub mod pca; pub mod utils; diff --git a/algorithms/linfa-reduction/src/pca.rs b/algorithms/linfa-reduction/src/pca.rs index bcb1fe3c4..4f9033af3 100644 --- a/algorithms/linfa-reduction/src/pca.rs +++ b/algorithms/linfa-reduction/src/pca.rs @@ -15,12 +15,13 @@ //! //! // apply PCA projection along a line which maximizes the spread of the data //! let embedding = Pca::params(1) -//! .fit(&dataset); +//! .fit(&dataset).unwrap(); //! //! // reduce dimensionality of the dataset //! let dataset = embedding.predict(dataset); //! ``` //! +use crate::error::{Error, Result}; use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix2}; use ndarray_linalg::{TruncatedOrder, TruncatedSvd}; #[cfg(feature = "serde")] @@ -67,19 +68,22 @@ impl PcaParams { /// # Returns /// /// A fitted PCA model with origin and hyperplane -impl<'a, T, D: Data> Fit<'a, ArrayBase, T> for PcaParams { +impl> Fit, T, Error> for PcaParams { type Object = Pca; - fn fit(&self, dataset: &DatasetBase, T>) -> Pca { + fn fit(&self, dataset: &DatasetBase, T>) -> Result> { + if dataset.nsamples() == 0 { + return Err(Error::NotEnoughSamples); + } let x = dataset.records(); // calculate mean of data and subtract it + // safe because of above 0 samples check let mean = x.mean_axis(Axis(0)).unwrap(); let x = x - &mean; // estimate Singular Value Decomposition - let result = TruncatedSvd::new(x, TruncatedOrder::Largest) - .decompose(self.embedding_size) - .unwrap(); + let result = + TruncatedSvd::new(x, TruncatedOrder::Largest).decompose(self.embedding_size)?; // explained variance is the spectral distribution of the eigenvalues let (_, sigma, mut v_t) = result.values_vectors(); @@ -96,11 +100,11 @@ impl<'a, T, D: Data> Fit<'a, ArrayBase, T> for PcaParams { } } - Pca { + Ok(Pca { embedding: v_t, sigma, mean, - } + }) } } @@ -118,7 +122,7 @@ impl<'a, T, D: Data> Fit<'a, ArrayBase, T> for PcaParams { /// /// // apply PCA projection along a line which maximizes the spread of the data /// let embedding = Pca::params(1) -/// .fit(&dataset); +/// .fit(&dataset).unwrap(); /// /// // reduce dimensionality of the dataset /// let dataset = embedding.predict(dataset); @@ -216,7 +220,7 @@ mod tests { let dataset = Dataset::from(tmp.dot(&q)); - let model = Pca::params(2).whiten(true).fit(&dataset); + let model = Pca::params(2).whiten(true).fit(&dataset).unwrap(); let proj = model.predict(&dataset); // check that the covariance is unit diagonal @@ -237,7 +241,7 @@ mod tests { let data = Array2::random_using((300, 50), Uniform::new(-1.0f64, 1.), &mut rng); let dataset = Dataset::from(data); - let model = Pca::params(10).whiten(true).fit(&dataset); + let model = Pca::params(10).whiten(true).fit(&dataset).unwrap(); let proj = model.predict(&dataset); // check that the covariance is unit diagonal @@ -262,7 +266,7 @@ mod tests { let data = Array2::random_using((1000, 500), StandardNormal, &mut rng); let dataset = Dataset::from(data / 1000f64.sqrt()); - let model = Pca::params(500).fit(&dataset); + let model = Pca::params(500).fit(&dataset).unwrap(); let sv = model.singular_values().mapv(|x| x * x); // we have created a random spectrum and can apply the Marchenko-Pastur law @@ -319,7 +323,7 @@ mod tests { let dataset = Dataset::from(data); // fit PCA with 10 possible embeddings - let model = Pca::params(10).fit(&dataset); + 
let model = Pca::params(10).fit(&dataset).unwrap(); // only two eigenvalues are relevant assert_eq!(model.explained_variance_ratio().len(), 2); @@ -334,7 +338,7 @@ mod tests { #[test] fn test_explained_variance_diag() { let dataset = Dataset::from(Array2::from_diag(&array![1., 1., 1., 1.])); - let model = Pca::params(3).fit(&dataset); + let model = Pca::params(3).fit(&dataset).unwrap(); assert_abs_diff_eq!( model.explained_variance_ratio(), diff --git a/algorithms/linfa-svm/src/classification.rs b/algorithms/linfa-svm/src/classification.rs index 449dd3eed..8d3b0ba26 100644 --- a/algorithms/linfa-svm/src/classification.rs +++ b/algorithms/linfa-svm/src/classification.rs @@ -8,7 +8,7 @@ use linfa::{ use ndarray::{Array1, Array2, ArrayBase, ArrayView2, Data, Ix1, Ix2}; use std::cmp::Ordering; -use super::error::Result; +use super::error::{Result, SvmResult}; use super::permutable_kernel::{PermutableKernel, PermutableKernelOneClass}; use super::solver_smo::SolverState; use super::SolverParams; @@ -207,10 +207,10 @@ pub fn fit_one_class( /// probabilities for whether a sample belongs to the first or second class. macro_rules! impl_classification { ($records:ty, $targets:ty) => { - impl<'a, F: Float> Fit<'a, $records, $targets> for SvmParams { - type Object = Result>; + impl Fit<$records, $targets, SvmResult> for SvmParams { + type Object = Svm; - fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Self::Object { + fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Result { let kernel = self.kernel.transform(dataset.records()); let target = dataset.try_single_target()?; let target = target.as_slice().unwrap(); @@ -238,10 +238,10 @@ macro_rules! impl_classification { } } - impl<'a, F: Float> Fit<'a, $records, $targets> for SvmParams { - type Object = Result>; + impl Fit<$records, $targets, SvmResult> for SvmParams { + type Object = Svm; - fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Self::Object { + fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Result { let kernel = self.kernel.transform(dataset.records()); let target = dataset.try_single_target()?; let target = target.as_slice().unwrap(); @@ -272,10 +272,10 @@ macro_rules! impl_classification { } impl_classification!(Array2, Array2); -impl_classification!(ArrayView2<'a, F>, ArrayView2<'a, bool>); +impl_classification!(ArrayView2<'_, F>, ArrayView2<'_, bool>); impl_classification!(Array2, CountedTargets>); -impl_classification!(ArrayView2<'a, F>, CountedTargets>); -impl_classification!(ArrayView2<'a, F>, CountedTargets>); +impl_classification!(ArrayView2<'_, F>, CountedTargets>); +impl_classification!(ArrayView2<'_, F>, CountedTargets>); /// Fit one-class problem /// @@ -283,10 +283,10 @@ impl_classification!(ArrayView2<'a, F>, CountedTargets { - impl<'a, F: Float> Fit<'a, $records, $targets> for SvmParams { - type Object = Result>; + impl Fit<$records, $targets, SvmResult> for SvmParams { + type Object = Svm; - fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Self::Object { + fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Result { let kernel = self.kernel.transform(dataset.records()); let records = dataset.records().view(); @@ -302,9 +302,9 @@ macro_rules! 
impl_oneclass { } impl_oneclass!(Array2, Array2<()>); -impl_oneclass!(ArrayView2<'a, F>, ArrayView2<'a, ()>); +impl_oneclass!(ArrayView2<'_, F>, ArrayView2<'_, ()>); impl_oneclass!(Array2, CountedTargets<(), Array2<()>>); -impl_oneclass!(Array2, CountedTargets<(), ArrayView2<'a, ()>>); +impl_oneclass!(Array2, CountedTargets<(), ArrayView2<'_, ()>>); /// Predict a probability with a feature vector impl> Predict, Pr> for Svm { diff --git a/algorithms/linfa-svm/src/regression.rs b/algorithms/linfa-svm/src/regression.rs index 1c2f222cd..bd4497d21 100644 --- a/algorithms/linfa-svm/src/regression.rs +++ b/algorithms/linfa-svm/src/regression.rs @@ -8,7 +8,7 @@ use linfa::{ use linfa_kernel::Kernel; use ndarray::{Array1, Array2, ArrayBase, ArrayView1, ArrayView2, Data, Ix2}; -use super::error::Result; +use super::error::{Result, SvmResult}; use super::permutable_kernel::PermutableKernelRegression; use super::solver_smo::SolverState; use super::SolverParams; @@ -119,10 +119,10 @@ pub fn fit_nu( /// Take a number of observations and project them to optimal continuous targets. macro_rules! impl_regression { ($records:ty, $targets:ty, $f:ty) => { - impl<'a> Fit<'a, $records, $targets> for SvmParams<$f, $f> { - type Object = Result>; + impl Fit<$records, $targets, SvmResult> for SvmParams<$f, $f> { + type Object = Svm<$f, $f>; - fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Self::Object { + fn fit(&self, dataset: &DatasetBase<$records, $targets>) -> Result { let kernel = self.kernel.transform(dataset.records()); let target = dataset.try_single_target()?; let target = target.as_slice().unwrap(); @@ -155,12 +155,12 @@ macro_rules! impl_regression { impl_regression!(Array2, Array2, f32); impl_regression!(Array2, Array2, f64); -impl_regression!(ArrayView2<'a, f32>, ArrayView2<'a, f32>, f32); -impl_regression!(ArrayView2<'a, f64>, ArrayView2<'a, f64>, f64); +impl_regression!(ArrayView2<'_, f32>, ArrayView2<'_, f32>, f32); +impl_regression!(ArrayView2<'_, f64>, ArrayView2<'_, f64>, f64); impl_regression!(Array2, Array1, f32); impl_regression!(Array2, Array1, f64); -impl_regression!(ArrayView2<'a, f32>, ArrayView1<'a, f32>, f32); -impl_regression!(ArrayView2<'a, f64>, ArrayView1<'a, f64>, f64); +impl_regression!(ArrayView2<'_, f32>, ArrayView1<'_, f32>, f32); +impl_regression!(ArrayView2<'_, f64>, ArrayView1<'_, f64>, f64); macro_rules! impl_predict { ( $($t:ty),* ) => { diff --git a/algorithms/linfa-trees/src/decision_trees/algorithm.rs b/algorithms/linfa-trees/src/decision_trees/algorithm.rs index f60152ed7..52b5d6f41 100644 --- a/algorithms/linfa-trees/src/decision_trees/algorithm.rs +++ b/algorithms/linfa-trees/src/decision_trees/algorithm.rs @@ -11,6 +11,7 @@ use super::NodeIter; use super::Tikz; use linfa::{ dataset::{AsTargets, Labels, Records}, + error::Error, error::Result, traits::*, DatasetBase, Float, Label, @@ -128,10 +129,7 @@ pub struct TreeNode { impl Hash for TreeNode { fn hash(&self, state: &mut H) { - let mut data: Vec = vec![]; - data.push(self.feature_idx as u64); - //data.push(self.prediction); - data.push(self.leaf_node as u64); + let data: Vec = vec![self.feature_idx as u64, self.leaf_node as u64]; data.hash(state); } } @@ -495,7 +493,7 @@ impl> PredictRef, Array1 for DecisionTree { /// Make predictions for each row of a matrix of features `x`. 
- fn predict_ref<'a>(&'a self, x: &ArrayBase) -> Array1 { + fn predict_ref(&self, x: &ArrayBase) -> Array1 { x.genrows() .into_iter() .map(|row| make_prediction(&row, &self.root_node)) @@ -503,18 +501,18 @@ } } -impl<'a, F: Float, L: Label + 'a + std::fmt::Debug, D, T> Fit<'a, ArrayBase, T> +impl<'a, F: Float, L: Label + 'a + std::fmt::Debug, D, T> Fit, T, Error> for DecisionTreeParams where D: Data, T: AsTargets + Labels, { - type Object = Result>; + type Object = DecisionTree; /// Fit a decision tree using `hyperparameters` on the dataset consisting of /// a matrix of features `x` and an array of labels `y`. - fn fit(&self, dataset: &DatasetBase, T>) -> Self::Object { - self.validate().unwrap(); + fn fit(&self, dataset: &DatasetBase, T>) -> Result { + self.validate()?; let x = dataset.records(); let feature_names = dataset.feature_names(); diff --git a/algorithms/linfa-trees/src/decision_trees/tikz.rs b/algorithms/linfa-trees/src/decision_trees/tikz.rs index 466579a31..bbe518034 100644 --- a/algorithms/linfa-trees/src/decision_trees/tikz.rs +++ b/algorithms/linfa-trees/src/decision_trees/tikz.rs @@ -95,7 +95,8 @@ impl<'a, F: Float, L: Debug + Label> Tikz<'a, F, L> { let var = format!( "Var({})&:&{}\\\\", node.split().0, - node.feature_name().unwrap() + // TODO: why use legend if there are no feature names? Should it be allowed? + node.feature_name().unwrap_or(&"".to_string()) ); out.push_str(&var); map.insert(node.split().0); diff --git a/algorithms/linfa-tsne/examples/tsne.rs b/algorithms/linfa-tsne/examples/tsne.rs index f8b05556e..76c692ba4 100644 --- a/algorithms/linfa-tsne/examples/tsne.rs +++ b/algorithms/linfa-tsne/examples/tsne.rs @@ -5,7 +5,7 @@ use std::{io::Write, process::Command}; fn main() -> Result<()> { let ds = linfa_datasets::iris(); - let ds = Pca::params(3).whiten(true).fit(&ds).transform(ds); + let ds = Pca::params(3).whiten(true).fit(&ds).unwrap().transform(ds); let ds = TSne::embedding_size(2) .perplexity(10.0) diff --git a/build.rs b/build.rs new file mode 100644 index 000000000..428b39280 --- /dev/null +++ b/build.rs @@ -0,0 +1,9 @@ +#[cfg(any(feature = "openblas-system", feature = "netlib-system"))] +fn main() { + println!("cargo:rustc-link-lib=lapacke"); + println!("cargo:rustc-link-lib=lapack"); + println!("cargo:rustc-link-lib=cblas"); +} + +#[cfg(not(any(feature = "openblas-system", feature = "netlib-system")))] +fn main() {} diff --git a/docs/website/content/snippets/cross-validation.md b/docs/website/content/snippets/cross-validation.md index 2d48e3c56..bfd7e42b9 100644 --- a/docs/website/content/snippets/cross-validation.md +++ b/docs/website/content/snippets/cross-validation.md @@ -2,22 +2,21 @@ title = "Cross Validation" +++ ```rust -// perform cross-validation with the F1 score -let f1_runs = dataset - .iter_fold(8, |v| params.fit(&v).unwrap()) - .map(|(model, valid)| { - let cm = model - .predict(&valid) - .mapv(|x| x > Pr::even()) - .confusion_matrix(&valid).unwrap(); - - cm.f1_score() - }) - .collect::>(); - -// calculate mean and standard deviation -println!("F1 score: {}±{}", - f1_runs.mean().unwrap(), - f1_runs.std_axis(Axis(0), 0.0), -); -``` +// parameters to compare +let ratios = vec![0.1, 0.2, 0.5, 0.7, 1.0]; + +// create a model for each parameter +let models = ratios + .iter() + .map(|ratio| ElasticNet::params().penalty(0.3).l1_ratio(*ratio)) + .collect::>(); + +// get the mean r2 validation score across 5 folds for each model +let r2_values = + dataset.cross_validate(5, &models, |prediction, truth| 
prediction.r2(&truth))?; + +// show the mean r2 score for each parameter choice +for (ratio, r2) in ratios.iter().zip(r2_values.iter()) { + println!("L1 ratio: {}, r2 score: {}", ratio, r2); +} +``` \ No newline at end of file diff --git a/docs/website/content/snippets/k-folding.md b/docs/website/content/snippets/k-folding.md new file mode 100644 index 000000000..52f10707d --- /dev/null +++ b/docs/website/content/snippets/k-folding.md @@ -0,0 +1,23 @@ ++++ +title = "K folding" ++++ +```rust +// perform cross-validation with the F1 score +let f1_runs = dataset + .iter_fold(8, |v| params.fit(&v).unwrap()) + .map(|(model, valid)| { + let cm = model + .predict(&valid) + .mapv(|x| x > Pr::even()) + .confusion_matrix(&valid).unwrap(); + + cm.f1_score() + }) + .collect::>(); + +// calculate mean and standard deviation +println!("F1 score: {}±{}", + f1_runs.mean().unwrap(), + f1_runs.std_axis(Axis(0), 0.0), +); +``` diff --git a/src/dataset/impl_dataset.rs b/src/dataset/impl_dataset.rs index f17f18ae9..f4aab8a53 100644 --- a/src/dataset/impl_dataset.rs +++ b/src/dataset/impl_dataset.rs @@ -1,16 +1,17 @@ -use ndarray::{ - concatenate, s, Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, Data, DataMut, - Dimension, Ix1, Ix2, -}; -use rand::{seq::SliceRandom, Rng}; -use std::collections::HashMap; - use super::{ super::traits::{Predict, PredictRef}, iter::{ChunksIter, DatasetIter, Iter}, AsTargets, AsTargetsMut, CountedTargets, Dataset, DatasetBase, DatasetView, Float, FromTargetArray, Label, Labels, Records, Result, }; +use crate::traits::Fit; +use ndarray::{ + concatenate, s, Array, Array1, Array2, ArrayBase, ArrayView1, ArrayView2, ArrayViewMut2, Axis, + Data, DataMut, Dimension, Ix1, Ix2, OwnedRepr, +}; +use rand::{seq::SliceRandom, Rng}; +use std::collections::HashMap; +use std::ops::AddAssign; /// Implementation without constraints on records and targets /// @@ -654,6 +655,18 @@ where } } +macro_rules! 
assist_swap_array2 { + ($slice: expr, $index: expr, $fold_size: expr, $features: expr) => { + if $index != 0 { + let adj_fold_size = $fold_size * $features; + let start = adj_fold_size * $index; + let (first_s, second_s) = $slice.split_at_mut(start); + let (mut fold, _) = second_s.split_at_mut(adj_fold_size); + first_s[..$fold_size * $features].swap_with_slice(&mut fold); + } + }; +} + impl<'a, F: Float, E: Copy + 'a, D, S> DatasetBase, ArrayBase> where D: DataMut, @@ -691,20 +704,24 @@ where /// ## Example /// ```rust /// use linfa::traits::Fit; - /// use linfa::dataset::{Dataset, DatasetView}; + /// use linfa::dataset::{Dataset, DatasetView, Records}; /// use ndarray::{array, ArrayView1, ArrayView2}; + /// use linfa::Error; /// /// struct MockFittable {} /// /// struct MockFittableResult { - /// mock_var: usize, + /// mock_var: usize, /// } /// - /// impl<'a> Fit<'a, ArrayView2<'a, f64>, ArrayView2<'a, f64>> for MockFittable { + /// + /// impl<'a> Fit, ArrayView2<'a, f64>, linfa::error::Error> for MockFittable { /// type Object = MockFittableResult; /// - /// fn fit(&self, training_data: &DatasetView) -> Self::Object { - /// MockFittableResult { mock_var: training_data.ntargets()} + /// fn fit(&self, training_data: &DatasetView) -> Result { + /// Ok(MockFittableResult { + /// mock_var: training_data.nsamples(), + /// }) /// } /// } /// @@ -713,17 +730,16 @@ where /// let mut dataset: Dataset = (records, targets).into(); /// let params = MockFittable {}; /// - ///for (model,validation_set) in dataset.iter_fold(5, |v| params.fit(&v)){ + ///for (model,validation_set) in dataset.iter_fold(5, |v| params.fit(&v).unwrap()){ /// // Here you can use `model` and `validation_set` to /// // assert the performance of the chosen algorithm /// } /// ``` - pub fn iter_fold) -> O>( + pub fn iter_fold) -> O>( &'a mut self, k: usize, fit_closure: C, ) -> impl Iterator, ArrayView2>)> { - //)-> impl Iterator + 'a { assert!(k > 0); assert!(k <= self.nsamples()); let samples_count = self.nsamples(); @@ -732,50 +748,243 @@ where let features = self.nfeatures(); let targets = self.ntargets(); - let mut records_sl = self.records.as_slice_mut().unwrap(); - let mut targets_sl2 = self.targets.as_multi_targets_mut(); - let mut targets_sl = targets_sl2.as_slice_mut().unwrap(); - let mut objs: Vec = Vec::new(); - for i in 0..k { - assist_swap_array2(&mut records_sl, i, fold_size, features); - assist_swap_array2(&mut targets_sl, i, fold_size, targets); - - let train = DatasetBase::new( - ArrayView2::from_shape( - (samples_count - fold_size, features), - records_sl.split_at(fold_size * features).1, - ) - .unwrap(), - ArrayView2::from_shape( - (samples_count - fold_size, targets), - targets_sl.split_at(fold_size * targets).1, - ) - .unwrap(), - ); - - let obj = fit_closure(train); - objs.push(obj); + { + let records_sl = self.records.as_slice_mut().unwrap(); + let mut targets_sl2 = self.targets.as_multi_targets_mut(); + let targets_sl = targets_sl2.as_slice_mut().unwrap(); + + for i in 0..k { + assist_swap_array2!(records_sl, i, fold_size, features); + assist_swap_array2!(targets_sl, i, fold_size, targets); + + { + let train = DatasetBase::new( + ArrayView2::from_shape( + (samples_count - fold_size, features), + records_sl.split_at(fold_size * features).1, + ) + .unwrap(), + ArrayView2::from_shape( + (samples_count - fold_size, targets), + targets_sl.split_at(fold_size * targets).1, + ) + .unwrap(), + ); + + let obj = fit_closure(&train); + objs.push(obj); + } - assist_swap_array2(&mut records_sl, i, fold_size, features); 
- assist_swap_array2(&mut targets_sl, i, fold_size, targets); + assist_swap_array2!(records_sl, i, fold_size, features); + assist_swap_array2!(targets_sl, i, fold_size, targets); + } } objs.into_iter().zip(self.sample_chunks(fold_size)) - // } -} -fn assist_swap_array2(slice: &mut [F], index: usize, fold_size: usize, features: usize) { - if index == 0 { - return; + /// Cross validation for multi-target algorithms + /// + /// Given a list of fittable models, cross validation + /// is used to compare their performance according to some + /// performance metric. To do so, k-folding is applied to the + /// dataset and, for each fold, each model is trained on the training set + /// and its performance is evaluated on the validation set. The performances + /// collected for each model are then averaged over the number of folds. + /// + /// ### Parameters: + /// + /// - `k`: the number of folds to apply + /// - `parameters`: a list of models to compare + /// - `eval`: closure used to evaluate the performance of each trained model + /// + /// ### Returns + /// + /// An array of model performances, in the same order as the models in input, if no errors occur. + /// The performance of each model is given as an array of performances, one for each target. + /// Otherwise, it might return an Error in one of the following cases: + /// + /// - An error occurred during the fitting of one model + /// - An error occurred inside the evaluation closure + /// + /// ### Example + /// + /// ```rust, ignore + /// + /// use linfa::prelude::*; + /// + /// // mutability needed for fast cross validation + /// let mut dataset = linfa_datasets::diabetes(); + /// + /// let models = vec![model1, model2, ... ]; + /// + /// let r2_scores = dataset.cross_validate_multi(5,&models, |prediction, truth| prediction.r2(truth))?; + /// + /// ``` + pub fn cross_validate_multi( + &'a mut self, + k: usize, + parameters: &[M], + eval: C, + ) -> std::result::Result, ER> + where + ER: std::error::Error + std::convert::From, + M: for<'c> Fit, ArrayView2<'c, E>, ER, Object = O>, + O: for<'d> PredictRef, Array2>, + FACC: Float, + C: Fn(&Array2, &ArrayView2) -> std::result::Result, crate::error::Error>, + { + let mut evaluations = Array2::from_elem((parameters.len(), self.ntargets()), FACC::zero()); + let folds_evaluations: std::result::Result, ER> = self + .iter_fold(k, |train| { + let fit_result: std::result::Result, ER> = + parameters.iter().map(|p| p.fit(&train)).collect(); + fit_result + }) + .map(|(models, valid)| { + let targets = valid.targets(); + let models = models?; + let mut eval_predictions = + Array2::from_elem((models.len(), targets.len()), FACC::zero()); + for (i, model) in models.iter().enumerate() { + let predicted = model.predict(valid.records()); + let eval_pred = match eval(&predicted, &targets) { + Err(e) => Err(ER::from(e)), + Ok(res) => Ok(res), + }?; + eval_predictions.row_mut(i).add_assign(&eval_pred); + } + Ok(eval_predictions) + }) + .collect(); + + for fold_evaluation in folds_evaluations? { + evaluations.add_assign(&fold_evaluation) + } + Ok(evaluations / FACC::from(k).unwrap()) + } + + /// Cross validation for single target algorithms + /// + /// Given a list of fittable models, cross validation + /// is used to compare their performance according to some + /// performance metric. To do so, k-folding is applied to the + /// dataset and, for each fold, each model is trained on the training set + /// and its performance is evaluated on the validation set. 
The performances + /// collected for each model are then averaged over the number of folds. + /// + /// ### Parameters: + /// + /// - `k`: the number of folds to apply + /// - `parameters`: a list of models to compare + /// - `eval`: closure used to evaluate the performance of each trained model. For single-target + /// datasets, this closure is called once for each fold. + /// For multi-target datasets the closure is called, in each fold, once for every different target. + /// If you need to use a different evaluation for each target, take a look at the + /// [`cross_validate_multi`](struct.DatasetBase.html#method.cross_validate_multi) method. + /// + /// ### Returns + /// + /// On successful evaluation it returns an array of model performances, in the same order as the input models. + /// + /// It returns an Error in one of the following cases: + /// + /// - An error occurred during the fitting of one model + /// - An error occurred inside the evaluation closure + /// + /// ### Example + /// + /// ```rust, ignore + /// + /// use linfa::prelude::*; + /// + /// // mutability needed for fast cross validation + /// let mut dataset = linfa_datasets::diabetes(); + /// + /// let models = vec![model1, model2, ... ]; + /// + /// let r2_scores = dataset.cross_validate(5,&models, |prediction, truth| prediction.r2(truth))?; + /// + /// ``` + pub fn cross_validate( + &'a mut self, + k: usize, + parameters: &[M], + eval: C, + ) -> std::result::Result, I>, ER> + where + ER: std::error::Error + std::convert::From, + M: for<'c> Fit, ArrayView2<'c, E>, ER, Object = O>, + O: for<'d> PredictRef, ArrayBase, I>>, + FACC: Float, + C: Fn(&ArrayView1, &ArrayView1) -> std::result::Result, + I: Dimension, + { + // construct shape as either vector or matrix + let mut shape = match I::NDIM { + Some(1) | Some(2) => Ok(I::zeros(I::NDIM.unwrap())), + _ => Err(crate::Error::NdShape(ndarray::ShapeError::from_kind( + ndarray::ErrorKind::IncompatibleShape, + ))), + }?; + + // assign shape form of output + let mut tmp = shape.as_array_view_mut(); + tmp[0] = parameters.len(); + if tmp.len() == 2 { + tmp[1] = self.ntargets(); + } + + let folds_evaluations = self + .iter_fold(k, |train| { + let fit_result: std::result::Result, ER> = + parameters.iter().map(|p| p.fit(&train)).collect(); + fit_result + }) + .map(|(models, valid)| { + let targets = valid.as_multi_targets(); + let models = models?; + + let eval_predictions = models + .iter() + .map(|m| { + let nsamples = valid.nsamples(); + let predicted = m.predict(valid.records()); + + // reshape to ensure that matrix has two dimensions + let ntargets = if predicted.ndim() == 1 { + 1 + } else { + predicted.len_of(Axis(1)) + }; + + let predicted: Array2<_> = + predicted.into_shape((nsamples, ntargets)).unwrap(); + + predicted + .gencolumns() + .into_iter() + .zip(targets.gencolumns().into_iter()) + .map(|(p, t)| eval(&p.view(), &t).map_err(ER::from)) + .collect() + }) + .collect::>, ER>>()? + .into_iter() + .flatten() + .collect(); + + Ok(Array::from_shape_vec(shape.clone(), eval_predictions).unwrap()) + }) + .collect::, ER>>(); + + let res = folds_evaluations? 
+ .into_iter() + .fold(Array::::zeros(shape.clone()), std::ops::Add::add); + + Ok(res / FACC::cast(k)) } - let adj_fold_size = fold_size * features; - let start = adj_fold_size * index; - let (first_s, second_s) = slice.split_at_mut(start); - let (mut fold, _) = second_s.split_at_mut(adj_fold_size); - first_s[..fold_size * features].swap_with_slice(&mut fold); } impl Dataset { diff --git a/src/dataset/impl_targets.rs b/src/dataset/impl_targets.rs index 86d392955..45872f727 100644 --- a/src/dataset/impl_targets.rs +++ b/src/dataset/impl_targets.rs @@ -5,8 +5,8 @@ use super::{ Label, Labels, Pr, Records, }; use ndarray::{ - concatenate, Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, CowArray, Data, - DataMut, Dimension, Ix1, Ix2, Ix3, OwnedRepr, ViewRepr, + Array1, Array2, ArrayBase, ArrayView2, ArrayViewMut2, Axis, CowArray, Data, DataMut, Dimension, + Ix1, Ix2, Ix3, OwnedRepr, ViewRepr, }; impl<'a, L, S: Data> AsTargets for ArrayBase { @@ -151,12 +151,12 @@ impl, I: Dimension> Labels for ArrayBase { } } -/// A NdArray with discrete labels can act as labels -impl> Labels for DatasetBase> { +/// Counted labels can act as labels +impl> Labels for CountedTargets { type Elem = L; fn label_count(&self) -> Vec> { - self.targets.labels.clone() + self.labels.clone() } } @@ -165,9 +165,14 @@ where D: Data, T: AsTargets, { + /// Transforms the input dataset by keeping only those samples whose label appears in `labels`. + /// + /// In the multi-target case a sample is kept if *any* of its targets appears in `labels`. + /// + /// Sample weights and feature names are preserved by this transformation. pub fn with_labels( &self, - labels: &[&[L]], + labels: &[L], ) -> DatasetBase, CountedTargets>> { let targets = self.targets.as_multi_targets(); let old_weights = self.weights(); @@ -185,7 +190,7 @@ where .zip(targets.genrows().into_iter()) .enumerate() { - let any_exists = t.iter().zip(labels.iter()).any(|(a, b)| b.contains(&a)); + let any_exists = t.iter().any(|a| labels.contains(&a)); if any_exists { for (map, val) in map.iter_mut().zip(t.iter()) { @@ -201,8 +206,15 @@ where } } - let records: Array2 = concatenate(Axis(0), &records_arr).unwrap(); - let targets = concatenate(Axis(0), &targets_arr).unwrap(); + let nsamples = records_arr.len(); + let nfeatures = self.nfeatures(); + let ntargets = self.ntargets(); + + let records_arr = records_arr.into_iter().flatten().copied().collect(); + let targets_arr = targets_arr.into_iter().flatten().copied().collect(); + + let records = Array2::from_shape_vec((nsamples, nfeatures), records_arr).unwrap(); + let targets = Array2::from_shape_vec((nsamples, ntargets), targets_arr).unwrap(); let targets = CountedTargets { targets, diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs index 5100c12b1..102bb60a2 100644 --- a/src/dataset/mod.rs +++ b/src/dataset/mod.rs @@ -284,6 +284,7 @@ pub trait Labels { #[cfg(test)] mod tests { use super::*; + use approx::assert_abs_diff_eq; use ndarray::{array, Array1, Array2}; use rand::{rngs::SmallRng, SeedableRng}; @@ -523,34 +524,67 @@ mod tests { ); } - struct MockFittable {} + use crate::traits::{Fit, PredictRef}; + use ndarray::ArrayView2; + use thiserror::Error; + + struct MockFittable { + mock_var: usize, + } struct MockFittableResult { mock_var: usize, } - use crate::traits::Fit; - use ndarray::ArrayView2; + #[derive(Error, Debug)] + enum MockError { + #[error(transparent)] + LinfaError(#[from] crate::error::Error), + } + + type MockResult = std::result::Result; - impl<'a> Fit<'a, ArrayView2<'a, f64>, 
ArrayView2<'a, f64>> for MockFittable { + impl<'a> Fit, ArrayView2<'a, f64>, MockError> for MockFittable { type Object = MockFittableResult; - fn fit(&self, training_data: &DatasetView) -> Self::Object { - MockFittableResult { - mock_var: training_data.nsamples(), + fn fit( + &self, + training_data: &DatasetView, + ) -> std::result::Result { + if self.mock_var == 0 { + Err(MockError::LinfaError(Error::Parameters("0".to_string()))) + } else { + Ok(MockFittableResult { + mock_var: training_data.nsamples(), + }) } } } + impl<'b> PredictRef, Array1> for MockFittableResult { + fn predict_ref<'a>(&'a self, _x: &'a ArrayView2<'b, f64>) -> Array1 { + array![0.] + } + } + + impl<'b> PredictRef, Array2> for MockFittableResult { + fn predict_ref<'a>(&'a self, _x: &'a ArrayView2<'b, f64>) -> Array2 { + array![[0., 0.]] + } + } + #[test] fn test_iter_fold() { let records = Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); let mut dataset: Dataset = (records, targets).into(); - let params = MockFittable {}; + let params = MockFittable { mock_var: 1 }; - for (i, (model, validation_set)) in dataset.iter_fold(5, |v| params.fit(&v)).enumerate() { + for (i, (model, validation_set)) in dataset + .iter_fold(5, |v| params.fit(&v).unwrap()) + .enumerate() + { assert_eq!(model.mock_var, 4); assert_eq!(validation_set.records().row(0)[0] as usize, i + 1); assert_eq!(validation_set.records().row(0)[1] as usize, i + 1); @@ -566,12 +600,15 @@ mod tests { Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); let mut dataset: Dataset = (records, targets).into(); - let params = MockFittable {}; + let params = MockFittable { mock_var: 1 }; // If we request three folds from a dataset with 5 samples it will cut the // last two samples from the folds and always add them as a tail of the training // data - for (i, (model, validation_set)) in dataset.iter_fold(3, |v| params.fit(&v)).enumerate() { + for (i, (model, validation_set)) in dataset + .iter_fold(3, |v| params.fit(&v).unwrap()) + .enumerate() + { assert_eq!(model.mock_var, 4); assert_eq!(validation_set.records().row(0)[0] as usize, i + 1); assert_eq!(validation_set.records().row(0)[1] as usize, i + 1); @@ -582,7 +619,10 @@ mod tests { } // the same goes for the last sample if we choose 4 folds - for (i, (model, validation_set)) in dataset.iter_fold(4, |v| params.fit(&v)).enumerate() { + for (i, (model, validation_set)) in dataset + .iter_fold(4, |v| params.fit(&v).unwrap()) + .enumerate() + { assert_eq!(model.mock_var, 4); assert_eq!(validation_set.records().row(0)[0] as usize, i + 1); assert_eq!(validation_set.records().row(0)[1] as usize, i + 1); @@ -594,7 +634,10 @@ mod tests { // if we choose 2 folds then again the last sample will be only // used for trainig - for (i, (model, validation_set)) in dataset.iter_fold(2, |v| params.fit(&v)).enumerate() { + for (i, (model, validation_set)) in dataset + .iter_fold(2, |v| params.fit(&v).unwrap()) + .enumerate() + { assert_eq!(model.mock_var, 3); assert_eq!(validation_set.targets().dim(), (2, 1)); assert!(i < 2); @@ -608,7 +651,7 @@ mod tests { Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); let mut dataset: Dataset = (records, targets).into(); - let params = MockFittable {}; + let params = 
MockFittable { mock_var: 1 }; let _ = dataset.iter_fold(0, |v| params.fit(&v)).enumerate(); } @@ -619,7 +662,229 @@ mod tests { Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); let mut dataset: Dataset = (records, targets).into(); - let params = MockFittable {}; + let params = MockFittable { mock_var: 1 }; let _ = dataset.iter_fold(6, |v| params.fit(&v)).enumerate(); } + + #[test] + fn test_st_cv_all_correct() { + let records = + Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); + let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); + let mut dataset: Dataset = (records, targets).into(); + let params = vec![MockFittable { mock_var: 1 }, MockFittable { mock_var: 2 }]; + let acc = dataset + .cross_validate(5, ¶ms, |_pred, _truth| Ok(3.)) + .unwrap(); + assert_eq!(acc, array![3., 3.]); + + let mut dataset: Dataset = + (array![[1., 1.], [2., 2.]], array![[1., 2.], [3., 4.]]).into(); + + let params = vec![MockFittable { mock_var: 1 }, MockFittable { mock_var: 2 }]; + let acc = dataset + .cross_validate(2, ¶ms, |_pred, _truth| Ok(3.)) + .unwrap(); + assert_eq!(acc, array![[3., 3.], [3., 3.]]); + } + #[test] + #[should_panic( + expected = "called `Result::unwrap()` on an `Err` value: LinfaError(Parameters(\"0\"))" + )] + fn test_st_cv_one_incorrect() { + let records = + Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); + let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); + let mut dataset: Dataset = (records, targets).into(); + // second one should throw an error + let params = vec![MockFittable { mock_var: 1 }, MockFittable { mock_var: 0 }]; + let acc: MockResult> = dataset.cross_validate(5, ¶ms, |_pred, _truth| Ok(0.)); + + acc.unwrap(); + } + + #[test] + #[should_panic( + expected = "called `Result::unwrap()` on an `Err` value: LinfaError(Parameters(\"eval\"))" + )] + fn test_st_cv_incorrect_eval() { + let records = + Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); + let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); + let mut dataset: Dataset = (records, targets).into(); + // second one should throw an error + let params = vec![MockFittable { mock_var: 1 }, MockFittable { mock_var: 1 }]; + let err: MockResult> = dataset.cross_validate(5, ¶ms, |_pred, _truth| { + if false { + Ok(0f32) + } else { + Err(Error::Parameters("eval".to_string())) + } + }); + + err.unwrap(); + } + + #[test] + fn test_st_cv_mt_all_correct() { + let records = + Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); + let targets = array![[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]]; + let mut dataset: Dataset = (records, targets).into(); + let params = vec![MockFittable { mock_var: 1 }, MockFittable { mock_var: 2 }]; + let acc = dataset + .cross_validate_multi(5, ¶ms, |_pred, _truth| Ok(array![5., 6.])) + .unwrap(); + assert_eq!(acc.dim(), (params.len(), dataset.ntargets())); + assert_eq!(acc, array![[5., 6.], [5., 6.]]) + } + #[test] + fn test_st_cv_mt_one_incorrect() { + let records = + Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); + let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); + let mut dataset: Dataset = (records, targets).into(); + // second one should throw an error + let params = vec![MockFittable { mock_var: 1 
}, MockFittable { mock_var: 0 }]; + let err = dataset + .cross_validate_multi(5, ¶ms, |_pred, _truth| Ok(array![5.])) + .unwrap_err(); + assert_eq!(err.to_string(), "invalid parameter 0".to_string()); + } + + #[test] + fn test_st_cv_mt_incorrect_eval() { + let records = + Array2::from_shape_vec((5, 2), vec![1., 1., 2., 2., 3., 3., 4., 4., 5., 5.]).unwrap(); + let targets = Array1::from_shape_vec(5, vec![1., 2., 3., 4., 5.]).unwrap(); + let mut dataset: Dataset = (records, targets).into(); + // second one should throw an error + let params = vec![MockFittable { mock_var: 1 }, MockFittable { mock_var: 1 }]; + let err = dataset + .cross_validate_multi(5, ¶ms, |_pred, _truth| { + if false { + Ok(array![0f32]) + } else { + Err(Error::Parameters("eval".to_string())) + } + }) + .unwrap_err(); + assert_eq!(err.to_string(), "invalid parameter eval".to_string()); + } + + #[test] + fn test_with_labels_st() { + let records = array![ + [0., 1.], + [1., 2.], + [2., 3.], + [0., 4.], + [1., 5.], + [2., 6.], + [0., 7.], + [1., 8.], + [2., 9.], + [0., 10.] + ]; + let targets = array![0, 1, 2, 0, 1, 2, 0, 1, 2, 0].insert_axis(Axis(1)); + let dataset = DatasetBase::from((records, targets)); + assert_eq!(dataset.nsamples(), 10); + assert_eq!(dataset.ntargets(), 1); + let dataset_no_0 = dataset.with_labels(&[1, 2]); + assert_eq!(dataset_no_0.nsamples(), 6); + assert_eq!(dataset_no_0.ntargets(), 1); + assert_abs_diff_eq!( + dataset_no_0.records, + array![[1., 2.], [2., 3.], [1., 5.], [2., 6.], [1., 8.], [2., 9.]] + ); + assert_abs_diff_eq!( + dataset_no_0.try_single_target().unwrap(), + array![1, 2, 1, 2, 1, 2] + ); + let dataset_no_1 = dataset.with_labels(&[0, 2]); + assert_eq!(dataset_no_1.nsamples(), 7); + assert_eq!(dataset_no_1.ntargets(), 1); + assert_abs_diff_eq!( + dataset_no_1.records, + array![ + [0., 1.], + [2., 3.], + [0., 4.], + [2., 6.], + [0., 7.], + [2., 9.], + [0., 10.] + ] + ); + assert_abs_diff_eq!( + dataset_no_1.try_single_target().unwrap(), + array![0, 2, 0, 2, 0, 2, 0] + ); + let dataset_no_2 = dataset.with_labels(&[0, 1]); + assert_eq!(dataset_no_2.nsamples(), 7); + assert_eq!(dataset_no_2.ntargets(), 1); + assert_abs_diff_eq!( + dataset_no_2.records, + array![ + [0., 1.], + [1., 2.], + [0., 4.], + [1., 5.], + [0., 7.], + [1., 8.], + [0., 10.] + ] + ); + assert_abs_diff_eq!( + dataset_no_2.try_single_target().unwrap(), + array![0, 1, 0, 1, 0, 1, 0] + ); + } + + #[test] + fn test_with_labels_mt() { + let records = array![ + [0., 1.], + [1., 2.], + [2., 3.], + [0., 4.], + [1., 5.], + [2., 6.], + [0., 7.], + [1., 8.], + [2., 9.], + [0., 10.] 
+ ]; + let targets = array![ + [0, 7], + [1, 8], + [2, 9], + [0, 7], + [1, 8], + [2, 9], + [0, 7], + [1, 8], + [2, 9], + [0, 7] + ]; + let dataset = DatasetBase::from((records, targets)); + assert_eq!(dataset.nsamples(), 10); + assert_eq!(dataset.ntargets(), 2); + // remove 0 from target 1 and 7 from target 2 + let dataset_no_07 = dataset.with_labels(&[1, 2, 8, 9]); + assert_eq!(dataset_no_07.nsamples(), 6); + assert_eq!(dataset_no_07.ntargets(), 2); + assert_abs_diff_eq!( + dataset_no_07.records, + array![[1., 2.], [2., 3.], [1., 5.], [2., 6.], [1., 8.], [2., 9.]] + ); + assert_abs_diff_eq!( + dataset_no_07.as_multi_targets(), + array![[1, 8], [2, 9], [1, 8], [2, 9], [1, 8], [2, 9]] + ); + // remove label 1 from target 1 and label 7 from target 2: with_labels is an "any" filter, so all samples should be kept + let dataset_no_17 = dataset.with_labels(&[0, 2, 8, 9]); + assert_eq!(dataset_no_17.nsamples(), 10); + assert_eq!(dataset_no_17.ntargets(), 2); + } } diff --git a/src/error.rs b/src/error.rs index c86a81a20..f5ef0f14d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -24,4 +24,6 @@ pub enum Error { MultipleTargets, #[error("platt scaling failed")] Platt(PlattNewtonResult), + #[error("The numbers of samples do not match: {0} - {1}")] + MismatchedShapes(usize, usize), } diff --git a/src/lib.rs b/src/lib.rs index 02cc08c57..b678bdb55 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,9 @@ //! | [hierarchical](https://docs.rs/linfa-hierarchical/) | Agglomerative hierarchical clustering | Tested | Unsupervised learning | Cluster and build hierarchy of clusters | //! | [bayes](https://docs.rs/linfa-bayes/) | Naive Bayes | Tested | Supervised learning | Contains Gaussian Naive Bayes | //! | [ica](https://docs.rs/linfa-ica/) | Independent component analysis | Tested | Unsupervised learning | Contains FastICA implementation | +//! | [pls](algorithms/linfa-pls/) | Partial Least Squares | Tested | Supervised learning | Contains PLS estimators for dimensionality reduction and regression | +//! | [tsne](algorithms/linfa-tsne/) | Dimensionality reduction | Tested | Unsupervised learning | Contains exact solution and Barnes-Hut approximation t-SNE | +//! | [preprocessing](algorithms/linfa-preprocessing/) | Normalization & Vectorization | Tested | Pre-processing | Contains data normalization/whitening and count vectorization/tf-idf | //! //! We believe that only a significant community effort can nurture, build, and sustain a machine learning ecosystem in Rust - there is no other way forward. //! 
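The reworked `with_labels` above now takes a flat `&[L]` slice and keeps a sample whenever *any* of its target labels appears in the list. As a compact illustration, here is a sketch distilled from the `test_with_labels_st` case above (not part of the patch itself; it only relies on `linfa::DatasetBase` and the `Records` trait):

```rust
use linfa::dataset::Records;
use linfa::DatasetBase;
use ndarray::{array, Axis};

// three samples with a single target column carrying labels 0, 1 and 2
let records = array![[0., 1.], [1., 2.], [2., 3.]];
let targets = array![0, 1, 2].insert_axis(Axis(1));
let dataset = DatasetBase::from((records, targets));

// keep only the samples whose label appears in the list: label 0 is dropped
let filtered = dataset.with_labels(&[1, 2]);
assert_eq!(filtered.nsamples(), 2);
```

In the multi-target case the same call acts as an "any" filter, which is why `test_with_labels_mt` above keeps all ten samples as long as every row still has one matching label.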
diff --git a/src/metrics_classification.rs b/src/metrics_classification.rs index 835bcc54d..37d6a345d 100644 --- a/src/metrics_classification.rs +++ b/src/metrics_classification.rs @@ -10,7 +10,7 @@ use ndarray::prelude::*; use ndarray::Data; use crate::dataset::{AsTargets, DatasetBase, Label, Labels, Pr, Records}; -use crate::error::Result; +use crate::error::{Error, Result}; /// Return tuple of class index for each element of prediction and ground_truth fn map_prediction_to_idx( @@ -267,10 +267,25 @@ where T: AsTargets + Labels, { fn confusion_matrix(&self, ground_truth: ArrayBase) -> Result> { + self.confusion_matrix(&ground_truth) + } +} + +impl ToConfusionMatrix> for T +where + S: Data, + T: AsTargets + Labels, +{ + fn confusion_matrix(&self, ground_truth: &ArrayBase) -> Result> { + let targets = self.try_single_target()?; + if targets.len() != ground_truth.len() { + return Err(Error::MismatchedShapes(targets.len(), ground_truth.len())); + } + let classes = self.labels(); let indices = map_prediction_to_idx( - &self.try_single_target()?.as_slice().unwrap(), + targets.as_slice().unwrap(), &ground_truth.as_slice().unwrap(), &classes, ); @@ -475,107 +490,109 @@ impl, T2: AsTargets, - D: Dimension, - >( - a: ArrayBase, - b: &[A], - ) { - assert_eq_iter(a.iter(), b); + use std::collections::HashMap; + + fn get_labels_map(cm: &ConfusionMatrix) -> HashMap { + cm.members + .iter() + .enumerate() + .map(|(index, label)| (label.clone(), index)) + .collect() } - fn assert_eq_iter<'a, A, B>(a: impl IntoIterator, b: impl IntoIterator) - where - A: 'a + std::fmt::Debug + PartialEq + AbsDiffEq, - B: Borrow, - { - let mut a_iter = a.into_iter(); - let mut b_iter = b.into_iter(); - loop { - match (a_iter.next(), b_iter.next()) { - (None, None) => break, - (Some(a_item), Some(b_item)) => { - abs_diff_eq!(a_item.borrow(), b_item); - } - _ => { - panic!("assert_eq_iters: iterators had different lengths"); - } - } + // confusion matrices use hash sets for the labels to pair so + // the order of the rows of the matrices is not constant. 
+ // we can transform the index->member mapping in `cm.members` + // into a member->index mapping to check each element independently + fn assert_cm_eq(cm: &ConfusionMatrix, expected: &Array2, labels: &Array1) { + let map = get_labels_map(cm); + for ((row, column), value) in expected.indexed_iter().map(|((r, c), v)| { + ( + (*map.get(&labels[r]).unwrap(), *map.get(&labels[c]).unwrap()), + v, + ) + }) { + let cm_value = *cm.matrix.get((row, column)).unwrap(); + assert_abs_diff_eq!(cm_value, value); + } + } + + fn assert_split_eq) -> f32>( + cm: &ConfusionMatrix, + eval: C, + expected: &Array1, + labels: &Array1, + ) { + let map = get_labels_map(cm); + let evals = cm + .split_one_vs_all() + .into_iter() + .map(|x| eval(&x)) + .collect::>(); + for (index, value) in expected + .indexed_iter() + .map(|(i, v)| (*map.get(&labels[i]).unwrap(), v)) + { + let evals_value = *evals.get(index).unwrap(); + assert_abs_diff_eq!(evals_value, value); } } #[test] fn test_confusion_matrix() { - let predicted = ArrayView1::from(&[0, 1, 0, 1, 0, 1]); let ground_truth = ArrayView1::from(&[1, 1, 0, 1, 0, 1]); + let predicted = ArrayView1::from(&[0, 1, 0, 1, 0, 1]); - let cm = predicted.confusion_matrix(ground_truth); + let cm = predicted.confusion_matrix(ground_truth).unwrap(); - assert_eq_slice(cm.matrix, &[2., 1., 0., 3.]); + let labels = array![0, 1]; + let expected = array![[2., 1.], [0., 3.]]; + + assert_cm_eq(&cm, &expected, &labels); } #[test] fn test_cm_metrices() { - let predicted = Array1::from(vec![0, 1, 0, 1, 0, 1]); let ground_truth = Array1::from(vec![1, 1, 0, 1, 0, 1]); + let predicted = Array1::from(vec![0, 1, 0, 1, 0, 1]); - let x = predicted.confusion_matrix(ground_truth); + let x = predicted.confusion_matrix(ground_truth).unwrap(); - abs_diff_eq!(x.accuracy(), 5.0 / 6.0_f32); - abs_diff_eq!( + let labels = array![0, 1]; + + assert_abs_diff_eq!(x.accuracy(), 5.0 / 6.0_f32); + assert_abs_diff_eq!( x.mcc(), (2. * 3. - 1. * 0.) / (2.0f32 * 3. * 3. * 4.).sqrt() as f32 ); - assert_eq_iter( - x.split_one_vs_all().into_iter().map(|x| x.precision()), - &[1.0, 3. / 4.], + assert_split_eq( + &x, + |cm| ConfusionMatrix::precision(cm), + &array![1.0, 3. 
/ 4.], + &labels, ); - assert_eq_iter( - x.split_one_vs_all().into_iter().map(|x| x.recall()), - &[2.0 / 3.0, 1.0], + assert_split_eq( + &x, + |cm| ConfusionMatrix::recall(cm), + &array![2.0 / 3.0, 1.0], + &labels, ); - assert_eq_iter( - x.split_one_vs_all().into_iter().map(|x| x.f1_score()), - &[4.0 / 5.0, 6.0 / 7.0], + assert_split_eq( + &x, + |cm| ConfusionMatrix::f1_score(cm), + &array![4.0 / 5.0, 6.0 / 7.0], + &labels, ); } - #[test] - fn test_modification() { - let predicted = array![0, 3, 2, 0, 1, 1, 1, 3, 2, 3]; - - let ground_truth = - DatasetBase::new((), array![0, 2, 3, 0, 1, 2, 1, 2, 3, 2]).with_labels(&[0, 1, 2]); - - // exclude class 3 from evaluation - let cm = predicted.confusion_matrix(&ground_truth); - - assert_eq_slice(cm.matrix, &[2., 0., 0., 0., 2., 1., 0., 0., 0.]); - - // weight errors in class 2 more severe and exclude class 1 - let ground_truth = ground_truth - .with_weights(vec![1., 2., 1., 1., 1., 2., 1., 2., 1., 2.]) - .with_labels(&[0, 2, 3]); - - let cm = predicted.confusion_matrix(&ground_truth); - - // the false-positive error for label=2 is twice severe here - assert_eq_slice(cm.matrix, &[2., 0., 0., 0., 0., 4., 0., 3., 0.]); - } - #[test] fn test_roc_curve() { let predicted = ArrayView1::from(&[0.1, 0.3, 0.5, 0.7, 0.8, 0.9]).mapv(Pr); @@ -592,7 +609,7 @@ mod tests { (1., 1.), ]; - let roc = predicted.roc(&groundtruth); + let roc = predicted.roc(&groundtruth).unwrap(); assert_eq!(roc.get_curve(), result); } @@ -609,32 +626,38 @@ mod tests { .collect::>(); // ROC Area-Under-Curve should be approximately 0.5 - let roc = predicted.roc(&ground_truth); + let roc = predicted.roc(&ground_truth).unwrap(); assert!((roc.area_under_curve() - 0.5) < 0.04); } #[test] fn split_one_vs_all() { - let predicted = array![0, 3, 2, 0, 1, 1, 1, 3, 2, 3]; let ground_truth = array![0, 2, 3, 0, 1, 2, 1, 2, 3, 2]; + let predicted = array![0, 3, 2, 0, 1, 1, 1, 3, 2, 3]; // create a confusion matrix - let cm = predicted.confusion_matrix(ground_truth); + let cm = predicted.confusion_matrix(ground_truth).unwrap(); + + let labels = array![0, 1, 2, 3]; + let bin_labels = array![true, false]; + let map = get_labels_map(&cm); // split four class confusion matrix into 4 binary confusion matrix let n_cm = cm.split_one_vs_all(); - let result: &[&[f32]] = &[ - &[2., 0., 0., 8.], // no misclassification for label=0 - &[2., 1., 0., 7.], // one false-positive for label=1 - &[0., 2., 4., 4.], // two false-positive and four false-negative for label=2 - &[0., 3., 2., 5.], // three false-positive and two false-negative for label=3 + let result = &[ + array![[2., 0.], [0., 8.]], // no misclassification for label=0 + array![[2., 1.], [0., 7.]], // one false-positive for label=1 + array![[0., 2.], [4., 4.]], // two false-positive and four false-negative for label=2 + array![[0., 3.], [2., 5.]], // three false-positive and two false-negative for label=3 ]; - // compare to result - n_cm.into_iter() - .zip(result.iter()) - .for_each(|(x, r)| assert_eq_slice(x.matrix, r)) + for (r, x) in result + .iter() + .zip(labels.iter()) + .map(|(r, l)| (r, n_cm.get(*map.get(l).unwrap()).unwrap())) + { + assert_cm_eq(x, r, &bin_labels); + } } } -*/ diff --git a/src/traits.rs b/src/traits.rs index b0c4e30d7..c7bbca2cb 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -2,6 +2,7 @@ //! use crate::dataset::{DatasetBase, Records}; +use std::convert::From; /// Transformation algorithms /// @@ -20,10 +21,10 @@ pub trait Transformer { /// A fittable algorithm takes a dataset and creates a concept of some kind about it. 
For example /// in *KMeans* this would be the mean values for each class, or in *SVM* the separating /// hyperplane. It returns a model, which can be used to predict targets for new data. -pub trait Fit<'a, R: Records, T> { - type Object: 'a; +pub trait Fit> { + type Object; - fn fit(&self, dataset: &DatasetBase) -> Self::Object; + fn fit(&self, dataset: &DatasetBase) -> Result; } /// Incremental algorithms