From c4e439f7aff0b8ceb6e342159d2b1175fac4a828 Mon Sep 17 00:00:00 2001 From: L <457124+liborty@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:11:16 +1000 Subject: [PATCH] v 2.0.10 --- Cargo.toml | 4 ++-- README.md | 2 ++ src/triangmat.rs | 25 ++++++++++++++++++++++--- tests/tests.rs | 10 +++++----- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0dfeca1..6854e36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "rstats" -version = "2.0.9" +version = "2.0.10" authors = ["Libor Spacek"] edition = "2021" -description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, Mahalanobis Distance, Hulls, Multithreading.." +description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.." readme = "README.md" homepage = "https://github.com/liborty/rstats" repository = "https://github.com/liborty/rstats" diff --git a/README.md b/README.md index dc4e0af..61b2ae5 100644 --- a/README.md +++ b/README.md @@ -332,6 +332,8 @@ Methods which take an additional generic vector argument, such as a vector of we ## Appendix: Recent Releases +* **Version 2.0.10** - Added two methods to struct `TriangMat`: `eigenvectors` (enabling PCA) and `variances`, which computes the variances of the data cloud along the original axes by projecting onto them and summing the eigenvalue-weighted principal components. + * **Version 2.0.9** - Pruned some rarely used methods, simplified `gmparts` and `gmerror`, updated dependencies. * **Version 2.0.8**' - Changed initial guess in iterative weighted gm methods to weighted mean. This, being more accurate than plain mean, leads to fewer iterations. Updated some dependencies. 
diff --git a/src/triangmat.rs b/src/triangmat.rs index d32046d..13da3ee 100644 --- a/src/triangmat.rs +++ b/src/triangmat.rs @@ -1,4 +1,4 @@ -use crate::{re_error, sumn, RError, Stats, TriangMat, Vecg, RE}; // MStats, MinMax, MutVecg, Stats, VecVec }; +use crate::{re_error, sumn, RError, Stats, TriangMat, Vecg, MutVecg, RE}; // MStats, MinMax, MutVecg, Stats, VecVec }; pub use indxvec::{printing::*, Printing, Vecops}; /// Meanings of 'kind' field. Note that 'Upper Symmetric' would represent the same full matrix as @@ -83,14 +83,33 @@ impl TriangMat { } TriangMat { kind: 2, data } } - /// Eigenvalues (obtainable from Cholesky L matrix) + /// Eigenvalues, taken as the squared diagonal of the Cholesky L matrix pub fn eigenvalues(&self) -> Vec<f64> { self.diagonal().iter().map(|&x| x * x).collect::<Vec<f64>>() } - /// Determinant (obtainable from Cholesky L matrix) + /// Determinant, taken as the product of the squared diagonal of the Cholesky L matrix pub fn determinant(&self) -> f64 { self.diagonal().iter().map(|&x| x * x).product() } + /// Normalized full eigenvectors from a triangular covariance matrix. + /// Can be used together with `eigenvalues` for Principal Components Analysis. + /// The covariance matrix is symmetric, so its rows are used as the + /// eigenvectors (no need to transpose it). NOTE(review): rows of a symmetric matrix are not its eigenvectors in general - confirm. + pub fn eigenvectors(&self) -> Vec<Vec<f64>> { + let mut fullcov = self.to_full(); + fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit()); + fullcov + } + /// Variances along the original axes (dimensions), from a triangular covariance matrix: + /// the sum of its eigenvectors, each scaled by its eigenvalue. Propagates any error from `cholesky`. + pub fn variances(&self) -> Result< Vec<f64>, RE > { + let eigenvalues = self.cholesky()?.eigenvalues(); + let eigenvectors = self.eigenvectors(); + let mut result = vec![0_f64;eigenvalues.len()]; + eigenvectors.iter().zip(eigenvalues).for_each( + |(eigenvector, eigenvalue)| result.mutvadd(&eigenvector.smult(eigenvalue))); + Ok(result) + } /// Translates subscripts to a 1d vector, i.e. natural numbers, to a pair of /// (row,column) coordinates within a lower/upper triangular matrix. 
diff --git a/tests/tests.rs b/tests/tests.rs index 89ce60c..646c4dc 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -242,8 +242,7 @@ fn triangmat() -> Result<(), RE> { println!("Diagonal: {}",TriangMat::unit(7).diagonal().gr()); let d = 10_usize; let n = 90_usize; - println!("Testing on a random set of {n} points in {d} dimensional space"); - // set_seeds(1133); + println!("Testing on a random set of {n} points in {d} dimensional space"); let pts = ranvv_f64_range(n,d, 0.0..=4.0)?; // println!("\nTest data:\n{}",pts.gr()); // let transppt = pts.transpose(); @@ -251,9 +250,10 @@ fn triangmat() -> Result<(), RE> { println!("Comediance matrix:\n{cov}"); let chol = cov.cholesky()?; println!("Cholesky L matrix:\n{chol}"); - println!("Sorted eigenvalues of the comediance matrix from Cholesky decomposition:\n{}", - chol.eigenvalues().sortm(false).gr()); - println!("Determinant of the comediance matrix (their product): {}",chol.determinant().gr()); + println!("Eigenvalues by Cholesky decomposition:\n{}", + chol.eigenvalues().gr()); + println!("Determinant (their product): {}",chol.determinant().gr()); + println!("Variances of the original data by summing principal components:\n{}",cov.variances()?.gr()); let d = ranv_f64(d)?; let dmag = d.vmag(); let mahamag = chol.mahalanobis(&d)?;