diff --git a/Cargo.toml b/Cargo.toml
index c5575ae..0e08bac 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rstats"
-version = "2.1.3"
+version = "2.1.4"
 authors = ["Libor Spacek"]
 edition = "2021"
 description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, PCA, Mahalanobis Distance, Hulls, Multithreading.."
diff --git a/README.md b/README.md
index 25ca6a3..3f6473d 100644
--- a/README.md
+++ b/README.md
@@ -196,11 +196,11 @@ if dif <= 0_f64 {
 pub type RE = RError<String>;
 ```
 
-Convenience function `re_error` can be used to simplify constructing and returning these errors. Its message argument can be either literal `&str`, or `String` (e.g. constructed by `format!`). It returns a Result, thus it needs `?` operator to unwrap its `Err` variant.
+Convenience functions `nodata_error`, `data_error`, `arith_error` and `other_error` construct and return these errors. Their message argument can be either a literal `&str` or a `String` (e.g. constructed by `format!`). They return `RError` already wrapped up as the `Err` variant of `Result`, cf.:
 
 ```rust
 if dif <= 0_f64 {
-    return re_error("arith","cholesky needs a positive definite matrix")?;
+    return arith_error("cholesky needs a positive definite matrix");
 };
 ```
@@ -258,11 +258,11 @@ The remaining general cases previously required new manual implementations to be
 
 * `sumn`: the sum of the sequence `1..n = n*(n+1)/2`. It is also the size of a lower/upper triangular matrix.
 
-* `t_stat`: of a value x: (x-centre)/spread. In one dimension.
+* `tm_stat`: (x-centre)/dispersion, the generalised t-statistic in one dimension.
 
 * `unit_matrix`: - generates full square unit matrix.
 
-* `re_error` - helps to construct custom RE errors (see Errors above).
+* `nodata_error`, `data_error`, `arith_error`, `other_error` - construct custom RE errors (see the section Errors above).
 
 ## Trait Stats
 
@@ -336,7 +336,9 @@ Methods which take an additional generic vector argument, such as a vector of weights
 
 ## Appendix: Recent Releases
 
-* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by better named `normalize`, should you still need it. `Eigenvectors` is slower, as it needs to do forward substitution to find each one.
+* **Version 2.1.4** - Tidied up some error processing.
+
+* **Version 2.1.3** - Added `pca_reduction` to `struct TriangMat`. Changed `eigenvectors` to compute normalized eigenvectors of the original data rather than of its covariance matrix. That is now done by the better named `normalize` (should you still need it). `Eigenvectors` is somewhat slower, as it needs to solve a forward substitution to find each vector.
 
 * **Version 2.1.2** - Added function `project` to project a `TriangMat` to a lower dimensional space of selected dimensions. Removed `rows` which was a duplicate of `dim`.
 
diff --git a/src/error.rs b/src/error.rs
index ae646ef..0c7a77b 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -10,6 +10,8 @@ pub type RE = RError<String>;
 
 #[derive(Debug)]
 /// Custom RStats Error
+/// Parameter `T` is future proofing, so that any type of argument may be returned.
+/// Currently only messages of type `String` and `&str` are used.
 pub enum RError<T>
 where
     T: Sized + Debug,
@@ -39,6 +41,44 @@ where
         }
     }
 }
+/// Convenience function for building RError::NoDataError(String)
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn nodata_error<T>(msg: impl Into<String>) -> Result<T, RError<String>> {
+    Err(RError::NoDataError(msg.into()))
+}
+/// Convenience function for building RError::DataError(String)
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn data_error<T>(msg: impl Into<String>) -> Result<T, RError<String>> {
+    Err(RError::DataError(msg.into()))
+}
+/// Convenience function for building RError::ArithError(String)
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn arith_error<T>(msg: impl Into<String>) -> Result<T, RError<String>> {
+    Err(RError::ArithError(msg.into()))
+}
+/// Convenience function for building RError::OtherError(String)
+/// from payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn other_error<T>(msg: impl Into<String>) -> Result<T, RError<String>> {
+    Err(RError::OtherError(msg.into()))
+}
+/*
+/// Convenience function for building RError
+/// from short name and payload message, which can be either literal `&str` or `String`.
+/// `String` allows splicing in values of variables for debugging, using `format!`
+pub fn re_error<T>(kind: &str, msg: impl Into<String>) -> Result<T, RError<String>> {
+    match kind {
+        "empty" => Err(RError::NoDataError(msg.into())),
+        "size" => Err(RError::DataError(msg.into())),
+        "arith" => Err(RError::ArithError(msg.into())),
+        "other" => Err(RError::OtherError(msg.into())),
+        _ => Err(RError::OtherError("Wrong error kind given to re_error".into()))
+    }
+}
+*/
 /// Automatically converting any RanError to RError::OtherError
 impl From<RanError<String>> for RError<String> {
     fn from(e: RanError<String>) -> Self {
@@ -66,15 +106,3 @@ impl From<std::io::Error> for RError<String> {
         RError::OtherError(format!("IOError: {e}"))
     }
 }
-
-/// Convenience function for building RError
-/// from short name and payload message, which can be either &str or String
-pub fn re_error<T>(kind: &str, msg: impl Into<String>) -> Result<T, RError<String>> {
-    match kind {
-        "empty" => Err(RError::NoDataError(msg.into())),
-        "size" => Err(RError::DataError(msg.into())),
-        "arith" => Err(RError::ArithError(msg.into())),
-        "other" => Err(RError::OtherError(msg.into())),
-        _ => Err(RError::OtherError("Wrong error kind given to re_error".into()))
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 589bebc..cec96d0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,7 +19,7 @@ pub mod vecvec;
 /// Multidimensional operations on sets of vectors, with additional inputs
 pub mod vecvecg;
 
-pub use crate::error::{re_error, RError, RE};
+pub use crate::error::*;
 // reexporting useful related methods
 pub use indxvec::{here, printing::*, MinMax, Printing};
 pub use medians::{MedError, Median, Medianf64};
diff --git a/src/stats.rs b/src/stats.rs
index 1149cd0..78880f8 100644
--- a/src/stats.rs
+++ b/src/stats.rs
@@ -1,4 +1,4 @@
-use crate::{error::RError, re_error, sumn, fromop, Params, MutVecg, Stats, Vecg, RE};
+use crate::*; //{RError, nodata_error, sumn, fromop, Params, MutVecg, Stats, Vecg, RE};
 use indxvec::Vecops;
 use medians::{Medianf64,Median};
 use core::cmp::Ordering::*;
@@ -28,7 +28,7 @@ where
     /// Vector with reciprocal components
     fn vreciprocal(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("vreciprocal: empty self vec");
         };
         self.iter()
             .map(|component| -> Result<f64, RE> {
@@ -36,9 +36,7 @@ where
                 if c.is_normal() {
                     Ok(1.0 / c)
                 } else {
-                    Err(RError::ArithError(
-                        "no reciprocal for zero component".to_owned(),
-                    ))
+                    arith_error(format!("vreciprocal: bad component {c}"))
                 }
             })
             .collect::<Result<Vec<f64>, RE>>()
     }
@@ -47,22 +45,20 @@
     /// Vector with inverse magnitude
     fn vinverse(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("vinverse: empty self vec");
         };
         let vmagsq = self.vmagsq();
         if vmagsq > 0.0 {
             Ok(self.iter().map(|x| x.clone().into() / vmagsq).collect())
         } else {
-            Err(RError::DataError(
-                "no inverse of zero vector magnitude".to_owned(),
-            ))
+            data_error("vinverse: can not invert zero vector")
         }
     }
 
     // Negated vector (all components swap sign)
     fn negv(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("negv: empty self vec");
         };
         Ok(self.iter().map(|x| (-x.clone().into())).collect())
     }
@@ -70,15 +66,13 @@
     /// Unit vector
     fn vunit(self) -> Result<Vec<f64>, RE> {
         if self.is_empty() {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("vunit: empty self vec");
         };
         let mag = self.vmag();
         if mag > 0.0 {
             Ok(self.iter().map(|x| x.clone().into() / mag).collect())
         } else {
-            Err(RError::DataError(
-                "vector of zero magnitude cannot be made into a unit vector".to_owned(),
-            ))
+            data_error("vunit: can not make zero vector into a unit vector")
         }
     }
@@ -86,7 +80,7 @@
     /// Harmonic spread from median
     fn hmad(self) -> Result<f64, RE> {
         let n = self.len();
         if n == 0 {
-            return Err(RError::NoDataError("empty self vec".to_owned()));
+            return nodata_error("hmad: empty self");
         };
         let fself = self.iter().map(|x| x.clone().into()).collect::<Vec<f64>>();
         let recmedian = 1.0 / fself.medf_checked()?;
@@ -95,7 +89,7 @@
             .map(|x| -> Result<f64, RE> {
                 let fx: f64 = x.clone().into();
                 if !fx.is_normal() {
-                    return re_error("ArithError","attempt to divide by zero");
+                    return arith_error("hmad: attempt to divide by zero");
                 };
                 Ok((recmedian - 1.0 / fx).abs())
             })
@@ -116,7 +110,7 @@
         if n > 0 {
             Ok(self.iter().map(|x| x.clone().into()).sum::<f64>() / (n as f64))
         } else {
-            re_error("NoDataError","empty self vec")
+            nodata_error("amean: empty self vec")
         }
     }
diff --git a/src/triangmat.rs b/src/triangmat.rs
index ad47075..8419e4a 100644
--- a/src/triangmat.rs
+++ b/src/triangmat.rs
@@ -1,4 +1,4 @@
-use crate::{re_error, sumn, RError, Stats, TriangMat, Vecg, MutVecg, RE}; // MStats, MinMax, MutVecg, Stats, VecVec };
+use crate::*; // MStats, MinMax, MutVecg, Stats, VecVec };
 pub use indxvec::{printing::*, Printing, Indices, Vecops};
 
 /// Meanings of 'kind' field. Note that 'Upper Symmetric' would represent the same full matrix as
@@ -43,7 +43,7 @@ impl TriangMat {
     /// Squared euclidian vector magnitude (norm) of the data vector
     pub fn magsq(&self) -> f64 {
         self.data.vmagsq()
-    }
+    }
     /// Sum of the elements:
     /// when applied to the wedge product **a∧b**, returns det(**a,b**)
     pub fn sum(&self) -> f64 {
@@ -67,7 +67,7 @@ impl TriangMat {
             })
             .collect::<Vec<f64>>()
     }
-    /// New unit (symmetric) TriangMat matrix with total data size `n*(n+1)/2`
+    /// New unit (symmetric) TriangMat matrix (data size `n*(n+1)/2`)
     pub fn unit(n: usize) -> Self {
         let mut data = Vec::new();
         for i in 0..n {
@@ -94,9 +94,13 @@ impl TriangMat {
         fullcov.iter_mut().for_each(|eigenvector| eigenvector.munit());
         fullcov
     }
-    /// Eigenvectors (normalized and indexed) of A=LL', given L (the lower triangular Cholesky decomposition).
-    /// The index gives the ordering by eigenvalues.
+
+    /// Normalized eigenvectors of A, given L.
+    /// Here L is the lower triangular Cholesky decomposition of the covariance/comediance matrix of A.
+    /// The index gives the descending order of the eigenvalues.
+    /// It can be used to order the eigenvectors by their relative significance (covariance in their direction).
     pub fn eigenvectors(&self) -> Result<(Vec<Vec<f64>>,Vec<usize>),RE> {
+        if self.is_empty() { return nodata_error("eigenvectors applied to empty L") };
         let n = self.dim();
         let mut evectors = Vec::new();
         let eigenvals = self.eigenvalues();
@@ -108,18 +112,23 @@ impl TriangMat {
             eigenvec.munit(); // normalize the eigenvector
             evectors.push(eigenvec);
         };
+        // descending sort index of eigenvalues
         let index = eigenvals
             .isort_indexed(0..n, |a, b| b.total_cmp(a));
         Ok((evectors,index))
     }
-    /// PCA dimensional reduction using cholesky lower triangular matrix L (self).
-    /// Projecting data using only `new_dims` number of eigenvectors,
+
+    /// PCA dimensional reduction using the Cholesky lower triangular matrix L (as self).
+    /// Projects the data using only `dimensions` number of eigenvectors,
     /// corresponding to the largest eigenvalues.
-    pub fn pca_reduction(self, data: &[Vec<f64>], new_dims: usize) -> Result<Vec<Vec<f64>>,RE> {
-        if new_dims > data.len() { re_error("size","pca_reduction: new_dims exceeds L dimension")? };
+    pub fn pca_reduction(self, data: &[Vec<f64>], dimensions: usize) -> Result<Vec<Vec<f64>>,RE> {
+        if data.is_empty() { return nodata_error("pca_reduction: empty data") };
+        let n = data[0].len();
+        if dimensions > n { return data_error("pca_reduction: new dimensions exceed those of data") };
+        if self.dim() != n { return data_error("pca_reduction: L and data dimensions mismatch") };
         let mut res = Vec::with_capacity(data.len());
-        let (evecs,mut index) = self.eigenvectors()?;
-        index.truncate(new_dims);
+        let (evecs,mut index) = self.eigenvectors()?;
+        index.truncate(dimensions);
         let pruned_evecs = index.unindex(&evecs, true);
         for dvec in data {
             res.push(
@@ -233,7 +242,7 @@ impl TriangMat {
         let sl = self.data.len();
         // input not long enough to compute anything
         if sl < 3 {
-            return re_error("empty", "cholesky needs at least 3x3 TriangMat: {self}")?;
+            return nodata_error("cholesky needs at least 3x3 TriangMat");
         };
         // n is the dimension of the implied square matrix.
        // Not needed as an extra argument. We compute it
@@ -241,7 +250,7 @@ impl TriangMat {
         let (n, c) = TriangMat::rowcol(sl);
         // input is not a triangular number, is of wrong size
         if c != 0 {
-            return re_error("size", "cholesky needs a triangular matrix")?;
+            return data_error("cholesky needs a triangular matrix");
         };
         let mut res = vec![0.0; sl]; // result L is of the same size as the input
         for i in 0..n {
@@ -259,7 +268,7 @@ impl TriangMat {
                     // dif <= 0 means that the input matrix is not positive definite,
                     // or is ill-conditioned, so we return ArithError
                     if dif <= 0_f64 {
-                        return re_error("arith", "cholesky matrix is not positive definite")?;
+                        return arith_error("cholesky matrix is not positive definite");
                     };
                     dif.sqrt()
                 }
diff --git a/src/vecg.rs b/src/vecg.rs
index 7df7714..5693e4a 100644
--- a/src/vecg.rs
+++ b/src/vecg.rs
@@ -1,7 +1,4 @@
-use crate::{
-    error::{re_error, RError, RE},
-    here, Stats, TriangMat, Vecg,
-};
+use crate::*;
 use core::{cmp::Ordering::*, convert::identity};
 use indxvec::{Indices, Vecops};
 use medians::Median;
@@ -51,10 +48,7 @@ where
     fn dotsig(self, sig: &[f64]) -> Result<f64, RE> {
         let dims = self.len();
         if 2 * dims != sig.len() {
-            return re_error(
-                "size",
-                "dotsig: sig vec must have double the dimensions of self",
-            )?;
+            return data_error("dotsig: sig vec must have double the dimensions of self");
         }
         let mut ressum = 0_f64;
         for (i, c) in self.iter().enumerate() {
@@ -174,8 +168,6 @@ where
     /// cityblock distance
     fn cityblockd<U: Into<f64>>(self, v: &[U]) -> f64
-    where
-        U: Into<f64>,
     {
         self.iter()
             .zip(v)
@@ -301,9 +293,7 @@ where
     fn jointpdf<U: Into<f64>>(self, v: &[U]) -> Result<Vec<f64>, RE> {
         let n = self.len();
         if v.len() != n {
-            return Err(RError::DataError(
-                "{jointpdf argument vectors must be of equal length!".to_owned(),
-            ));
+            return data_error("jointpdf argument vectors must be of equal length!");
         };
         let nf = n as f64;
         let mut res: Vec<f64> = Vec::new();
@@ -431,23 +421,23 @@ where
         xvec.ucorrelation(&yvec) // using Indices trait from idxvec
     }
 
-    /// Delta gm that adding point self will cause
+    /// Delta gm that adding a point will cause
     fn contribvec_newpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>, RE> {
         let dv = self.vsub(gm);
         let mag = dv.vmag();
         if !mag.is_normal() {
-            return re_error("arith", "point being added is coincident with gm")?;
+            return arith_error("contribvec_newpt: point being added is coincident with gm");
         };
         // adding new unit vector (to approximate zero vector) and rescaling
         let recip = 1f64 / mag;
         Ok(dv.vunit()?.smult(recip / (recips + recip)))
     }
 
-    /// Normalized magnitude of change to gm that adding point self will cause
+    /// Normalized magnitude of change to gm that adding a point will cause
     fn contrib_newpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64, RE> {
         let mag = self.vdist(gm);
         if !mag.is_normal() {
-            return re_error("arith", here!("point being added is coincident with gm"))?;
+            return arith_error("contrib_newpt: point being added is coincident with gm");
         };
         let recip = 1f64 / mag; // first had to test for division by zero
         Ok((nf + 1.0) / (recips + recip))
     }
@@ -458,7 +448,7 @@ where
         let dv = self.vsub(gm);
         let mag = dv.vmag();
         if !mag.is_normal() {
-            return re_error("arith", here!("point being removed is coincident with gm"))?;
+            return arith_error("contribvec_oldpt: point being removed is coincident with gm");
         };
         let recip = 1f64 / mag; // first had to test for division by zero
         Ok(dv.vunit()?.smult(recip / (recip - recips))) // scaling
     }
@@ -469,7 +459,7 @@ where
     fn contrib_oldpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64, RE> {
         let mag = self.vdist(gm);
         if !mag.is_normal() {
-            return re_error("arith", here!("point being removed is coincident with gm"))?;
+            return arith_error("contrib_oldpt: point being removed is coincident with gm");
         };
         let recip = 1f64 / mag; // first had to test for division by zero
         Ok((nf - 1.0) / (recip - recips))
     }
diff --git a/src/vecvec.rs b/src/vecvec.rs
index e03b2a8..0e0084c 100644
--- a/src/vecvec.rs
+++ b/src/vecvec.rs
@@ -1,4 +1,4 @@
-use crate::{here,sumn, re_error, RError, RE, Params, MinMax, MutVecg, Stats, TriangMat, VecVec, Vecg};
+use crate::*;
 use indxvec::Vecops;
 use medians::{MedError, Medianf64};
 use rayon::prelude::*;
@@ -14,9 +14,7 @@ where
     fn dvdt(self, centre: &[f64]) -> Result<Vec<f64>, RE> {
         let len = self.len();
         if len < 2 {
-            return Err(RError::NoDataError(format!(
-                "dfdt time series too short: {len}"
-            )));
+            return nodata_error(format!("dvdt time series is too short: {len}"));
         };
         let mut weight = 1_f64;
         let mut sumwv:Vec<f64> = self[0].iter().map(|x| x.clone().into()).collect();
@@ -81,7 +79,7 @@ where
         let mut rres = Vec::with_capacity(sumn(min));
         for j in 0..min {
             let Some(slc) = r[j].get(j..d)
-                else { return Err(RError::DataError("house_ur: failed to extract uvec slice".to_owned()));};
+                else { return data_error("house_ur: failed to extract uvec slice"); };
             let uvec = slc.house_reflector();
             for rlast in r.iter_mut().take(d).skip(j) {
                 let rvec = uvec.house_reflect::<f64>(&rlast.drain(j..d).collect::<Vec<f64>>());
@@ -115,18 +113,16 @@ where
         let d = self[0].len(); // their common dimensionality (length)
         for v in self.iter().skip(1) {
             if v.len() != d {
-                return Err(RError::DataError(
-                    "jointpdfn: all vectors must be of equal length!".to_owned(),
-                ));
+                return data_error("jointpdfn: all vectors must be of equal length!");
             };
         }
         let mut res: Vec<f64> = Vec::with_capacity(d);
         let mut tuples = self.transpose();
         let df = tuples.len() as f64; // for turning counts to probabilities
-        // lexical sort to group together occurrences of identical tuples
-        tuples.sort_unstable_by(|a, b| {
-            let Some(x) = a.partial_cmp(b)
-            else { panic!("jointpdfn: comparison fail in f64 sort!"); }; x});
+        // lexical sort to group together occurrences of identical tuples
+        tuples.sort_unstable_by(
+            |a, b| a.partial_cmp(b)
+                .expect("jointpdfn: tuples comparison failed"));
         let mut count = 1_usize; // running count
         let mut lastindex = 0; // initial index of the last unique tuple
         tuples.iter().enumerate().skip(1).for_each(|(i, ti)| {
@@ -261,7 +257,7 @@ where
     /// Radius of a point specified by its subscript.
     fn radius(self, i: usize, gm: &[f64]) -> Result<f64, RE> {
         if i > self.len() {
-            return re_error("DataError",here!("radius: invalid subscript"))?;
+            return data_error("radius: invalid subscript");
         }
         Ok(self[i].vdist(gm))
     }
@@ -293,7 +289,7 @@ where
     fn sigvec(self, idx: &[usize]) -> Result<Vec<f64>, RE> {
         let dims = self[0].len();
         if self.is_empty() {
-            return re_error("empty",here!("sigvec given no data"))?;
+            return nodata_error("sigvec given empty data");
         };
         let mut hemis = vec![0_f64; 2 * dims];
         for &i in idx {
@@ -312,14 +308,14 @@ where
     /// madgm median of distances from gm: stable nd data spread measure
     fn madgm(self, gm: &[f64]) -> Result<f64, RE> {
         if self.is_empty() {
-            return re_error("NoDataError","madgm given zero length vec!")?; };
+            return nodata_error("madgm given empty vec!"); };
         Ok(self.radii(gm)?.medf_unchecked())
     }
 
     /// stdgm mean of distances from gm: nd data spread measure, aka nd standard deviation
     fn stdgm(self, gm: &[f64]) -> Result<f64, RE> {
         if self.is_empty() {
-            return re_error("NoDataError","stdgm given zero length vec!")?; };
+            return nodata_error("stdgm given empty vec!"); };
         Ok( self.iter()
             .map(|s| s.vdist(gm)).sum::<f64>()/self.len() as f64 )
     }
@@ -554,7 +550,7 @@ where
     fn covar(self, mid:&[f64]) -> Result<TriangMat, RE> {
         let d = self[0].len(); // dimension of the vector(s)
         if d != mid.len() {
-            return re_error("data","covar self and mid dimensions mismatch")? };
+            return data_error("covar self and mid dimensions mismatch"); };
         let mut covsum = self
             .par_iter()
             .fold(
@@ -590,7 +586,7 @@ where
     fn serial_covar(self, mid:&[f64]) -> Result<TriangMat, RE> {
         let d = self[0].len(); // dimension of the vector(s)
         if d != mid.len() {
-            return re_error("data","serial_covar self and mid dimensions mismatch")? };
+            return data_error("serial_covar self and mid dimensions mismatch"); };
         let mut covsums = vec![0_f64; (d+1)*d/2];
         for p in self {
             let mut covsub = 0_usize; // subscript into the flattened array cov
diff --git a/src/vecvecg.rs b/src/vecvecg.rs
index 4179c0b..6153ddd 100644
--- a/src/vecvecg.rs
+++ b/src/vecvecg.rs
@@ -1,4 +1,4 @@
-use crate::{re_error,RError,RE,Stats,TriangMat,Vecg,MutVecg,VecVecg,VecVec};
+use crate::*;
 use indxvec::Mutops;
 use medians::Medianf64;
 use rayon::prelude::*;
@@ -40,7 +40,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     fn wdvdt(self, ws: &[U], centre: &[f64]) -> Result<Vec<f64>, RE> {
         let len = self.len();
         if len < 2 {
-            return re_error("empty","time series too short: {len}");
+            return nodata_error(format!("wdvdt: time series too short: {len}"));
         };
         let mut weightsum:f64 = ws[0].clone().into();
         let mut sumv:Vec<f64> = self[0].smult(weightsum);
@@ -55,9 +55,9 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// 1.0-dotproduct with **v**, in range [0,2]
     fn divs(self, v: &[U]) -> Result<Vec<f64>,RE> {
         if self.is_empty() {
-            return re_error("empty","divs given no points")?; };
+            return nodata_error("divs given no points"); };
         if self[0].len() != v.len() {
-            return re_error("size","divs dimensions mismatch")?; };
+            return data_error("divs dimensions mismatch"); };
         let uv = v.vunit()?;
         self.scalar_fn(|p| Ok(1.0-p.vunit()?.dotp(&uv)))
     }
@@ -65,9 +65,9 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// median of weighted 1.0-dotproducts of **v**, with all in self
     fn wdivs(self, ws:&[U], v: &[f64]) -> Result<(Vec<f64>,f64),RE> {
         if self.is_empty() {
-            return re_error("empty","wdivs given no points")?; };
+            return nodata_error("wdivs given no points"); };
         if self[0].len() != v.len() {
-            return re_error("size","wdivs dimensions mismatch")?; };
+            return data_error("wdivs dimensions mismatch"); };
         let uv = v.vunit()?;
         self.scalar_wfn(ws,|p| Ok(1.0-p.vunit()?.dotp(&uv)))
     }
@@ -75,9 +75,9 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// median of weighted cos deviations from **v**
     fn wdivsmed(self, ws: &[U], v: &[f64]) -> Result<f64, RE> {
         if self.is_empty() {
-            return re_error("empty","wmeddivs given no points")?; };
+            return nodata_error("wdivsmed given no points"); };
         if self[0].len() != v.len() {
-            return re_error("size","wmeddivs dimensions mismatch")?; };
+            return data_error("wdivsmed dimensions mismatch"); };
         let (values,wsum) = self.wdivs(ws,v)?;
         Ok((self.len() as f64) * values.medf_unchecked()/wsum)
     }
@@ -85,18 +85,18 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// weighted radii to all points in self
     fn wradii(self, ws:&[U], gm: &[f64]) -> Result<(Vec<f64>,f64),RE> {
         if self.is_empty() {
-            return re_error("empty","wradii given no points")?; };
+            return nodata_error("wradii given no points"); };
         if self[0].len() != gm.len() {
-            return re_error("size","wradii dimensions mismatch")?; };
+            return data_error("wradii dimensions mismatch"); };
         self.scalar_wfn(ws, |p| Ok(p.vdist(gm)))
     }
 
     /// wmadgm median of weighted deviations from (weighted) gm: stable nd data spread estimator.
     fn wmadgm(self, ws: &[U], gm: &[f64]) -> Result<f64, RE> {
         if self.is_empty() {
-            return re_error("empty","wmadgm given no points")?; };
+            return nodata_error("wmadgm given no points"); };
         if self[0].len() != gm.len() {
-            return re_error("size","wmadgm dimensions mismatch")?; };
+            return data_error("wmadgm dimensions mismatch"); };
         let (values,wsum) = self.scalar_wfn(ws,|p| Ok(p.vdist(gm)))?;
         Ok((self.len() as f64) * values.medf_unchecked()/wsum)
     }
@@ -104,14 +104,14 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// Rows of matrix self multiplying (column) vector v
     fn leftmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
         if self[0].len() != v.len() {
-            return re_error("size","leftmultv dimensions mismatch")?; };
+            return data_error("leftmultv dimensions mismatch"); };
         Ok(self.iter().map(|s| s.dotp(v)).collect())
     }
 
     /// Row vector v multipying columns of matrix self
     fn rightmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
         if v.len() != self.len() {
-            return re_error("size","rightmultv dimensions mismatch")?; };
+            return data_error("rightmultv dimensions mismatch"); };
         Ok((0..self[0].len()).map(|colnum| v.columnp(colnum,self)).collect())
     }
@@ -121,7 +121,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// Result dimensions are self.len() x m[0].len()
     fn matmult(self,m: &[Vec<U>]) -> Result<Vec<Vec<f64>>,RE> {
         if self[0].len() != m.len() {
-            return re_error("size","matmult dimensions mismatch")?; };
+            return data_error("matmult dimensions mismatch"); };
         Ok(self.par_iter().map(|srow|
             (0..m[0].len()).map(|colnum| srow.columnp(colnum,m)).collect()
         ).collect::<Vec<Vec<f64>>>())
@@ -168,7 +168,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// This is a robust relationship between two unordered multidimensional sets.
     /// The two sets have to be in the same (dimensional) space but can have different numbers of points.
     fn trend(self, eps:f64, v:Vec<Vec<U>>) -> Result<Vec<f64>,RE> {
-        if self[0].len() != v[0].len() { return Err(RError::DataError("trend dimensions mismatch".to_owned())); };
+        if self[0].len() != v[0].len() { return data_error("trend dimensions mismatch"); };
         let pair = rayon::join(||v.gmedian(eps),||self.gmedian(eps));
         Ok(pair.0.vsub(&pair.1))
     }
@@ -180,7 +180,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// both of which depend on the choice of axis.
     fn translate(self, m:&[U]) -> Result<Vec<Vec<f64>>,RE> {
         if self[0].len() != m.len() {
-            return re_error("size","translate dimensions mismatch")?; };
+            return data_error("translate dimensions mismatch"); };
         self.vector_fn(|s| Ok(s.vsub(m)))
     }
@@ -190,7 +190,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// The result is normalized to unit vector.
     fn wsigvec(self, idx: &[usize], ws:&[U]) -> Result<Vec<f64>,RE> {
         let dims = self[0].len();
-        if self.len() != ws.len() { return re_error("DataError","wsigvec: weights number mismatch")?; };
+        if self.len() != ws.len() { return data_error("wsigvec: weights number mismatch"); };
         let mut hemis = vec![0_f64; 2*dims];
         for &i in idx {
             let wf:f64 = ws[i].clone().into();
@@ -225,7 +225,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// Factors out the entropy of m to save repetition of work
     fn dependencies(self, m:&[U]) -> Result<Vec<f64>,RE> {
         if self[0].len() != m.len() {
-            return re_error("size","dependencies: dimensions mismatch")?; };
+            return data_error("dependencies: dimensions mismatch"); };
         let entropym = m.entropy();
         return self.par_iter().map(|s| -> Result<f64, RE> {
             Ok((entropym + s.entropy())/
@@ -235,7 +235,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// Individual distances from any point v, typically not a member, to all the members of self.
     fn dists(self, v:&[U]) -> Result<Vec<f64>,RE> {
         if self[0].len() != v.len() {
-            return re_error("size","dists dimensions mismatch")?; }
+            return data_error("dists dimensions mismatch"); }
         self.scalar_fn(|p| Ok(p.vdist(v)))
     }
@@ -246,16 +246,16 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// The radius (distance) from gm is far more efficient, once gm has been found.
     fn distsum(self, v: &[U]) -> Result<f64, RE> {
         if self[0].len() != v.len() {
-            return re_error("size","distsum dimensions mismatch")?; }
+            return data_error("distsum dimensions mismatch"); }
         Ok(self.iter().map(|p| p.vdist(v)).sum::<f64>())
     }
 
     /// Sorted weighted radii to all member points from the Geometric Median.
     fn wsortedrads(self, ws: &[U], gm:&[f64]) -> Result<Vec<f64>,RE> {
         if self.len() != ws.len() {
-            return re_error("size","wsortedrads self and ws lengths mismatch")?; };
+            return data_error("wsortedrads self and ws lengths mismatch"); };
         if self[0].len() != gm.len() {
-            return re_error("size","wsortedrads self and gm dimensions mismatch")?; };
+            return data_error("wsortedrads self and gm dimensions mismatch"); };
         let wf = ws.iter().map(|x| x.clone().into()).collect::<Vec<f64>>();
         let wnorm = 1.0 / wf.iter().sum::<f64>();
         let mut res = self.iter().map(|s| wnorm*s.vdist::<f64>(gm))
@@ -267,7 +267,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// Weighted Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
     fn wgmedian(self, ws:&[U], eps: f64) -> Result<Vec<f64>,RE> {
         if self.len() != ws.len() {
-            return re_error("size","wgmedian and ws lengths mismatch")?; };
+            return data_error("wgmedian and ws lengths mismatch"); };
         let mut g = self.wacentroid(ws); // start iterating from the weighted centre
         let mut recsum = 0f64;
         loop { // vector iteration till accuracy eps is exceeded
@@ -300,7 +300,7 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     /// The sum of reciprocals is strictly increasing and so is used to easily evaluate the termination condition.
     fn par_wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>,RE> {
         if self.len() != ws.len() {
-            return Err(RError::DataError("wgmedian and ws lengths mismatch".to_owned())); };
+            return data_error("wgmedian and ws lengths mismatch"); };
         let mut g = self.wacentroid(ws); // start iterating from the weighted centre or from vec![0_f64; self[0].len()]
         let mut recsum = 0_f64;
         loop {
@@ -384,9 +384,9 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     fn wcovar(self, ws:&[U], mid:&[f64]) -> Result<TriangMat, RE> {
         let n = self[0].len(); // dimension of the vector(s)
         if n != mid.len() {
-            return re_error("data","wcovar self and m dimensions mismatch")? };
+            return data_error("wcovar self and m dimensions mismatch"); };
         if self.len() != ws.len() {
-            return re_error("data","wcovar self and ws lengths mismatch")? };
+            return data_error("wcovar self and ws lengths mismatch"); };
         let (mut covsum,wsum) = self
             .par_iter().zip(ws)
             .fold(
@@ -425,9 +425,9 @@ impl<T, U> VecVecg<T, U> for &[Vec<T>]
     fn serial_wcovar(self, ws:&[U], mid:&[f64]) -> Result<TriangMat, RE> {
         let d = self[0].len(); // dimension of the vector(s)
         if d != mid.len() {
-            return re_error("data","serial_wcovar self and mid dimensions mismatch")? };
+            return data_error("serial_wcovar self and mid dimensions mismatch"); };
         if self.len() != ws.len() {
-            return re_error("data","serial_wcovar self and ws lengths mismatch")? };
+            return data_error("serial_wcovar self and ws lengths mismatch"); };
         let mut covsums = vec![0_f64; (d+1)*d/2];
         let mut wsum = 0_f64;
         for (p,w) in self.iter().zip(ws) {
diff --git a/tests/tests.rs b/tests/tests.rs
index 357b6e0..a34c907 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -1,9 +1,7 @@
 use indxvec::{printing::*, Indices, Printing, Vecops};
 use medians::{Median, Medianf64};
 use ran::*;
-use rstats::{
-    fromop, tm_stat, unit_matrix, re_error, RE, Stats, TriangMat, VecVec, VecVecg, Vecg, Vecu8
-};
+use rstats::*;
 use times::benchvvf64;
 
 pub const EPS: f64 = 1e-3;
@@ -43,7 +41,7 @@ fn u8() -> Result<(), RE> {
     set_seeds(77777);
     let pt = ranvv_u8(n,d)?;
     println!("Acentroid:\n{}", pt.acentroid().gr());
-    println!("G-median :\n{}", pt.gmedian(EPS).gr());
+    println!("Geometric median:\n{}", pt.gmedian(EPS).gr());
     let cov = pt.covar(&pt.acentroid())?;
     println!("Covariances:\n{cov}");
     let com = pt.covar(&pt.gmedian(EPS))?;
@@ -205,9 +203,9 @@ fn vecg() -> Result<(), RE> {
     println!("Joint Entropy:\t\t{}", v1.jointentropy(&v2)?.gr());
     println!("Dependence:\t\t{}", v1.dependence(&v2)?.gr());
     println!("Independence:\t\t{}", v1.independence(&v2)?.gr());
-    println!("Wedge product:\n{}",v1.wedge(&v2).gr());
+    println!("\nWedge product:\n{}",v1.wedge(&v2).gr());
     println!("Geometric product:\n{}",v1.geometric(&v2).gr());
-    println!("Sine: {} {} check: {}",v1.sine(&v2).gr(),v2.sine(&v1).gr(),(v1.varea(&v2)/v1.vmag()/v2.vmag()).gr());
+    println!("Sine v1v2: {} v2v1: {} check: {}",v1.sine(&v2).gr(),v2.sine(&v1).gr(),(v1.varea(&v2)/v1.vmag()/v2.vmag()).gr());
     println!("Cosine:\t\t\t{}", v1.cosine(&v2).gr());
     println!("cos^2+sin^2 check:\t{}", (v1.cosine(&v2).powi(2)+v1.sine(&v2).powi(2)).gr());
     println!(
@@ -273,7 +271,7 @@ fn triangmat() -> Result<(), RE> {
     let (evecs,index) = chol.eigenvectors()?;
     println!("Eigenvectors:\n{}Their sort index by eigenvalues:\n{}",
         evecs.gr(),index.gr());
-    println!("Original data reduced to 3 dimensions:\n{}",
+    println!("Original data PCA reduced to 3 dimensions:\n{}",
         chol.pca_reduction(&pts,3)?.gr());
     Ok(())
 }
@@ -437,7 +435,7 @@ fn hulls() -> Result<(), RE> {
     let zeropts = pts.translate(&median)?;
     let (innerhull, outerhull) = zeropts.hulls();
     if innerhull.is_empty() || outerhull.is_empty() {
-        return re_error("arith","no hull points found")? };
+        return arith_error("no hull points found"); };
     let mad = zeropts.madgm(&median)?;
     println!("Madgm of zeropts: {}", mad.gr());
     println!(