2.0.9

liborty · Mar 6, 2024 · b00ca18 · b00ca18
1 parent cec7d5e
commit b00ca18
Show file tree

Hide file tree

Showing 6 changed files with 35 additions and 91 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rstats"
-version = "2.0.8"
+version = "2.0.9"
 authors = ["Libor Spacek"]
 edition = "2021"
 description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, Mahalanobis Distance, Hulls, Multithreading.."
@@ -20,7 +20,7 @@ include = [
 maintenance = { status = "actively-developed" }
 [lib]
 [dependencies]
-rayon = "1.8"
+rayon = "1.9"
 indxvec = "1.8"
 medians = "3.0"   
 ran = "2.0"

diff --git a/README.md b/README.md
@@ -332,6 +332,8 @@ Methods which take an additional generic vector argument, such as a vector of we
 
 ## Appendix: Recent Releases
 
+* **Version 2.0.9** - Pruned some rarely used methods, simplified `gmparts` and `gmerror`.
+
 * **Version 2.0.8** - Changed initial guess in iterative weighted gm methods to weighted mean. This, being more accurate than plain mean, leads to fewer iterations. Updated some dependencies.
 
 * **Version 2.0.7** - Updated to `ran 2.0`.

diff --git a/src/lib.rs b/src/lib.rs
@@ -314,24 +314,18 @@ pub trait VecVec<T> {
     fn gcentroid(self) -> Result<Vec<f64>, RE>;
     /// Harmonic Centroid = harmonic mean of a set of points
     fn hcentroid(self) -> Result<Vec<f64>, RE>;
-    /// Possible first iteration point for geometric medians
-    fn firstpoint(self) -> Vec<f64>;
     /// Sums of distances from each point to all other points
     fn distsums(self) -> Vec<f64>;
     /// Medoid distance, its index, outlier distance, its index
     fn medout(self, gm: &[f64]) -> Result<MinMax<f64>, RE>;
-    /// Like gmparts, except only does one iteration from any non-member point g
-    fn nxnonmember(self, g: &[f64]) -> (Vec<f64>, Vec<f64>, f64);
-    /// Radius of a point specified by its subscript.    
+    /// Radius of a point specified by its subscript.
     fn radius(self, i: usize, gm: &[f64]) -> Result<f64, RE>;
     /// Arith mean and std (in Params struct), Median and mad, Medoid and Outlier (in MinMax struct)
     fn eccinfo(self, gm: &[f64]) -> Result<(Params, Params, MinMax<f64>), RE>
     where
         Vec<f64>: FromIterator<f64>;
     /// Quasi median, recommended only for comparison purposes
     fn quasimedian(self) -> Result<Vec<f64>, RE>;
-    /// Geometric median estimate's error
-    fn gmerror(self, gm: &[f64]) -> f64;
     /// Proportional projections on each +/- axis (by hemispheres)
     fn sigvec(self, idx: &[usize]) -> Result<Vec<f64>, RE>;
     /// madgm, median of radii from geometric median: stable nd data spread estimator
@@ -346,12 +340,14 @@ pub trait VecVec<T> {
     fn depth(self, descending_index: &[usize], p: &[f64]) -> Result<f64,RE>; 
     /// Collects indices of inner and outer hull points, from zero median data
     fn hulls(self) -> (Vec<usize>, Vec<usize>);
+    /// Geometric median's residual error
+    fn gmerror(self, g: &[f64]) -> Result<f64, RE>;
     /// New algorithm for geometric median, to accuracy eps    
     fn gmedian(self, eps: f64) -> Vec<f64>;
     /// Parallel (multithreaded) implementation of Geometric Median. Possibly the fastest you will find.
     fn par_gmedian(self, eps: f64) -> Vec<f64>;
-    /// Like `gmedian` but returns the sum of unit vecs and the sum of reciprocals of distances.
-    fn gmparts(self, eps: f64) -> (Vec<f64>, Vec<f64>, f64);
+    /// Like `gmedian` but returns also the sum of reciprocals of distances
+    fn gmparts(self, eps: f64) -> (Vec<f64>, f64);
 }
 
 /// Methods applicable to slice of vectors of generic end type, plus one other argument
@@ -411,8 +407,8 @@ pub trait VecVecg<T, U> {
     fn wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
     /// Parallel (multithreaded) implementation of the weighted Geometric Median.  
     fn par_wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
-    /// Like `wgmedian` but returns also the sum of unit vecs and the sum of reciprocals.
-    fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, Vec<f64>, f64), RE>;
+    /// Like `wgmedian` but returns also the sum of reciprocals.
+    fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, f64), RE>;
     /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
     fn covar(self, med: &[U]) -> Result<TriangMat, RE>;
     /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.

diff --git a/src/vecvec.rs b/src/vecvec.rs
@@ -286,12 +286,6 @@ where
             .collect::<Result<Vec<f64>, MedError<String>>>()?)
     }
 
-    /// Geometric median's estimated error
-    fn gmerror(self, g: &[f64]) -> f64 {
-        let (gm, _, _) = self.nxnonmember(g);
-        gm.vdist::<f64>(g)
-    }
-
     /// Proportional projections on each +/- axis (by hemispheres).
     /// Adds only points that are specified in idx.
     /// Self should be zero median vectors, previously obtained by `self.translate(&gm)`.
@@ -399,52 +393,11 @@ where
         (innerindex, outerindex)
     }
 
-    /// Initial (first) point for geometric medians.
-    fn firstpoint(self) -> Vec<f64> {
-        let mut rsum = 0_f64;
-        let mut vsum = vec![0_f64; self[0].len()];
-        for p in self {
-            let mag = p.iter().map(|pi| pi.clone().into().powi(2)).sum::<f64>(); // vmag();
-            if mag.is_normal() {
-                // skip if p is at the origin
-                let rec = 1.0_f64 / (mag.sqrt());
-                // the sum of reciprocals of magnitudes for the final scaling
-                rsum += rec;
-                // add this unit vector to their sum
-                vsum.mutvadd::<f64>(&p.smult::<f64>(rec))
-            }
-        }
-        vsum.mutsmult::<f64>(1.0 / rsum); // scale by the sum of reciprocals
-        vsum // good initial gm
-    }
-
-    /// Like gmparts, except only does one iteration from any non-member point g
-    fn nxnonmember(self, g: &[f64]) -> (Vec<f64>, Vec<f64>, f64) {
-        // vsum is the sum vector of unit vectors towards the points
-        let mut vsum = vec![0_f64; self[0].len()];
-        let mut recip = 0_f64;
-        for x in self {
-            // |x-p| done in-place for speed. Could have simply called x.vdist(p)
-            let mag: f64 = x
-                .iter()
-                .zip(g)
-                .map(|(xi, &gi)| (xi.clone().into() - gi).powi(2))
-                .sum::<f64>();
-            if mag.is_normal() {
-                // ignore this point should distance be zero
-                let rec = 1.0_f64 / (mag.sqrt()); // reciprocal of distance (scalar)
-                                                  // vsum increments by components
-                vsum.iter_mut()
-                    .zip(x)
-                    .for_each(|(vi, xi)| *vi += xi.clone().into() * rec);
-                recip += rec // add separately the reciprocals for final scaling
-            }
-        }
-        (
-            vsum.iter().map(|vi| vi / recip).collect::<Vec<f64>>(),
-            vsum,
-            recip
-        )
+    /// Geometric median's residual error
+    fn gmerror(self, g: &[f64]) -> Result<f64, RE> {
+        let mut unitvecssum = vec![0_f64; self[0].len()];
+        for v in self { unitvecssum.mutvadd(&v.vsub(g).vunit()?); };
+        Ok(unitvecssum.vmag())
     }
 
     /// Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
@@ -512,7 +465,6 @@ where
                             .zip(&g)
                             .map(|(vi, gi)| (vi.clone().into() - gi).powi(2))
                             .sum();
-                        // let (mut vecsum, mut recsum) = pair;
                         if mag > eps {
                             let rec = 1.0_f64 / (mag.sqrt()); // reciprocal of distance (scalar)
                             for (vi, gi) in p.iter().zip(&mut pair.0) {
@@ -541,8 +493,8 @@ where
         }
     }
 
-    /// Like `gmedian` but returns also the sum of unit vecs and the sum of reciprocals.
-    fn gmparts(self, eps: f64) -> (Vec<f64>, Vec<f64>, f64) {
+    /// Like `gmedian` but returns also the sum of reciprocals.
+    fn gmparts(self, eps: f64) -> (Vec<f64>, f64) {
         let mut g = self.acentroid(); // start iterating from the Centre
         let mut recsum = 0f64;
         loop {
@@ -573,8 +525,7 @@ where
                     nextg
                         .iter()
                         .map(|&gi| gi / nextrecsum)
-                        .collect::<Vec<f64>>(),
-                    nextg,
+                        .collect::<Vec<f64>>(), 
                     nextrecsum,
                 );
             }; // termination

diff --git a/src/vecvecg.rs b/src/vecvecg.rs
@@ -346,7 +346,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
     }
 
     /// Like `wgmedian` but returns also the sum of reciprocals.
-    fn wgmparts(self, ws:&[U], eps: f64) -> Result<(Vec<f64>,Vec<f64>,f64),RE> { 
+    fn wgmparts(self, ws:&[U], eps: f64) -> Result<(Vec<f64>,f64),RE> { 
         if self.len() != ws.len() { 
             return Err(RError::DataError("wgmparts and ws lengths mismatch".to_owned())); };
         let mut g = self.wacentroid(ws); // start iterating from the weighted centre
@@ -367,8 +367,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
             }
             if nextrecsum-recsum < eps { 
                 return Ok((
-                    nextg.iter().map(|&gi| gi/nextrecsum).collect::<Vec<f64>>(),
-                    nextg,
+                    nextg.iter().map(|&gi| gi/nextrecsum).collect::<Vec<f64>>(),    
                     nextrecsum
                 )); }; // termination        
             nextg.iter_mut().for_each(|gi| *gi /= nextrecsum);

diff --git a/tests/tests.rs b/tests/tests.rs
@@ -203,7 +203,7 @@ fn vecg() -> Result<(), RE> {
     println!("Independence:\t\t{}", v1.independence(&v2)?.gr());
     println!("Wedge product:\n{}",v1.wedge(&v2).gr());
     println!("Geometric product:\n{}",v1.geometric(&v2).gr());
-    println!("Sine: {} {} {YL}{}{UN}",v1.sine(&v2).gr(),v2.sine(&v1).gr(),v1.varea(&v2)/v1.vmag()/v2.vmag());
+    println!("Sine: {} {} check: {}",v1.sine(&v2).gr(),v2.sine(&v1).gr(),(v1.varea(&v2)/v1.vmag()/v2.vmag()).gr());
     println!("Cosine:\t\t\t{}", v1.cosine(&v2).gr());
     println!("cos^2+sin^2 check:\t{}", (v1.cosine(&v2).powi(2)+v1.sine(&v2).powi(2)).gr());
     println!(
@@ -298,7 +298,7 @@ fn vecvec() -> Result<(), RE> {
     println!("First data vector:\n{}",pts[0].gr());
     println!("Joint entropy: {}", pts.jointentropyn()?.gr());
     println!("Dependence:    {}", pts.dependencen()?.gr());
-    let (median, _vsum, recips) = pts.gmparts(EPS);
+    let (median,recipsum) = pts.gmparts(EPS);
     println!("Approximate dv/dt:\n{}", pts.dvdt(&median)?.gr());
     let outcomes = ranv_u8(n)?;
     println!("\nRandom testing outcomes:\n{}",outcomes.gr());
@@ -321,9 +321,6 @@ fn vecvec() -> Result<(), RE> {
     let gcentroid = pts.gcentroid()?;
     let acentroid = pts.acentroid();
     let quasimed = pts.quasimedian()?;
-    let firstp = pts.firstpoint();
-
-    println!("Mean reciprocal of radius: {}", (recips / d as f64).gr());
     let dists = pts.distsums();
     let md = dists.minmax();
     println!("Medoid and Outlier Total Distances:\n{md}");
@@ -354,11 +351,10 @@ fn vecvec() -> Result<(), RE> {
         pts.radius(radsindex[0], &median)? / pts.radius(radsindex[radsindex.len() - 1], &median)?
     );
     println!("Madgm:               {}", pts.madgm(&median)?.gr());
-    println!("Median's error:      {GR}{:e}{UN}", pts.gmerror(&median));
+    println!("Median's error:      {}", pts.gmerror(&median)?.gr());
     println!("Stdgm:               {}", pts.stdgm(&median)?.gr());
     println!("ACentroid's radius:  {}", acentroid.vdist(&median).gr());
     println!("Quasimed's radius:   {}", quasimed.vdist(&median).gr());
-    println!("Firstpoint's radius: {}", firstp.vdist(&median).gr());
     println!("GCentroid's radius:  {}", gcentroid.vdist(&median).gr());
     println!("HCentroid's radius:  {}", hcentroid.vdist(&median).gr());
     println!("Medoid's radius:     {}", medoid.vdist(&median).gr());
@@ -391,21 +387,21 @@ fn vecvec() -> Result<(), RE> {
 
     println!(
         "\nContribution of adding acentroid:    {}",
-        acentroid.contrib_newpt(&median, recips, nf)?.gr()
+        acentroid.contrib_newpt(&median, recipsum, nf)?.gr()
     );
     println!(
         "Contribution of adding gcentroid:    {}",
-        gcentroid.contrib_newpt(&median, recips, nf)?.gr()
+        gcentroid.contrib_newpt(&median, recipsum, nf)?.gr()
     );
     println!(
         "Contribution of removing gcentroid: {}",
         gcentroid
-            .contrib_oldpt(&median, recips + 1.0 / median.vdist(&gcentroid), nf)? 
+            .contrib_oldpt(&median, recipsum + 1.0 / median.vdist(&gcentroid), nf)? 
             .gr()
     );
     let contribs = pts
         .iter()
-        .map(|p|-> Result<f64,RE> { p.contrib_oldpt(&median, recips, nf)})
+        .map(|p|-> Result<f64,RE> { p.contrib_oldpt(&median, recipsum, nf)})
         .collect::<Result<Vec<f64>,RE>>()?;
     println!(
         "\nContributions of removing data points, summary:\n{}\nCentroid: {}\nMedian: {}",
@@ -581,7 +577,7 @@ fn geometric_medians() -> Result<(), RE> {
     ];
     set_seeds(7777777777_u64); // initialise random numbers generator
                                // Rnum specifies the type of the random numbers required
-    println!("\n{YL}Timing Comparisons (in nanoseconds){UN}");
+    println!("\n{YL}Timing Comparisons (in nanoseconds):   {UN}");
     benchvvf64(
         100,
         1000..1500,
@@ -594,7 +590,7 @@ fn geometric_medians() -> Result<(), RE> {
     let n = 100_usize;
     let d = 1000_usize;
     set_seeds(7777777);
-    println!("\n{YL}Total errors for {ITERATIONS} repeats of {n} points in {d} dimensions{UN}\n");
+    println!("\n{RD}Total errors for {ITERATIONS} repeats of {n} points in {d} dimensions:{UN}\n");
     let mut sumg = 0_f64;
     let mut sumr = 0_f64;
     let mut sumq = 0_f64;
@@ -604,15 +600,15 @@ fn geometric_medians() -> Result<(), RE> {
     for _i in 1..ITERATIONS {
         let pts = ranvv_f64(n,d)?;
         gm = pts.gmedian(EPS);
-        sumg += pts.gmerror(&gm);
+        sumg += pts.gmerror(&gm)?;
         gm = pts.par_gmedian(EPS);
-        sumr += pts.gmerror(&gm);
+        sumr += pts.gmerror(&gm)?;
         gm = pts.quasimedian()?;
-        sumq += pts.gmerror(&gm);
+        sumq += pts.gmerror(&gm)?;
         gm = pts.acentroid();
-        summ += pts.gmerror(&gm);
+        summ += pts.gmerror(&gm)?;
         gm = pts.par_acentroid();
-        sump += pts.gmerror(&gm);
+        sump += pts.gmerror(&gm)?;
     }
     println!("{MG}par_gmedian   {GR}{sumr:.10}{UN}");
     println!("{MG}gmedian       {GR}{sumg:.10}{UN}");