Skip to content

Commit

Permalink
2.0.9
Browse files Browse the repository at this point in the history
  • Loading branch information
liborty committed Mar 6, 2024
1 parent cec7d5e commit b00ca18
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 91 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rstats"
version = "2.0.8"
version = "2.0.9"
authors = ["Libor Spacek"]
edition = "2021"
description = "Statistics, Information Measures, Data Analysis, Linear Algebra, Clifford Algebra, Machine Learning, Geometric Median, Matrix Decompositions, Mahalanobis Distance, Hulls, Multithreading.."
Expand All @@ -20,7 +20,7 @@ include = [
maintenance = { status = "actively-developed" }
[lib]
[dependencies]
rayon = "1.8"
rayon = "1.9"
indxvec = "1.8"
medians = "3.0"
ran = "2.0"
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ Methods which take an additional generic vector argument, such as a vector of we

## Appendix: Recent Releases

* **Version 2.0.9** - Pruned some rarely used methods, simplified `gmparts` and `gmerror`.

* **Version 2.0.8** - Changed initial guess in iterative weighted gm methods to weighted mean. This, being more accurate than plain mean, leads to fewer iterations. Updated some dependencies.

* **Version 2.0.7** - Updated to `ran 2.0`.
Expand Down
18 changes: 7 additions & 11 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,24 +314,18 @@ pub trait VecVec<T> {
fn gcentroid(self) -> Result<Vec<f64>, RE>;
/// Harmonic Centroid = harmonic mean of a set of points
fn hcentroid(self) -> Result<Vec<f64>, RE>;
/// Possible first iteration point for geometric medians
fn firstpoint(self) -> Vec<f64>;
/// Sums of distances from each point to all other points
fn distsums(self) -> Vec<f64>;
/// Medoid distance, its index, outlier distance, its index
fn medout(self, gm: &[f64]) -> Result<MinMax<f64>, RE>;
/// Like gmparts, except only does one iteration from any non-member point g
fn nxnonmember(self, g: &[f64]) -> (Vec<f64>, Vec<f64>, f64);
/// Radius of a point specified by its subscript.
/// Radius of a point specified by its subscript.
fn radius(self, i: usize, gm: &[f64]) -> Result<f64, RE>;
/// Arith mean and std (in Params struct), Median and mad, Medoid and Outlier (in MinMax struct)
fn eccinfo(self, gm: &[f64]) -> Result<(Params, Params, MinMax<f64>), RE>
where
Vec<f64>: FromIterator<f64>;
/// Quasi median, recommended only for comparison purposes
fn quasimedian(self) -> Result<Vec<f64>, RE>;
/// Geometric median estimate's error
fn gmerror(self, gm: &[f64]) -> f64;
/// Proportional projections on each +/- axis (by hemispheres)
fn sigvec(self, idx: &[usize]) -> Result<Vec<f64>, RE>;
/// madgm, median of radii from geometric median: stable nd data spread estimator
Expand All @@ -346,12 +340,14 @@ pub trait VecVec<T> {
fn depth(self, descending_index: &[usize], p: &[f64]) -> Result<f64,RE>;
/// Collects indices of outer and inner hull points, from zero median data
fn hulls(self) -> (Vec<usize>, Vec<usize>);
/// Geometric median's residual error
fn gmerror(self, g: &[f64]) -> Result<f64, RE>;
/// New algorithm for geometric median, to accuracy eps
fn gmedian(self, eps: f64) -> Vec<f64>;
/// Parallel (multithreaded) implementation of Geometric Median. Possibly the fastest you will find.
fn par_gmedian(self, eps: f64) -> Vec<f64>;
/// Like `gmedian` but returns the sum of unit vecs and the sum of reciprocals of distances.
fn gmparts(self, eps: f64) -> (Vec<f64>, Vec<f64>, f64);
/// Like `gmedian` but returns also the sum of reciprocals of distances
fn gmparts(self, eps: f64) -> (Vec<f64>, f64);
}

/// Methods applicable to slice of vectors of generic end type, plus one other argument
Expand Down Expand Up @@ -411,8 +407,8 @@ pub trait VecVecg<T, U> {
fn wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
/// Parallel (multithreaded) implementation of the weighted Geometric Median.
fn par_wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
/// Like `wgmedian` but returns also the sum of unit vecs and the sum of reciprocals.
fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, Vec<f64>, f64), RE>;
/// Like `wgmedian` but returns also the sum of reciprocals.
fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, f64), RE>;
/// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
fn covar(self, med: &[U]) -> Result<TriangMat, RE>;
/// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
Expand Down
65 changes: 8 additions & 57 deletions src/vecvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,6 @@ where
.collect::<Result<Vec<f64>, MedError<String>>>()?)
}

/// Geometric median's estimated error.
/// Runs `self.nxnonmember(g)` (documented in this file as a single iteration
/// from a non-member point `g`), keeps only the first returned component and
/// returns its `vdist` from `g`.
/// NOTE(review): `vdist` is presumably the Euclidean distance - confirm.
fn gmerror(self, g: &[f64]) -> f64 {
// only the new point is needed here; the other two returned parts are discarded
let (gm, _, _) = self.nxnonmember(g);
gm.vdist::<f64>(g)
}

/// Proportional projections on each +/- axis (by hemispheres).
/// Adds only points that are specified in idx.
/// Self should be zero median vectors, previously obtained by `self.translate(&gm)`.
Expand Down Expand Up @@ -399,52 +393,11 @@ where
(innerindex, outerindex)
}

/// Initial (first) point for geometric medians.
/// Accumulates every point scaled by the reciprocal of its own magnitude,
/// then scales that sum by the reciprocal of the summed reciprocals.
/// NOTE(review): `self[0]` is indexed unconditionally, so an empty `self`
/// would presumably panic - confirm against the caller's guarantees.
/// NOTE(review): if no point passes the `is_normal` check, `rsum` stays 0
/// and the final scaling uses `1.0 / 0.0` - confirm this cannot happen.
fn firstpoint(self) -> Vec<f64> {
// running sum of reciprocals of magnitudes
let mut rsum = 0_f64;
// running sum of the scaled points, one component per dimension of the first point
let mut vsum = vec![0_f64; self[0].len()];
for p in self {
let mag = p.iter().map(|pi| pi.clone().into().powi(2)).sum::<f64>(); // sum of squares, i.e. squared magnitude of p
if mag.is_normal() {
// skip if p is at the origin (`is_normal` also rejects subnormal, infinite and NaN values)
let rec = 1.0_f64 / (mag.sqrt());
// the sum of reciprocals of magnitudes for the final scaling
rsum += rec;
// add this unit vector to their sum
vsum.mutvadd::<f64>(&p.smult::<f64>(rec))
}
}
vsum.mutsmult::<f64>(1.0 / rsum); // scale by the sum of reciprocals
vsum // good initial gm
}

/// Like gmparts, except only does one iteration from any non-member point g
fn nxnonmember(self, g: &[f64]) -> (Vec<f64>, Vec<f64>, f64) {
// vsum is the sum vector of unit vectors towards the points
let mut vsum = vec![0_f64; self[0].len()];
let mut recip = 0_f64;
for x in self {
// |x-p| done in-place for speed. Could have simply called x.vdist(p)
let mag: f64 = x
.iter()
.zip(g)
.map(|(xi, &gi)| (xi.clone().into() - gi).powi(2))
.sum::<f64>();
if mag.is_normal() {
// ignore this point should distance be zero
let rec = 1.0_f64 / (mag.sqrt()); // reciprocal of distance (scalar)
// vsum increments by components
vsum.iter_mut()
.zip(x)
.for_each(|(vi, xi)| *vi += xi.clone().into() * rec);
recip += rec // add separately the reciprocals for final scaling
}
}
(
vsum.iter().map(|vi| vi / recip).collect::<Vec<f64>>(),
vsum,
recip
)
/// Geometric median's residual error.
/// For every point `v` in `self`, adds `v.vsub(g).vunit()` into an accumulator
/// and returns the `vmag` of the accumulated vector.
/// NOTE(review): going by the helper names, this is the magnitude of the sum
/// of unit vectors pointing from `g` to each point - confirm.
/// # Errors
/// Propagates any error returned by `vunit`.
/// NOTE(review): presumably raised when a point coincides with `g`, i.e. a
/// zero-length difference vector - confirm against `vunit`.
fn gmerror(self, g: &[f64]) -> Result<f64, RE> {
// accumulator with one component per dimension of the first point
let mut unitvecssum = vec![0_f64; self[0].len()];
for v in self { unitvecssum.mutvadd(&v.vsub(g).vunit()?); };
Ok(unitvecssum.vmag())
}

/// Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
Expand Down Expand Up @@ -512,7 +465,6 @@ where
.zip(&g)
.map(|(vi, gi)| (vi.clone().into() - gi).powi(2))
.sum();
// let (mut vecsum, mut recsum) = pair;
if mag > eps {
let rec = 1.0_f64 / (mag.sqrt()); // reciprocal of distance (scalar)
for (vi, gi) in p.iter().zip(&mut pair.0) {
Expand Down Expand Up @@ -541,8 +493,8 @@ where
}
}

/// Like `gmedian` but returns also the sum of unit vecs and the sum of reciprocals.
fn gmparts(self, eps: f64) -> (Vec<f64>, Vec<f64>, f64) {
/// Like `gmedian` but returns also the sum of reciprocals.
fn gmparts(self, eps: f64) -> (Vec<f64>, f64) {
let mut g = self.acentroid(); // start iterating from the Centre
let mut recsum = 0f64;
loop {
Expand Down Expand Up @@ -573,8 +525,7 @@ where
nextg
.iter()
.map(|&gi| gi / nextrecsum)
.collect::<Vec<f64>>(),
nextg,
.collect::<Vec<f64>>(),
nextrecsum,
);
}; // termination
Expand Down
5 changes: 2 additions & 3 deletions src/vecvecg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
}

/// Like `wgmedian` but returns also the sum of reciprocals.
fn wgmparts(self, ws:&[U], eps: f64) -> Result<(Vec<f64>,Vec<f64>,f64),RE> {
fn wgmparts(self, ws:&[U], eps: f64) -> Result<(Vec<f64>,f64),RE> {
if self.len() != ws.len() {
return Err(RError::DataError("wgmparts and ws lengths mismatch".to_owned())); };
let mut g = self.wacentroid(ws); // start iterating from the weighted centre
Expand All @@ -367,8 +367,7 @@ impl<T,U> VecVecg<T,U> for &[Vec<T>]
}
if nextrecsum-recsum < eps {
return Ok((
nextg.iter().map(|&gi| gi/nextrecsum).collect::<Vec<f64>>(),
nextg,
nextg.iter().map(|&gi| gi/nextrecsum).collect::<Vec<f64>>(),
nextrecsum
)); }; // termination
nextg.iter_mut().for_each(|gi| *gi /= nextrecsum);
Expand Down
32 changes: 14 additions & 18 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ fn vecg() -> Result<(), RE> {
println!("Independence:\t\t{}", v1.independence(&v2)?.gr());
println!("Wedge product:\n{}",v1.wedge(&v2).gr());
println!("Geometric product:\n{}",v1.geometric(&v2).gr());
println!("Sine: {} {} {YL}{}{UN}",v1.sine(&v2).gr(),v2.sine(&v1).gr(),v1.varea(&v2)/v1.vmag()/v2.vmag());
println!("Sine: {} {} check: {}",v1.sine(&v2).gr(),v2.sine(&v1).gr(),(v1.varea(&v2)/v1.vmag()/v2.vmag()).gr());
println!("Cosine:\t\t\t{}", v1.cosine(&v2).gr());
println!("cos^2+sin^2 check:\t{}", (v1.cosine(&v2).powi(2)+v1.sine(&v2).powi(2)).gr());
println!(
Expand Down Expand Up @@ -298,7 +298,7 @@ fn vecvec() -> Result<(), RE> {
println!("First data vector:\n{}",pts[0].gr());
println!("Joint entropy: {}", pts.jointentropyn()?.gr());
println!("Dependence: {}", pts.dependencen()?.gr());
let (median, _vsum, recips) = pts.gmparts(EPS);
let (median,recipsum) = pts.gmparts(EPS);
println!("Approximate dv/dt:\n{}", pts.dvdt(&median)?.gr());
let outcomes = ranv_u8(n)?;
println!("\nRandom testing outcomes:\n{}",outcomes.gr());
Expand All @@ -321,9 +321,6 @@ fn vecvec() -> Result<(), RE> {
let gcentroid = pts.gcentroid()?;
let acentroid = pts.acentroid();
let quasimed = pts.quasimedian()?;
let firstp = pts.firstpoint();

println!("Mean reciprocal of radius: {}", (recips / d as f64).gr());
let dists = pts.distsums();
let md = dists.minmax();
println!("Medoid and Outlier Total Distances:\n{md}");
Expand Down Expand Up @@ -354,11 +351,10 @@ fn vecvec() -> Result<(), RE> {
pts.radius(radsindex[0], &median)? / pts.radius(radsindex[radsindex.len() - 1], &median)?
);
println!("Madgm: {}", pts.madgm(&median)?.gr());
println!("Median's error: {GR}{:e}{UN}", pts.gmerror(&median));
println!("Median's error: {}", pts.gmerror(&median)?.gr());
println!("Stdgm: {}", pts.stdgm(&median)?.gr());
println!("ACentroid's radius: {}", acentroid.vdist(&median).gr());
println!("Quasimed's radius: {}", quasimed.vdist(&median).gr());
println!("Firstpoint's radius: {}", firstp.vdist(&median).gr());
println!("GCentroid's radius: {}", gcentroid.vdist(&median).gr());
println!("HCentroid's radius: {}", hcentroid.vdist(&median).gr());
println!("Medoid's radius: {}", medoid.vdist(&median).gr());
Expand Down Expand Up @@ -391,21 +387,21 @@ fn vecvec() -> Result<(), RE> {

println!(
"\nContribution of adding acentroid: {}",
acentroid.contrib_newpt(&median, recips, nf)?.gr()
acentroid.contrib_newpt(&median, recipsum, nf)?.gr()
);
println!(
"Contribution of adding gcentroid: {}",
gcentroid.contrib_newpt(&median, recips, nf)?.gr()
gcentroid.contrib_newpt(&median, recipsum, nf)?.gr()
);
println!(
"Contribution of removing gcentroid: {}",
gcentroid
.contrib_oldpt(&median, recips + 1.0 / median.vdist(&gcentroid), nf)?
.contrib_oldpt(&median, recipsum + 1.0 / median.vdist(&gcentroid), nf)?
.gr()
);
let contribs = pts
.iter()
.map(|p|-> Result<f64,RE> { p.contrib_oldpt(&median, recips, nf)})
.map(|p|-> Result<f64,RE> { p.contrib_oldpt(&median, recipsum, nf)})
.collect::<Result<Vec<f64>,RE>>()?;
println!(
"\nContributions of removing data points, summary:\n{}\nCentroid: {}\nMedian: {}",
Expand Down Expand Up @@ -581,7 +577,7 @@ fn geometric_medians() -> Result<(), RE> {
];
set_seeds(7777777777_u64); // initialise random number generator
// Rnum specifies the type of the random numbers required
println!("\n{YL}Timing Comparisons (in nanoseconds){UN}");
println!("\n{YL}Timing Comparisons (in nanoseconds): {UN}");
benchvvf64(
100,
1000..1500,
Expand All @@ -594,7 +590,7 @@ fn geometric_medians() -> Result<(), RE> {
let n = 100_usize;
let d = 1000_usize;
set_seeds(7777777);
println!("\n{YL}Total errors for {ITERATIONS} repeats of {n} points in {d} dimensions{UN}\n");
println!("\n{RD}Total errors for {ITERATIONS} repeats of {n} points in {d} dimensions:{UN}\n");
let mut sumg = 0_f64;
let mut sumr = 0_f64;
let mut sumq = 0_f64;
Expand All @@ -604,15 +600,15 @@ fn geometric_medians() -> Result<(), RE> {
for _i in 1..ITERATIONS {
let pts = ranvv_f64(n,d)?;
gm = pts.gmedian(EPS);
sumg += pts.gmerror(&gm);
sumg += pts.gmerror(&gm)?;
gm = pts.par_gmedian(EPS);
sumr += pts.gmerror(&gm);
sumr += pts.gmerror(&gm)?;
gm = pts.quasimedian()?;
sumq += pts.gmerror(&gm);
sumq += pts.gmerror(&gm)?;
gm = pts.acentroid();
summ += pts.gmerror(&gm);
summ += pts.gmerror(&gm)?;
gm = pts.par_acentroid();
sump += pts.gmerror(&gm);
sump += pts.gmerror(&gm)?;
}
println!("{MG}par_gmedian {GR}{sumr:.10}{UN}");
println!("{MG}gmedian {GR}{sumg:.10}{UN}");
Expand Down

0 comments on commit b00ca18

Please sign in to comment.