Skip to content

Commit 9e41b7e

Browse files
committed Mar 11, 2019
Update readme and changelog for 0.0.10-beta release
1 parent 9ed72d0 commit 9e41b7e

12 files changed

+106
-63
lines changed
 

‎.gitattributes

+1-4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
11
* text=auto
22

3-
*.php text eol=lf
4-
5-
*.png binary
6-
*.jpg binary
3+
*.php text eol=lf

‎CHANGELOG.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
- Unreleased
1+
- 0.0.10-beta
22
- Added LODA online anomaly detector
33
- Added Radius Neighbors classifier and regressor
44
- Added fast k-d LOF anomaly detector

‎README.md

+71-32
Large diffs are not rendered by default.

‎docs/images/rubix-ml-system-architecture.svg

+1-1
Loading

‎src/Clusterers/FuzzyCMeans.php

+7-8
Original file line number | Diff line number | Diff line change
@@ -364,15 +364,15 @@ protected function calculateMembership(array $sample) : array
364364
foreach ($this->centroids as $cluster => $centroid1) {
365365
$a = $this->kernel->compute($sample, $centroid1);
366366

367-
$total = 0.;
367+
$sigma = 0.;
368368

369369
foreach ($this->centroids as $centroid2) {
370370
$b = $this->kernel->compute($sample, $centroid2);
371371

372-
$total += ($a / ($b ?: self::EPSILON)) ** $this->lambda;
372+
$sigma += ($a / ($b ?: self::EPSILON)) ** $this->lambda;
373373
}
374374

375-
$membership[$cluster] = 1. / ($total ?: self::EPSILON);
375+
$membership[$cluster] = 1. / ($sigma ?: self::EPSILON);
376376
}
377377

378378
return $membership;
@@ -387,18 +387,17 @@ protected function calculateMembership(array $sample) : array
387387
*/
388388
protected function interClusterDistance(Dataset $dataset, array $memberships) : float
389389
{
390-
$total = 0.;
390+
$distance = 0.;
391391

392392
foreach ($dataset as $i => $sample) {
393393
$membership = $memberships[$i];
394394

395395
foreach ($this->centroids as $cluster => $centroid) {
396-
$distance = $this->kernel->compute($sample, $centroid);
397-
398-
$total += $membership[$cluster] * $distance;
396+
$distance += $membership[$cluster]
397+
* $this->kernel->compute($sample, $centroid);
399398
}
400399
}
401400

402-
return $total;
401+
return $distance;
403402
}
404403
}

‎src/Clusterers/GaussianMixture.php

+2-5
Original file line number | Diff line number | Diff line change
@@ -235,13 +235,10 @@ public function train(Dataset $dataset) : void
235235

236236
$n = $dataset->numRows();
237237

238-
[$means, $variances] = $this->initialize($dataset);
239-
240-
$this->means = $means;
241-
$this->variances = $variances;
242-
243238
$this->priors = array_fill(0, $this->k, log(1. / $this->k));
244239

240+
[$this->means, $this->variances] = $this->initialize($dataset);
241+
245242
$this->steps = [];
246243

247244
$prevLoss = 0.;

‎src/Clusterers/MeanShift.php

+9-9
Original file line number | Diff line number | Diff line change
@@ -194,12 +194,12 @@ public function train(Dataset $dataset) : void
194194
]));
195195
}
196196

197-
$this->centroids = $previous = $dataset->samples();
197+
$centroids = $previous = $dataset->samples();
198198

199199
$this->steps = [];
200200

201201
for ($epoch = 1; $epoch <= $this->epochs; $epoch++) {
202-
foreach ($this->centroids as $i => &$centroid) {
202+
foreach ($centroids as $i => &$centroid) {
203203
foreach ($dataset as $sample) {
204204
$distance = $this->kernel->compute($sample, $centroid);
205205

@@ -214,15 +214,15 @@ public function train(Dataset $dataset) : void
214214
}
215215
}
216216

217-
foreach ($this->centroids as $j => $neighbor) {
217+
foreach ($centroids as $j => $neighbor) {
218218
if ($i === $j) {
219219
continue 1;
220220
}
221221

222222
$distance = $this->kernel->compute($centroid, $neighbor);
223223

224224
if ($distance < $this->radius) {
225-
unset($this->centroids[$j]);
225+
unset($centroids[$j]);
226226
}
227227
}
228228
}
@@ -243,10 +243,10 @@ public function train(Dataset $dataset) : void
243243
break 1;
244244
}
245245

246-
$previous = $this->centroids;
246+
$previous = $centroids;
247247
}
248248

249-
$this->centroids = array_values($this->centroids);
249+
$this->centroids = array_values($centroids);
250250

251251
if ($this->logger) {
252252
$this->logger->info('Training complete');
@@ -296,7 +296,7 @@ protected function assign(array $sample) : int
296296
}
297297

298298
/**
299-
* Calculate the magnitude (l1) of a centroid shift from the previous epoch.
299+
* Calculate the magnitude (l1) of centroid shift from the previous epoch.
300300
*
301301
* @param array $previous
302302
* @return float
@@ -306,10 +306,10 @@ protected function centroidShift(array $previous) : float
306306
$shift = 0.;
307307

308308
foreach ($this->centroids as $cluster => $centroid) {
309-
$prevCluster = $previous[$cluster];
309+
$prevCentroid = $previous[$cluster];
310310

311311
foreach ($centroid as $column => $mean) {
312-
$shift += abs($prevCluster[$column] - $mean);
312+
$shift += abs($prevCentroid[$column] - $mean);
313313
}
314314
}
315315

‎src/Embedders/TSNE.php

+1-2
Original file line number | Diff line number | Diff line change
@@ -272,8 +272,7 @@ public function predict(Dataset $dataset) : array
272272
$x = Matrix::build($dataset->samples());
273273

274274
if ($this->logger) {
275-
$this->logger->info('Computing pairwise high'
276-
. ' dimensional affinities');
275+
$this->logger->info('Computing affinity matrix');
277276
}
278277

279278
$distances = $this->pairwiseDistances($x);

‎src/Graph/BallTree.php

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
* References:
2121
* [1] S. M. Omohundro. (1989). Five Balltree Construction Algorithms.
2222
* [2] M. Dolatshah et al. (2015). Ball*-tree: Efficient spatial indexing for
23-
* constrained nearest-neighbor search in metric spaces
23+
* constrained nearest-neighbor search in metric spaces.
2424
*
2525
* @category Machine Learning
2626
* @package Rubix/ML

‎src/Transformers/DenseRandomProjector.php

+4
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,10 @@
88
* The Dense Random Projector uses a random matrix sampled from a dense uniform
99
* distribution [-1, 1] to project a sample matrix onto a target dimensionality.
1010
*
11+
* References:
12+
* [1] D. Achlioptas. (2003). Database-friendly random projections:
13+
* Johnson-Lindenstrauss with binary coins.
14+
*
1115
* @category Machine Learning
1216
* @package Rubix/ML
1317
* @author Andrew DalPino

‎src/Transformers/SparseRandomProjector.php

+4
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,10 @@
1313
* The Sparse Random Projector uses a random matrix sampled from a sparse uniform
1414
* distribution (mostly 0s) to project a sample matrix onto a target dimensionality.
1515
*
16+
* References:
17+
* [1] D. Achlioptas. (2003). Database-friendly random projections:
18+
* Johnson-Lindenstrauss with binary coins.
19+
*
1620
* @category Machine Learning
1721
* @package Rubix/ML
1822
* @author Andrew DalPino

‎src/Transformers/TfIdfTransformer.php

+4
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,10 @@
1818
* > **Note**: This transformer assumes that its input is made up of word
1919
* frequency vectors such as those created by the Word Count Vectorizer.
2020
*
21+
* References:
22+
* [1] S. Robertson. (2003). Understanding Inverse Document Frequency: On
23+
* theoretical arguments for IDF.
24+
*
2125
* @category Machine Learning
2226
* @package Rubix/ML
2327
* @author Andrew DalPino

0 commit comments

Comments
 (0)
Please sign in to comment.