Skip to content

Commit d4ba39c

Browse files
committedSep 21, 2017
Formatted documentation
1 parent 586842c commit d4ba39c

7 files changed

+40
-24
lines changed
 

‎clusters.go

+32-17
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,79 @@
1+
// Package clusters provides abstract definitions of clusterers as well as
2+
// their implementations.
13
package clusters
24

35
import (
46
"math"
57
)
68

9+
// DistanceFunc represents a function for measuring distance
10+
// between n-dimensional vectors.
711
type DistanceFunc func(a, b []float64) float64
812

13+
// Online represents parameters important for online learning in
14+
// clustering algorithms.
915
type Online struct {
1016
Alpha float64
1117
Dimension int
1218
}
1319

14-
/* Events represent intermediate results of computation of both kinds of algorithms
15-
* transmitted periodically to the caller */
20+
// HCEvent represents an intermediate result of computation of a hard clustering algorithm
21+
// and is transmitted periodically to the caller during online learning.
1622
type HCEvent struct {
1723
Cluster int
1824
Observation []float64
1925
}
2026

21-
/* TestResult represents output of a test performed to measure quality of an algorithm. */
22-
type TestResult struct {
23-
clusters, expected int
24-
}
25-
26-
/* Clusterer denotes the operation of learning
27-
* common for both Hard and Soft clusterers */
27+
// Clusterer defines the operation of learning
28+
// common for all algorithms
2829
type Clusterer interface {
2930
Learn([][]float64) error
3031
}
3132

32-
/* HardClusterer defines a set of operations for hard clustering algorithms */
33+
// HardClusterer defines a set of operations for hard clustering algorithms
3334
type HardClusterer interface {
3435

35-
/* Returns sizes of respective clusters */
36+
// Sizes returns sizes of respective clusters
3637
Sizes() []int
3738

38-
/* Returns mapping from data point indices to cluster index. Cluster indices begin at 1, not 0. */
39+
// Guesses returns a mapping from data point indices to cluster numbers. Cluster numbering begins at 1.
3940
Guesses() []int
4041

41-
/* Returns index of cluster to which the observation was assigned */
42+
// Predict returns the number of the cluster to which the observation would be assigned
4243
Predict(observation []float64) int
4344

44-
/* Whether algorithm supports online learning */
45+
// IsOnline tells whether the algorithm supports online learning
4546
IsOnline() bool
4647

47-
/* Allows to configure the algorithms for online learning */
48+
// WithOnline configures the algorithm for online learning with the given parameters
4849
WithOnline(Online) HardClusterer
4950

50-
/* Provides a method to train the algorithm online and receive intermediate results of computation */
51+
// Online begins the process of online training of an algorithm. Observations are sent on the observations channel;
52+
// once no more are expected, an empty struct needs to be sent on the done channel. The caller receives intermediate results of computation via
53+
// the returned channel.
5154
Online(observations chan []float64, done chan struct{}) chan *HCEvent
5255

56+
// Clusterer is embedded to provide the common learning operation
5357
Clusterer
5458
}
5559

60+
// Estimator defines a computation used to determine an optimal number of clusters in the dataset
5661
type Estimator interface {
5762

58-
/* Estimates the numer of clusters */
63+
// Estimate provides an expected number of clusters in the dataset
5964
Estimate([][]float64) (int, error)
6065
}
6166

67+
// Importer defines an operation of importing the dataset from an external file
68+
type Importer interface {
69+
70+
// Import fetches the data from a file; the start and end arguments allow the user
71+
// to specify the span of data columns to be imported (inclusive)
72+
Import(file string, start, end int) ([][]float64, error)
73+
}
74+
6275
var (
76+
// EuclideanDistance is one of the common distance measurements
6377
EuclideanDistance = func(a, b []float64) float64 {
6478
var (
6579
s, t float64
@@ -73,6 +87,7 @@ var (
7387
return math.Sqrt(s)
7488
}
7589

90+
// EuclideanDistanceSquared is one of the common distance measurements
7691
EuclideanDistanceSquared = func(a, b []float64) float64 {
7792
var (
7893
s, t float64

‎importer.go ‎csv_importer.go

-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@ import (
88
"strconv"
99
)
1010

11-
type Importer interface {
12-
Import(file string, start, end int) ([][]float64, error)
13-
}
14-
1511
type csvImporter struct {
1612
}
1713

File renamed without changes.

‎dbscan.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ type dbscanClusterer struct {
2929
d [][]float64
3030
}
3131

32-
/* Implementation of DBSCAN algorithm with concurrent nearest neighbour computation */
32+
// Implementation of DBSCAN algorithm with concurrent nearest neighbour computation. The number of goroutines acting concurrently
33+
// is controlled via workers argument. Passing 0 will result in this number being chosen arbitrarily.
3334
func DBSCAN(minpts int, eps float64, workers int, distance DistanceFunc) (HardClusterer, error) {
3435
if minpts < 1 {
3536
return nil, ErrZeroMinpts

‎kmeans.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ const (
1313
CHANGES_THRESHOLD = 2
1414
)
1515

16-
/* Implementation of k-means++ algorithm with online learning */
1716
type kmeansClusterer struct {
1817
iterations, number int
1918

@@ -37,6 +36,7 @@ type kmeansClusterer struct {
3736
d [][]float64
3837
}
3938

39+
// Implementation of k-means++ algorithm with online learning
4040
func KMeans(iterations, clusters int, distance DistanceFunc) (HardClusterer, error) {
4141
if iterations < 1 {
4242
return nil, ErrZeroIterations

‎kmeans_estimator.go

+3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ type kmeansEstimator struct {
2525
d [][]float64
2626
}
2727

28+
// Implementation of cluster number estimator using gap statistic
29+
// ("Estimating the number of clusters in a data set via the gap statistic", Tibshirani et al.) with k-means++ as
30+
// clustering algorithm
2831
func KMeansEstimator(iterations, clusters int, distance DistanceFunc) (Estimator, error) {
2932
if iterations < 1 {
3033
return nil, ErrZeroIterations

‎optics.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ type opticsClusterer struct {
4646
d [][]float64
4747
}
4848

49-
/* Implementation of OPTICS algorithm with concurrent nearest neighbour computation */
49+
// Implementation of OPTICS algorithm with concurrent nearest neighbour computation. The number of goroutines acting concurrently
50+
// is controlled via workers argument. Passing 0 will result in this number being chosen arbitrarily.
5051
func OPTICS(minpts int, eps, xi float64, workers int, distance DistanceFunc) (HardClusterer, error) {
5152
if minpts < 1 {
5253
return nil, ErrZeroMinpts

0 commit comments

Comments
 (0)
Please sign in to comment.