|
| 1 | +// Package clusters provides abstract definitions of clusterers as well as |
| 2 | +// their implementations. |
1 | 3 | package clusters
|
2 | 4 |
|
3 | 5 | import (
|
4 | 6 | "math"
|
5 | 7 | )
|
6 | 8 |
|
| 9 | +// DistanceFunc represents a function for measuring distance |
| 10 | +// between n-dimensional vectors. |
7 | 11 | type DistanceFunc func(a, b []float64) float64
|
8 | 12 |
|
| 13 | +// Online represents parameters important for online learning in |
| 14 | +// clustering algorithms. |
9 | 15 | type Online struct {
|
10 | 16 | Alpha float64
|
11 | 17 | Dimension int
|
12 | 18 | }
|
13 | 19 |
|
14 |
| -/* Events represent intermediate results of computation of both kinds of algorithms |
15 |
| - * transmitted periodically to the caller */ |
| 20 | +// HCEvent represents an intermediate result of a hard clustering algorithm's computation; |
| 21 | +// such events are transmitted periodically to the caller during online learning. |
16 | 22 | type HCEvent struct {
|
17 | 23 | Cluster int
|
18 | 24 | Observation []float64
|
19 | 25 | }
|
20 | 26 |
|
21 |
| -/* TestResult represents output of a test performed to measure quality of an algorithm. */ |
22 |
| -type TestResult struct { |
23 |
| - clusters, expected int |
24 |
| -} |
25 |
| - |
26 |
| -/* Clusterer denotes the operation of learning |
27 |
| - * common for both Hard and Soft clusterers */ |
| 27 | +// Clusterer defines the operation of learning |
| 28 | +// common for all algorithms |
28 | 29 | type Clusterer interface {
|
29 | 30 | Learn([][]float64) error
|
30 | 31 | }
|
31 | 32 |
|
32 |
| -/* HardClusterer defines a set of operations for hard clustering algorithms */ |
| 33 | +// HardClusterer defines a set of operations for hard clustering algorithms |
33 | 34 | type HardClusterer interface {
|
34 | 35 |
|
35 |
| - /* Returns sizes of respective clusters */ |
| 36 | + // Sizes returns sizes of respective clusters |
36 | 37 | Sizes() []int
|
37 | 38 |
|
38 |
| - /* Returns mapping from data point indices to cluster index. Cluster indices begin at 1, not 0. */ |
| 39 | + // Guesses returns a mapping from data point indices to cluster numbers. Cluster numbering begins at 1. |
39 | 40 | Guesses() []int
|
40 | 41 |
|
41 |
| - /* Returns index of cluster to which the observation was assigned */ |
| 42 | + // Predict returns the number of the cluster to which the observation would be assigned |
42 | 43 | Predict(observation []float64) int
|
43 | 44 |
|
44 |
| - /* Whether algorithm supports online learning */ |
| 45 | + // IsOnline tells whether the algorithm supports online learning |
45 | 46 | IsOnline() bool
|
46 | 47 |
|
47 |
| - /* Allows to configure the algorithms for online learning */ |
| 48 | + // WithOnline configures the algorithm for online learning with the given parameters |
48 | 49 | WithOnline(Online) HardClusterer
|
49 | 50 |
|
50 |
| - /* Provides a method to train the algorithm online and receive intermediate results of computation */ |
| 51 | + // Online begins the process of online training of an algorithm. Observations are sent on the observations channel; |
| 52 | + // once no more are expected, an empty struct needs to be sent on the done channel. The caller receives intermediate results of computation via |
| 53 | + // the returned channel. |
51 | 54 | Online(observations chan []float64, done chan struct{}) chan *HCEvent
|
52 | 55 |
|
| 56 | + // Implement common operation |
53 | 57 | Clusterer
|
54 | 58 | }
|
55 | 59 |
|
| 60 | +// Estimator defines a computation used to determine an optimal number of clusters in the dataset |
56 | 61 | type Estimator interface {
|
57 | 62 |
|
58 |
| - /* Estimates the numer of clusters */ |
| 63 | + // Estimate provides an expected number of clusters in the dataset |
59 | 64 | Estimate([][]float64) (int, error)
|
60 | 65 | }
|
61 | 66 |
|
| 67 | +// Importer defines an operation of importing the dataset from an external file |
| 68 | +type Importer interface { |
| 69 | + |
| 70 | + // Import fetches the data from a file, start and end arguments allow user |
| 71 | + // to specify the span of data columns to be imported (inclusively) |
| 72 | + Import(file string, start, end int) ([][]float64, error) |
| 73 | +} |
| 74 | + |
62 | 75 | var (
|
| 76 | + // EuclideanDistance is one of the common distance measurements |
63 | 77 | EuclideanDistance = func(a, b []float64) float64 {
|
64 | 78 | var (
|
65 | 79 | s, t float64
|
|
73 | 87 | return math.Sqrt(s)
|
74 | 88 | }
|
75 | 89 |
|
| 90 | + // EuclideanDistanceSquared is one of the common distance measurements |
76 | 91 | EuclideanDistanceSquared = func(a, b []float64) float64 {
|
77 | 92 | var (
|
78 | 93 | s, t float64
|
|
0 commit comments