forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 3
/
wrftarget.go
123 lines (96 loc) · 2.84 KB
/
wrftarget.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package CloudForest
/*
WRFTarget wraps a categorical feature as a target for use in a weighted random forest.

Weights is indexed by category number: NewWRFTarget sizes it to NCats() and
fills entry i with the weight supplied for the category name NumToCat(i).
*/
type WRFTarget struct {
	// CatFeature is the embedded categorical feature being predicted.
	CatFeature
	// Weights[i] is the weight applied to the count of category number i.
	Weights []float64
}
/*
NewWRFTarget builds a weighted random forest target around the categorical
feature f.

weights maps category names to their weights. The returned target holds one
weight per category number of f; a category whose name is not a key of weights
gets the map's zero value, 0.
*/
func NewWRFTarget(f CatFeature, weights map[string]float64) (abt *WRFTarget) {
	nCats := f.NCats()
	perCat := make([]float64, nCats)
	for cat := 0; cat < nCats; cat++ {
		perCat[cat] = weights[f.NumToCat(cat)]
	}
	return &WRFTarget{CatFeature: f, Weights: perCat}
}
/*
SplitImpurity is the weighted random forest version of SplitImpurity.

It returns the mean of the impurities of the left (l), right (r) and, when m is
non-nil and non-empty, missing (m) partitions, each weighted by its number of
cases. allocs.LCounter, allocs.RCounter and allocs.Counter are handed to
Impurity as counting scratch space for l, r and m respectively.
*/
func (target *WRFTarget) SplitImpurity(l *[]int, r *[]int, m *[]int, allocs *BestSplitAllocs) (impurityDecrease float64) {
	sizeL, sizeR := float64(len(*l)), float64(len(*r))

	weighted := sizeL*target.Impurity(l, allocs.LCounter) +
		sizeR*target.Impurity(r, allocs.RCounter)
	denom := sizeL + sizeR

	// The missing partition only contributes when it actually holds cases.
	if m != nil && len(*m) > 0 {
		sizeM := float64(len(*m))
		weighted += sizeM * target.Impurity(m, allocs.Counter)
		denom += sizeM
	}
	return weighted / denom
}
// UpdateSImpFromAllocs is called while a split is being built by moving cases
// from r to l, so the split impurity can be refreshed without recalculating it
// from scratch.
//
// For every case in movedRtoL its category count is incremented in
// allocs.LCounter and decremented in allocs.RCounter. The result is then the
// mean of the impurities computed from allocs.LCounter, allocs.RCounter and,
// when m is non-nil and non-empty, allocs.Counter, weighted by len(*l),
// len(*r) and len(*m) respectively.
func (target *WRFTarget) UpdateSImpFromAllocs(l *[]int, r *[]int, m *[]int, allocs *BestSplitAllocs, movedRtoL *[]int) (impurityDecrease float64) {
	leftCounts, rightCounts := *allocs.LCounter, *allocs.RCounter
	for _, moved := range *movedRtoL {
		// Flagged by the original author as the most expensive statement,
		// so the category is looked up once and reused for both counters.
		class := target.Geti(moved)
		leftCounts[class]++
		rightCounts[class]--
	}

	sizeL, sizeR := float64(len(*l)), float64(len(*r))
	weighted := sizeL*target.ImpFromCounts(allocs.LCounter) +
		sizeR*target.ImpFromCounts(allocs.RCounter)
	denom := sizeL + sizeR

	// The missing partition only contributes when it actually holds cases.
	if m != nil && len(*m) > 0 {
		sizeM := float64(len(*m))
		weighted += sizeM * target.ImpFromCounts(allocs.Counter)
		denom += sizeM
	}
	return weighted / denom
}
// Impurity returns the Gini impurity of cases computed with the per-category
// weights held in WRFTarget.Weights.
//
// counter is scratch space: it is passed to CountPerCat together with cases
// and then read back by ImpFromCounts.
func (target *WRFTarget) Impurity(cases *[]int, counter *[]int) (e float64) {
	target.CountPerCat(cases, counter)
	e = target.ImpFromCounts(counter)
	return e
}
// ImpFromCounts recalculates the weighted Gini impurity from per-category
// counts, for use in iterative updates where the counts are maintained
// incrementally.
//
// With c_i the count in (*counter)[i] and w_i = target.Weights[i], the result is
//
//	1 - sum_i (c_i*w_i)^2 / (sum_i c_i*w_i)^2
//
// counter must not be longer than target.Weights. When the total weighted
// count is zero (no cases, or only zero-weight categories) the impurity is
// defined as 0 instead of the NaN that 0/0 would otherwise produce.
func (target *WRFTarget) ImpFromCounts(counter *[]int) (e float64) {
	total := 0.0
	sumSquares := 0.0
	for i, v := range *counter {
		w := target.Weights[i]
		// Convert to float64 before squaring: v*v in int arithmetic silently
		// overflows for large counts.
		fv := float64(v)
		total += fv * w
		sumSquares += fv * fv * w * w
	}
	if total == 0 {
		// Nothing to be impure about; avoid dividing zero by zero.
		return 0
	}
	return 1 - sumSquares/(total*total)
}
// FindPredicted returns the predicted target for cases: the weighted
// categorical mode, i.e. the category whose count among the non-missing cases,
// multiplied by its entry in target.Weights, is largest.
//
// Ties go to the lowest category number, and category number 0 is returned
// when no category has a positive weighted count.
func (target *WRFTarget) FindPredicted(cases []int) (pred string) {
	tally := make([]int, target.NCats())
	for _, c := range cases {
		if target.IsMissing(c) {
			continue
		}
		tally[target.Geti(c)]++
	}

	winner, top := 0, 0.0
	for cat, n := range tally {
		// Strict comparison keeps the earliest category on ties.
		if score := float64(n) * target.Weights[cat]; score > top {
			winner, top = cat, score
		}
	}
	return target.NumToCat(winner)
}