forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 3
/
gradboostclasstarget.go
120 lines (92 loc) · 2.69 KB
/
gradboostclasstarget.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package CloudForest
import (
"fmt"
"math"
)
// Logit returns the log-odds of x, log(x / (1 - x)).
//
// No range checking is done: x = 0 yields -Inf, x = 1 yields +Inf, and values
// outside [0, 1] yield NaN, following the behavior of math.Log.
func Logit(x float64) float64 {
	odds := x / (1.0 - x)
	return math.Log(odds)
}
// Expit is the logistic sigmoid 1 / (1 + exp(-x)), i.e. the inverse of Logit.
//
// It is evaluated through the equivalent form 0.5 * (tanh(x/2) + 1) instead
// of calling math.Exp directly.
func Expit(x float64) (out float64) {
	t := math.Tanh(0.5 * x)
	return (t + 1.0) * 0.5
}
/*
GradBoostClassTarget wraps a numerical feature as a target for use in two class gradient boosting trees.
It should be used with SumBallotBox and expit transformed to get class probabilities.

Predictions are tracked on the log-odds scale in Pred, while the embedded
GradBoostTarget carries the per-case residuals that are recomputed as
Actual - Expit(Pred) whenever Update is called.
*/
type GradBoostClassTarget struct {
// Embedded target whose values are the current residuals
// (actual label minus Expit of the current prediction).
*GradBoostTarget
// Actual holds the class labels encoded as 1.0 for Pos_class and 0.0 for
// every other value.
Actual NumFeature
// Pred holds the running prediction for each case; it starts at Prior and
// is incremented by LearnRate times the leaf prediction in Update.
Pred NumFeature
// LearnRate scales each leaf prediction before it is added to Pred; it is
// also the weight returned by Boost.
LearnRate float64
// Prior is the initial prediction, log(positives / negatives), computed by
// NewGradBoostClassTarget and returned by Intercept.
Prior float64
// Pos_class is the category label treated as the positive class.
Pos_class string
}
// NewGradBoostClassTarget builds a two class gradient boosting target from the
// categorical feature f.
//
// Cases whose value equals pos_class are labeled 1.0 and all others 0.0. The
// prior is the log-odds of the positive class, log(positives / negatives);
// every prediction starts at that prior and every residual starts at
// label - Expit(prior). learnrate is stored both on the returned target and on
// the embedded GradBoostTarget.
//
// No guard is applied when f contains only one class, in which case the prior
// is +Inf or -Inf.
func NewGradBoostClassTarget(f CatFeature, learnrate float64, pos_class string) (gbc *GradBoostClassTarget) {
	actual := f.EncodeToNum()[0].(*DenseNumFeature)
	// Taken before relabeling; every entry is overwritten with the prior below.
	pred := actual.Copy().(*DenseNumFeature)

	// Force the encoding so the positive class is 1 and everything else is 0.
	for i := 0; i < f.Length(); i++ {
		label := 0.0
		if f.GetStr(i) == pos_class {
			label = 1.0
		}
		actual.Put(i, label)
	}

	residuals := &GradBoostTarget{actual.Copy().(*DenseNumFeature), learnrate, 0.0}

	// With 0/1 labels the sum of the labels is the number of positive cases.
	positives := 0.0
	for i := 0; i < actual.Length(); i++ {
		positives += actual.Get(i)
	}

	// Initial prediction is the log-odds of the positive class; the initial
	// residual is the label minus the corresponding probability.
	prior := math.Log(positives / (float64(residuals.Length()) - positives))
	priorProb := Expit(prior)
	for i := 0; i < residuals.Length(); i++ {
		pred.Put(i, prior)
		residuals.Put(i, actual.Get(i)-priorProb)
	}

	return &GradBoostClassTarget{residuals, actual, pred, learnrate, prior, pos_class}
}
// Intercept returns the stored Prior, the initial prediction that
// NewGradBoostClassTarget computes as the log-odds of the positive class.
func (f *GradBoostClassTarget) Intercept() float64 {
return f.Prior
}
//BUG(ryan) does GradBoostingTarget need separate residuals and values?

// Boost applies one fitted tree to the target. leaves holds the case indices
// falling in each leaf and preds holds the matching leaf predictions as
// strings; each prediction is converted with ParseFloat and handed to Update
// together with that leaf's cases. The returned weight is the learning rate.
func (f *GradBoostClassTarget) Boost(leaves *[][]int, preds *[]string) (weight float64) {
	for leaf := range *leaves {
		members := (*leaves)[leaf]
		value := ParseFloat((*preds)[leaf])
		f.Update(&members, value)
	}
	weight = f.LearnRate
	return weight
}
// Predicted returns the leaf value for the given cases: the sum of their
// residuals r divided by the sum of p*(1-p), where p = y - r is the predicted
// probability recovered from the label y and the stored residual
// (residuals are stored as y - Expit(prediction)).
//
// If the denominator is exactly zero, which happens for an empty case list or
// when every recovered probability is exactly 0 or 1, it returns 0 instead of
// NaN or ±Inf. A non-finite leaf value would be added to Pred by Update and
// would make the prediction and residual of those cases non-finite for good.
func (f *GradBoostClassTarget) Predicted(cases *[]int) float64 {
	//TODO(ryan): update predicted on whole data not just in bag
	num := 0.0
	denom := 0.0
	for _, c := range *cases {
		r := f.Get(c)
		num += r
		y := f.Actual.Get(c)
		denom += (y - r) * (1.0 - y + r)
	}
	if denom == 0 {
		// No usable curvature information for this leaf: make no update.
		return 0.0
	}
	return num / denom
}
// FindPredicted computes Predicted for the given cases and returns the value
// rendered as a string in fmt's default (%v) floating point format.
func (f *GradBoostClassTarget) FindPredicted(cases []int) (pred string) {
	value := f.Predicted(&cases)
	return fmt.Sprint(value)
}
//Update updates the underlying numeric data by subtracting the mean*weight of the
//specified cases from the value for those cases.
func (f *GradBoostClassTarget) Update(cases *[]int, predicted float64) {
for _, i := range *cases {
pred := f.Pred.Get(i) + f.LearnRate*predicted
f.Pred.Put(i, pred)
g := f.Actual.Get(i) - Expit(pred)
f.Put(i, g)
}
}