Skip to content

Commit

Permalink
Add CML support for threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
seiflotfy committed May 5, 2016
1 parent 2f84ee4 commit 4776a83
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 8 deletions.
35 changes: 29 additions & 6 deletions src/sketches/cml.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,14 @@ import (
// CMLSketch is the top-level Sketch that controls the count-min-log
// implementation.
type CMLSketch struct {
	*datamodel.Info

	// impl is the underlying count-min-log sketch. The constructor leaves it
	// nil; Add creates it once the threshold dictionary reports it is full.
	impl *cml.Sketch

	// threshold is the dictionary that receives added values and answers Get
	// for as long as it is non-nil. Add sets it to nil after handing its keys
	// over to impl.
	threshold *Dict
}

// NewCMLSketch returns a CMLSketch for info that starts out backed only by a
// threshold Dict built from the same info. The count-min-log sketch itself is
// left nil here; Add allocates it once the threshold reports it is full.
//
// The returned error is always nil.
func NewCMLSketch(info *datamodel.Info) (*CMLSketch, error) {
	return &CMLSketch{
		Info:      info,
		threshold: NewDict(info),
	}, nil
}

Expand All @@ -29,6 +27,27 @@ func (d *CMLSketch) Add(values [][]byte) (bool, error) {
success := true

dict := make(map[string]uint)

if d.threshold != nil {
s, err := d.threshold.Add(values)
success = s
if err != nil {
return false, err
}
if !d.threshold.IsFull() {
return true, nil
}
values = d.threshold.Keys()
d.threshold = nil
if d.impl == nil {
sketch, err := cml.NewForCapacity16(uint64(d.Info.Properties.GetMaxUniqueItems()), 0.01)
if err != nil {
logger.Errorf("an error has occurred while saving CMLSketch: %s", err.Error())
}
d.impl = sketch
}
}

for _, v := range values {
dict[string(v)]++
}
Expand All @@ -42,6 +61,10 @@ func (d *CMLSketch) Add(values [][]byte) (bool, error) {

// Get ...
func (d *CMLSketch) Get(data interface{}) (interface{}, error) {
if d.threshold != nil {
return d.threshold.Get(data)
}

values := data.([][]byte)
res := &pb.FrequencyResult{
Frequencies: make([]*pb.Frequency, len(values), len(values)),
Expand Down
6 changes: 4 additions & 2 deletions src/sketches/cml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@ import (

"datamodel"
pb "datamodel/protobuf"
"utils"
"testutils"
"utils"
)

func TestAdd(t *testing.T) {
func TestAddCML(t *testing.T) {
testutils.SetupTests()
defer testutils.TearDownTests()

info := datamodel.NewEmptyInfo()
info.Properties.MaxUniqueItems = utils.Int64p(1000000)
info.Name = utils.Stringp("marvel")
typ := pb.SketchType_FREQ
info.Type = &typ
sketch, err := NewCMLSketch(info)

if err != nil {
Expand Down
23 changes: 23 additions & 0 deletions src/sketches/dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func (d *Dict) Get(data interface{}) (interface{}, error) {
switch datamodel.GetTypeString(typ) {
case datamodel.Bloom:
return d.getMemb(data)
case datamodel.CML:
return d.getFreq(data)
}
return nil, fmt.Errorf("Unknown error: %v", d.Info.GetType().String()) // FIXME: return some error
}
Expand All @@ -76,3 +78,24 @@ func (d *Dict) getMemb(data interface{}) (interface{}, error) {
}
return res, nil
}

// getFreq reports, for every value in data, the count the dictionary holds
// for it.
//
// data must be a [][]byte; any other dynamic type (including nil) yields an
// error rather than a panic. On success the result is a *pb.FrequencyResult
// whose Frequencies slice has exactly one entry per input value, in input
// order. Values that occur more than once in data share a single
// *pb.Frequency entry. The count is read straight from d.impl, so a value
// that was never added reports whatever d.impl yields for a missing key.
func (d *Dict) getFreq(data interface{}) (interface{}, error) {
	values, ok := data.([][]byte)
	if !ok {
		return nil, fmt.Errorf("getFreq: expected [][]byte, got %T", data)
	}

	res := &pb.FrequencyResult{
		Frequencies: make([]*pb.Frequency, len(values)),
	}

	// seen maps each distinct value to the entry already built for it, so
	// duplicates in the request reuse that entry instead of allocating again.
	seen := make(map[string]*pb.Frequency, len(values))
	for i, v := range values {
		key := string(v)
		if f, ok := seen[key]; ok {
			res.Frequencies[i] = f
			continue
		}
		f := &pb.Frequency{
			Value: utils.Stringp(key),
			Count: utils.Int64p(int64(d.impl[key])),
		}
		seen[key] = f
		res.Frequencies[i] = f
	}
	return res, nil
}

0 comments on commit 4776a83

Please sign in to comment.