google · a-nogikh · Apr 1, 2024 · dvyukov · Apr 3, 2024
diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go
@@ -14,6 +14,7 @@ import (
 
 	"github.com/google/syzkaller/pkg/corpus"
 	"github.com/google/syzkaller/pkg/ipc"
+	"github.com/google/syzkaller/pkg/learning"
 	"github.com/google/syzkaller/pkg/rpctype"
 	"github.com/google/syzkaller/pkg/signal"
 	"github.com/google/syzkaller/prog"
@@ -34,6 +35,12 @@ type Fuzzer struct {
 	ctMu         sync.Mutex // TODO: use RWLock.
 	ctRegenerate chan struct{}
 
+	// A multi-armed bandit (MAB) that decides whether the next request mutates
+	// an existing program (exec fuzz) or generates a new one (exec gen).
+	genFuzzMAB      *learning.PlainMAB[string]
+	genSignalSpeed  *learning.RunningRatioAverage[float64]
+	fuzzSignalSpeed *learning.RunningRatioAverage[float64]
+
 	nextExec  *priorityQueue[*Request]
 	nextJobID atomic.Int64
 
@@ -43,6 +50,12 @@ type Fuzzer struct {
 
 func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
 	target *prog.Target) *Fuzzer {
+	genFuzzMAB := &learning.PlainMAB[string]{
+		ExplorationRate: 0.02,
+		MinLearningRate: 0.0005,
+	}
+	genFuzzMAB.AddArms(statFuzz, statGenerate)
+
 	f := &Fuzzer{
 		Config: cfg,
 		Cover:  &Cover{},
@@ -54,7 +67,10 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
 
 		// We're okay to lose some of the messages -- if we are already
 		// regenerating the table, we don't want to repeat it right away.
-		ctRegenerate: make(chan struct{}),
+		ctRegenerate:    make(chan struct{}),
+		genFuzzMAB:      genFuzzMAB,
+		genSignalSpeed:  learning.NewRunningRatioAverage[float64](10000),
+		fuzzSignalSpeed: learning.NewRunningRatioAverage[float64](20000),
 
 		nextExec: makePriorityQueue[*Request](),
 	}
@@ -91,6 +107,8 @@ type Request struct {
 	flags   ProgTypes
 	stat    string
 	resultC chan *Result
+
+	genFuzzAction learning.Action[string]
 }
 
 type Result struct {
@@ -102,11 +120,12 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
 	// Triage individual calls.
 	// We do it before unblocking the waiting threads because
 	// it may result it concurrent modification of req.Prog.
+	var newSignal int
 	if req.NeedSignal != rpctype.NoSignal && res.Info != nil {
 		for call, info := range res.Info.Calls {
-			fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
+			newSignal += fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
 		}
-		fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
+		newSignal += fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
 	}
 	// Unblock threads that wait for the result.
 	if req.resultC != nil {
@@ -116,20 +135,36 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
 	fuzzer.mu.Lock()
 	fuzzer.stats[req.stat]++
 	fuzzer.mu.Unlock()
+	// Update the MAB(s).
+	reward := 0.0
+	if res.Info != nil && res.Info.ElapsedSec > 0 {
+		// Similarly to the "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning"
+		// paper, let's use the ratio of "new max signal" to "execution time".
+		// Unlike the paper, let's take the raw value of it instead of its ratio to the average one.
+		reward = float64(newSignal) / res.Info.ElapsedSec
+		if req.stat == statGenerate {
+			fuzzer.genSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
+		} else if req.stat == statFuzz {
+			fuzzer.fuzzSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
+		}
+	}
+	if !req.genFuzzAction.Empty() {
+		fuzzer.genFuzzMAB.SaveReward(req.genFuzzAction, reward)
+	}
 }
 
 func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
-	flags ProgTypes) {
+	flags ProgTypes) int {
 	prio := signalPrio(p, info, call)
 	newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio)
 	if newMaxSignal.Empty() {
-		return
+		return 0
 	}
 	if flags&progInTriage > 0 {
 		// We are already triaging this exact prog.
 		// All newly found coverage is flaky.
 		fuzzer.Logf(2, "found new flaky signal in call %d in %s", call, p)
-		return
+		return newMaxSignal.Len()
 	}
 	fuzzer.Logf(2, "found new signal in call %d in %s", call, p)
 	fuzzer.startJob(&triageJob{
@@ -140,6 +175,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
 		flags:       flags,
 		jobPriority: triageJobPrio(flags),
 	})
+	return newMaxSignal.Len()
 }
 
 func signalPrio(p *prog.Prog, info *ipc.CallInfo, call int) (prio uint8) {
@@ -184,21 +220,18 @@ func (fuzzer *Fuzzer) nextInput() *Request {
 		}
 	}
 
-	// Either generate a new input or mutate an existing one.
-	mutateRate := 0.95
-	if !fuzzer.Config.Coverage {
-		// If we don't have real coverage signal, generate programs
-		// more frequently because fallback signal is weak.
-		mutateRate = 0.5
-	}
 	rnd := fuzzer.rand()
-	if rnd.Float64() < mutateRate {
-		req := mutateProgRequest(fuzzer, rnd)
-		if req != nil {
-			return req
-		}
+	action := fuzzer.genFuzzMAB.Action(rnd)
+
+	var req *Request
+	if action.Arm == statFuzz {
+		req = mutateProgRequest(fuzzer, rnd)
+	}
+	if req == nil {
+		req = genProgRequest(fuzzer, rnd)
 	}
-	return genProgRequest(fuzzer, rnd)
+	req.genFuzzAction = action
+	return req
 }
 
 func (fuzzer *Fuzzer) startJob(newJob job) {

diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go
@@ -85,6 +85,8 @@ func TestFuzz(t *testing.T) {
 		t.Logf("%s", p.Serialize())
 	}
 
+	t.Logf("stats: %+v", fuzzer.Stats().Named)
+
 	assert.Equal(t, len(tf.expectedCrashes), len(tf.crashes),
 		"not all expected crashes were found")
 }

diff --git a/pkg/fuzzer/stats.go b/pkg/fuzzer/stats.go
@@ -42,5 +42,7 @@ func (fuzzer *Fuzzer) Stats() Stats {
 	for k, v := range fuzzer.stats {
 		ret.Named[k] = v
 	}
+	ret.Named["exec gen, sig/sec*1000"] = uint64(fuzzer.genSignalSpeed.Load() * 1000)
+	ret.Named["exec fuzz, sig/sec*1000"] = uint64(fuzzer.fuzzSignalSpeed.Load() * 1000)
 	return ret
 }
diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go
@@ -95,8 +95,9 @@ type CallInfo struct {
 }
 
 type ProgInfo struct {
-	Calls []CallInfo
-	Extra CallInfo // stores Signal and Cover collected from background threads
+	Calls      []CallInfo
+	Extra      CallInfo // stores Signal and Cover collected from background threads
+	ElapsedSec float64  // total execution time in seconds
 }
 
 type Env struct {
@@ -275,14 +276,19 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf
 		return
 	}
 
+	start := osutil.MonotonicNano()
 	output, hanged, err0 = env.cmd.exec(opts, progData)
+	elapsedNs := osutil.MonotonicNano() - start
 	if err0 != nil {
 		env.cmd.close()
 		env.cmd = nil
 		return
 	}
 
 	info, err0 = env.parseOutput(p, opts)
+	if info != nil {
+		info.ElapsedSec = float64(elapsedNs) / float64(1e9)
+	}
 	if info != nil && env.config.Flags&FlagSignal == 0 {
 		addFallbackSignal(p, info)
 	}

diff --git a/pkg/learning/mab.go b/pkg/learning/mab.go
@@ -0,0 +1,102 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+	"math/rand"
+	"sync"
+)
+
+// Action identifies the arm chosen by one PlainMAB.Action call. Pass it back
+// to PlainMAB.SaveReward to report the reward obtained for that choice.
+// The zero value means "no action was taken" (see Empty).
+type Action[T comparable] struct {
+	Arm T
+	// index is the position of Arm in PlainMAB.arms.
+	index int
+	// valid is set only on actions returned by PlainMAB.Action. Comparing
+	// against the zero Action is not enough: choosing a zero-valued arm at
+	// position 0 (e.g. arm 0 of a PlainMAB[int]) yields Arm == 0 and index == 0.
+	valid bool
+}
+
+// Empty reports whether the action was not produced by PlainMAB.Action.
+func (a Action[T]) Empty() bool {
+	return !a.valid
+}
+
+// countedValue is a running estimate together with the number of updates it has seen.
+type countedValue struct {
+	value float64
+	count int64
+}
+
+// update moves the estimate towards value by max(1/count, minStep) of the difference.
+func (cv *countedValue) update(value, minStep float64) {
+	// Using larger steps at the beginning allows us to
+	// converge faster to the actual value.
+	// The minStep limit ensures that we can still track
+	// non-stationary problems.
+	cv.count++
+	step := 1.0 / float64(cv.count)
+	if step < minStep {
+		step = minStep
+	}
+	cv.value += (value - cv.value) * step
+}
+
+// PlainMAB is a very simple epsilon-greedy MAB (multi-armed bandit) implementation.
+type PlainMAB[T comparable] struct {
+	// MinLearningRate is the lower bound on the step used to update an arm's estimate.
+	MinLearningRate float64
+	// ExplorationRate is the probability of picking a uniformly random arm.
+	ExplorationRate float64
+
+	mu      sync.RWMutex
+	arms    []T
+	weights []countedValue
+}
+
+// AddArms registers new arms. Their reward estimates start at 0.
+func (p *PlainMAB[T]) AddArms(arms ...T) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for _, arm := range arms {
+		p.arms = append(p.arms, arm)
+		p.weights = append(p.weights, countedValue{0, 0})
+	}
+}
+
+// Action selects an arm: with probability ExplorationRate a uniformly random one,
+// otherwise the arm with the highest estimate (the earliest added arm wins ties).
+// If no arms have been added, it returns an empty Action instead of panicking.
+func (p *PlainMAB[T]) Action(r *rand.Rand) Action[T] {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	if len(p.arms) == 0 {
+		return Action[T]{}
+	}
+	var pos int
+	if r.Float64() < p.ExplorationRate {
+		pos = r.Intn(len(p.arms))
+	} else {
+		for i := 1; i < len(p.arms); i++ {
+			if p.weights[i].value > p.weights[pos].value {
+				pos = i
+			}
+		}
+	}
+	return Action[T]{Arm: p.arms[pos], index: pos, valid: true}
+}
+
+// SaveReward records the reward obtained for an action returned by Action.
+// Empty actions carry no arm and are ignored.
+func (p *PlainMAB[T]) SaveReward(action Action[T], reward float64) {
+	if action.Empty() {
+		return
+	}
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.weights[action.index].update(reward, p.MinLearningRate)
+}
diff --git a/pkg/learning/mab_test.go b/pkg/learning/mab_test.go
@@ -0,0 +1,66 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/google/syzkaller/pkg/testutil"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMABSmallDiff(t *testing.T) {
+	r := rand.New(testutil.RandSource(t))
+	bandit := &PlainMAB[int]{
+		MinLearningRate: 0.0001,
+		ExplorationRate: 0.1,
+	}
+	arms := []float64{0.65, 0.7}
+	for i := range arms {
+		bandit.AddArms(i)
+	}
+	const steps = 40000
+	counts := runMAB(r, bandit, arms, steps)
+	t.Logf("counts: %v", counts)
+	assert.Greater(t, counts[1], steps/4*3)
+}
+
+func TestNonStationaryMAB(t *testing.T) {
+	r := rand.New(testutil.RandSource(t))
+	bandit := &PlainMAB[int]{
+		MinLearningRate: 0.02,
+		ExplorationRate: 0.04,
+	}
+
+	arms := []float64{0.2, 0.7, 0.5, 0.1}
+	for i := range arms {
+		bandit.AddArms(i)
+	}
+
+	const steps = 25000
+	counts := runMAB(r, bandit, arms, steps)
+	t.Logf("initially: %v", counts)
+
+	// Ensure that we've found the best arm.
+	assert.Greater(t, counts[1], steps/2)
+
+	// Now make the previously worst arm (index 3) the best one.
+	arms[3] = 0.9
+	counts = runMAB(r, bandit, arms, steps)
+	t.Logf("after reward change: %v", counts)
+	assert.Greater(t, counts[3], steps/2)
+}
+
+func runMAB(r *rand.Rand, bandit *PlainMAB[int], arms []float64, steps int) []int {
+	counts := make([]int, len(arms))
+	for i := 0; i < steps; i++ {
+		action := bandit.Action(r)
+		// TODO: use normal distribution?
+		reward := r.Float64() * arms[action.Arm]
+		counts[action.Arm]++
+		bandit.SaveReward(action, reward)
+	}
+	return counts
+}