Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkg/fuzzer: use a MAB to decide on exec fuzz vs exec gen #4632

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 52 additions & 19 deletions pkg/fuzzer/fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/ipc"
"github.com/google/syzkaller/pkg/learning"
"github.com/google/syzkaller/pkg/rpctype"
"github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/prog"
Expand All @@ -34,6 +35,12 @@ type Fuzzer struct {
ctMu sync.Mutex // TODO: use RWLock.
ctRegenerate chan struct{}

// Use a MAB to determine the right distribution of
// exec fuzz and exec gen.
genFuzzMAB *learning.PlainMAB[string]
genSignalSpeed *learning.RunningRatioAverage[float64]
fuzzSignalSpeed *learning.RunningRatioAverage[float64]

nextExec *priorityQueue[*Request]
nextJobID atomic.Int64

Expand All @@ -43,6 +50,12 @@ type Fuzzer struct {

func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
target *prog.Target) *Fuzzer {
genFuzzMAB := &learning.PlainMAB[string]{
ExplorationRate: 0.02,
MinLearningRate: 0.0005,
}
genFuzzMAB.AddArms(statFuzz, statGenerate)

f := &Fuzzer{
Config: cfg,
Cover: &Cover{},
Expand All @@ -54,7 +67,10 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,

// We're okay to lose some of the messages -- if we are already
// regenerating the table, we don't want to repeat it right away.
ctRegenerate: make(chan struct{}),
ctRegenerate: make(chan struct{}),
genFuzzMAB: genFuzzMAB,
genSignalSpeed: learning.NewRunningRatioAverage[float64](10000),
fuzzSignalSpeed: learning.NewRunningRatioAverage[float64](20000),

nextExec: makePriorityQueue[*Request](),
}
Expand Down Expand Up @@ -91,6 +107,8 @@ type Request struct {
flags ProgTypes
stat string
resultC chan *Result

genFuzzAction learning.Action[string]
}

type Result struct {
Expand All @@ -102,11 +120,12 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
// Triage individual calls.
// We do it before unblocking the waiting threads because
// it may result in concurrent modification of req.Prog.
var newSignal int
if req.NeedSignal != rpctype.NoSignal && res.Info != nil {
for call, info := range res.Info.Calls {
fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
newSignal += fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
}
fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
newSignal += fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
}
// Unblock threads that wait for the result.
if req.resultC != nil {
Expand All @@ -116,20 +135,36 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
fuzzer.mu.Lock()
fuzzer.stats[req.stat]++
fuzzer.mu.Unlock()
// Update the MAB(s).
reward := 0.0
if res.Info != nil && res.Info.ElapsedSec > 0 {
// Similarly to the "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning"
// paper, let's use the ratio of "new max signal" to "execution time".
// Unlike the paper, let's take the raw value of it instead of its ratio to the average one.
reward = float64(newSignal) / res.Info.ElapsedSec
if req.stat == statGenerate {
fuzzer.genSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
} else if req.stat == statFuzz {
fuzzer.fuzzSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
}
}
if !req.genFuzzAction.Empty() {
fuzzer.genFuzzMAB.SaveReward(req.genFuzzAction, reward)
}
}

func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
flags ProgTypes) {
flags ProgTypes) int {
prio := signalPrio(p, info, call)
newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio)
if newMaxSignal.Empty() {
return
return 0
}
if flags&progInTriage > 0 {
// We are already triaging this exact prog.
// All newly found coverage is flaky.
fuzzer.Logf(2, "found new flaky signal in call %d in %s", call, p)
return
return newMaxSignal.Len()
}
fuzzer.Logf(2, "found new signal in call %d in %s", call, p)
fuzzer.startJob(&triageJob{
Expand All @@ -140,6 +175,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
flags: flags,
jobPriority: triageJobPrio(flags),
})
return newMaxSignal.Len()
}

func signalPrio(p *prog.Prog, info *ipc.CallInfo, call int) (prio uint8) {
Expand Down Expand Up @@ -184,21 +220,18 @@ func (fuzzer *Fuzzer) nextInput() *Request {
}
}

// Either generate a new input or mutate an existing one.
mutateRate := 0.95
if !fuzzer.Config.Coverage {
// If we don't have real coverage signal, generate programs
// more frequently because fallback signal is weak.
mutateRate = 0.5
}
rnd := fuzzer.rand()
if rnd.Float64() < mutateRate {
req := mutateProgRequest(fuzzer, rnd)
if req != nil {
return req
}
action := fuzzer.genFuzzMAB.Action(rnd)

var req *Request
if action.Arm == statFuzz {
req = mutateProgRequest(fuzzer, rnd)
}
if req == nil {
req = genProgRequest(fuzzer, rnd)
}
return genProgRequest(fuzzer, rnd)
req.genFuzzAction = action
return req
}

func (fuzzer *Fuzzer) startJob(newJob job) {
Expand Down
2 changes: 2 additions & 0 deletions pkg/fuzzer/fuzzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ func TestFuzz(t *testing.T) {
t.Logf("%s", p.Serialize())
}

t.Logf("stats: %+v", fuzzer.Stats().Named)

assert.Equal(t, len(tf.expectedCrashes), len(tf.crashes),
"not all expected crashes were found")
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/fuzzer/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,7 @@ func (fuzzer *Fuzzer) Stats() Stats {
for k, v := range fuzzer.stats {
ret.Named[k] = v
}
ret.Named["exec gen, sig/sec*1000"] = uint64(fuzzer.genSignalSpeed.Load() * 1000)
ret.Named["exec fuzz, sig/sec*1000"] = uint64(fuzzer.fuzzSignalSpeed.Load() * 1000)
return ret
}
10 changes: 8 additions & 2 deletions pkg/ipc/ipc.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,9 @@ type CallInfo struct {
}

type ProgInfo struct {
Calls []CallInfo
Extra CallInfo // stores Signal and Cover collected from background threads
Calls []CallInfo
Extra CallInfo // stores Signal and Cover collected from background threads
ElapsedSec float64 // total execution time in seconds
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would store this as time.Duration. Floats are a pretty specific thing; if something (e.g. learning) wants floats, it should convert the standard time to floats itself.
Also if we add per-syscall execution time, storing it as seconds will be somewhat strange since most syscalls execute within microseconds.

}

type Env struct {
Expand Down Expand Up @@ -275,14 +276,19 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf
return
}

start := osutil.MonotonicNano()
output, hanged, err0 = env.cmd.exec(opts, progData)
elapsedNs := osutil.MonotonicNano() - start
if err0 != nil {
env.cmd.close()
env.cmd = nil
return
}

info, err0 = env.parseOutput(p, opts)
if info != nil {
info.ElapsedSec = float64(elapsedNs) / float64(1e9)
}
if info != nil && env.config.Flags&FlagSignal == 0 {
addFallbackSignal(p, info)
}
Expand Down
77 changes: 77 additions & 0 deletions pkg/learning/mab.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package learning

import (
"math/rand"
"sync"
)

// Action is a single decision returned by PlainMAB.Action: the chosen arm
// plus the bookkeeping needed to attribute a reward to it later.
//
// The zero value is the "empty" action, meaning no decision was made.
// The valid flag (rather than a comparison with the zero value) marks real
// decisions, because a legitimate pick of the first arm has index 0 and may
// carry an Arm equal to the zero value of T (e.g. 0 for int).
type Action[T comparable] struct {
	Arm   T
	index int
	valid bool
}

// Empty reports whether the action is the zero value, i.e. it does not
// represent a decision made by a bandit.
func (a Action[T]) Empty() bool {
	return !a.valid
}

// countedValue is an incrementally updated estimate together with the
// number of samples that have been folded into it.
type countedValue struct {
	value float64
	count int64
}

// update moves the estimate towards value. The step size is 1/count
// (a plain running average) but never smaller than minStep.
func (cv *countedValue) update(value, minStep float64) {
	// Using larger steps at the beginning allows us to
	// converge faster to the actual value.
	// The minStep limit ensures that we can still track
	// non-stationary problems.
	cv.count++
	step := 1.0 / float64(cv.count)
	if step < minStep {
		step = minStep
	}
	cv.value += (value - cv.value) * step
}

// PlainMAB is a very simple epsilon-greedy MAB implementation.
type PlainMAB[T comparable] struct {
	// MinLearningRate is the lower bound of the step size used when
	// a reward is folded into an arm's estimate.
	MinLearningRate float64
	// ExplorationRate is the probability with which Action picks
	// a uniformly random arm instead of the best known one.
	ExplorationRate float64

	mu      sync.RWMutex
	arms    []T
	weights []countedValue
}

// AddArms registers additional arms. Every new arm starts with
// a zero estimate and a zero sample count.
func (p *PlainMAB[T]) AddArms(arms ...T) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.arms = append(p.arms, arms...)
	p.weights = append(p.weights, make([]countedValue, len(arms))...)
}

// Action picks the arm to try next. With probability ExplorationRate it is
// a uniformly random arm, otherwise it is the arm with the highest current
// estimate (the earliest added one in case of a tie).
//
// If no arms have been added, the empty Action is returned and r is not used.
func (p *PlainMAB[T]) Action(r *rand.Rand) Action[T] {
	p.mu.RLock()
	defer p.mu.RUnlock()
	if len(p.arms) == 0 {
		// There is nothing to choose from: r.Intn(0) and p.arms[0]
		// would both panic.
		return Action[T]{}
	}
	var pos int
	if r.Float64() < p.ExplorationRate {
		pos = r.Intn(len(p.arms))
	} else {
		for i := 1; i < len(p.arms); i++ {
			if p.weights[i].value > p.weights[pos].value {
				pos = i
			}
		}
	}
	return Action[T]{Arm: p.arms[pos], index: pos, valid: true}
}

// SaveReward folds the reward observed for the action into the estimate of
// the arm that the action selected. Empty actions are ignored, so that
// a reward is never attributed to an arm that was not actually chosen.
func (p *PlainMAB[T]) SaveReward(action Action[T], reward float64) {
	if action.Empty() {
		return
	}
	p.mu.Lock()
	defer p.mu.Unlock()
	p.weights[action.index].update(reward, p.MinLearningRate)
}
66 changes: 66 additions & 0 deletions pkg/learning/mab_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package learning

import (
"math/rand"
"testing"

"github.com/google/syzkaller/pkg/testutil"
"github.com/stretchr/testify/assert"
)

// TestMABSmallDiff checks that the bandit prefers the better of two arms
// even when their average rewards are close to each other.
func TestMABSmallDiff(t *testing.T) {
	const steps = 40000
	r := rand.New(testutil.RandSource(t))
	// Per-arm upper bounds of the simulated reward; arm 1 is slightly better.
	rewards := []float64{0.65, 0.7}
	bandit := &PlainMAB[int]{
		MinLearningRate: 0.0001,
		ExplorationRate: 0.1,
	}
	for arm := 0; arm < len(rewards); arm++ {
		bandit.AddArms(arm)
	}
	picked := runMAB(r, bandit, rewards, steps)
	t.Logf("counts: %v", picked)
	// The better arm must be chosen in more than 3/4 of all steps.
	assert.Greater(t, picked[1], steps/4*3)
}

// TestNonStationaryMAB checks that the bandit first settles on the best arm
// and then switches over once another arm becomes the best one.
func TestNonStationaryMAB(t *testing.T) {
	const steps = 25000
	r := rand.New(testutil.RandSource(t))
	bandit := &PlainMAB[int]{
		MinLearningRate: 0.02,
		ExplorationRate: 0.04,
	}

	// Per-arm upper bounds of the simulated reward; arm 1 is the best one.
	rewards := []float64{0.2, 0.7, 0.5, 0.1}
	for arm := 0; arm < len(rewards); arm++ {
		bandit.AddArms(arm)
	}

	// Phase 1: the bandit must spend most of the steps on arm 1.
	before := runMAB(r, bandit, rewards, steps)
	t.Logf("initially: %v", before)
	assert.Greater(t, before[1], steps/2)

	// Phase 2: make arm 3 the best one and expect the bandit to follow.
	rewards[3] = 0.9
	after := runMAB(r, bandit, rewards, steps)
	t.Logf("after reward change: %v", after)
	assert.Greater(t, after[3], steps/2)
}

// runMAB drives the bandit for the given number of steps and returns how
// many times each arm was picked. The reward for a pick is drawn uniformly
// from [0, arms[arm]), where arm is the picked arm.
func runMAB(r *rand.Rand, bandit *PlainMAB[int], arms []float64, steps int) []int {
	picked := make([]int, len(arms))
	for step := 0; step < steps; step++ {
		choice := bandit.Action(r)
		arm := choice.Arm
		picked[arm]++
		// TODO: use normal distribution?
		bandit.SaveReward(choice, r.Float64()*arms[arm])
	}
	return picked
}
Loading
Loading