utls/fuzz/fuzz.go
Katie Hockman dbca77fab6 internal/fuzz: improve minimizing message
In order to know the actual number of bytes
of the entire corpus entry, the coordinator
would likely need to unmarshal the bytes and
tally up the length. That's more work than it
is worth, so this change just clarifies that
the printed # of bytes is the length of the
entire file, not just the entry itself.

Fixes #48989

Change-Id: I6fa0c0206a249cefdf6335040c560ec0c5a55b4a
Reviewed-on: https://go-review.googlesource.com/c/go/+/361414
Trust: Katie Hockman <katie@golang.org>
Run-TryBot: Katie Hockman <katie@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
2021-11-04 19:34:33 +00:00

1074 lines
34 KiB
Go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fuzz provides common fuzzing functionality for tests built with
// "go test" and for programs that use fuzzing functionality in the testing
// package.
package fuzz
import (
"context"
"crypto/sha256"
"errors"
"fmt"
"internal/godebug"
"io"
"io/ioutil"
"math/bits"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"sync"
"time"
)
// CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing.
// The zero value is valid for each field unless specified otherwise.
type CoordinateFuzzingOpts struct {
// Log is a writer for logging progress messages and warnings.
// If nil, io.Discard will be used instead.
Log io.Writer
// Timeout is the amount of wall clock time to spend fuzzing after the corpus
// has loaded. If zero, there will be no time limit.
Timeout time.Duration
// Limit is the number of random values to generate and test. If zero,
// there will be no limit on the number of generated values.
Limit int64
// MinimizeTimeout is the amount of wall clock time to spend minimizing
// after discovering a crasher. If zero, there will be no time limit. If
// MinimizeTimeout and MinimizeLimit are both zero, then minimization will
// be disabled.
MinimizeTimeout time.Duration
// MinimizeLimit is the maximum number of calls to the fuzz function to be
// made while minimizing after finding a crash. If zero, there will be no
// limit. Calls to the fuzz function made when minimizing also count toward
// Limit. If MinimizeTimeout and MinimizeLimit are both zero, then
// minimization will be disabled.
MinimizeLimit int64
// parallel is the number of worker processes to run in parallel. If zero,
// CoordinateFuzzing will run GOMAXPROCS workers.
Parallel int
// Seed is a list of seed values added by the fuzz target with testing.F.Add
// and in testdata.
Seed []CorpusEntry
// Types is the list of types which make up a corpus entry.
// Types must be set and must match values in Seed.
Types []reflect.Type
// CorpusDir is a directory where files containing values that crash the
// code being tested may be written. CorpusDir must be set.
CorpusDir string
// CacheDir is a directory containing additional "interesting" values.
// The fuzzer may derive new values from these, and may write new values here.
CacheDir string
}
// CoordinateFuzzing creates several worker processes and communicates with
// them to test random inputs that could trigger crashes and expose bugs.
// The worker processes run the same binary in the same directory with the
// same environment variables as the coordinator process. Workers also run
// with the same arguments as the coordinator, except with the -test.fuzzworker
// flag prepended to the argument list.
//
// If a crash occurs, the function will return an error containing information
// about the crash, which can be reported to the user.
func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) {
if err := ctx.Err(); err != nil {
return err
}
if opts.Log == nil {
opts.Log = io.Discard
}
if opts.Parallel == 0 {
opts.Parallel = runtime.GOMAXPROCS(0)
}
if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit {
// Don't start more workers than we need.
opts.Parallel = int(opts.Limit)
}
c, err := newCoordinator(opts)
if err != nil {
return err
}
if opts.Timeout > 0 {
var cancel func()
ctx, cancel = context.WithTimeout(ctx, opts.Timeout)
defer cancel()
}
// fuzzCtx is used to stop workers, for example, after finding a crasher.
fuzzCtx, cancelWorkers := context.WithCancel(ctx)
defer cancelWorkers()
doneC := ctx.Done()
// stop is called when a worker encounters a fatal error.
var fuzzErr error
stopping := false
stop := func(err error) {
if err == fuzzCtx.Err() || isInterruptError(err) {
// Suppress cancellation errors and terminations due to SIGINT.
// The messages are not helpful since either the user triggered the error
// (with ^C) or another more helpful message will be printed (a crasher).
err = nil
}
if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) {
fuzzErr = err
}
if stopping {
return
}
stopping = true
cancelWorkers()
doneC = nil
}
// Ensure that any crash we find is written to the corpus, even if an error
// or interruption occurs while minimizing it.
crashWritten := false
defer func() {
if c.crashMinimizing == nil || crashWritten {
return
}
werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir)
if werr != nil {
err = fmt.Errorf("%w\n%v", err, werr)
return
}
if err == nil {
err = &crashError{
path: c.crashMinimizing.entry.Path,
err: errors.New(c.crashMinimizing.crasherMsg),
}
}
}()
// Start workers.
// TODO(jayconrod): do we want to support fuzzing different binaries?
dir := "" // same as self
binPath := os.Args[0]
args := append([]string{"-test.fuzzworker"}, os.Args[1:]...)
env := os.Environ() // same as self
errC := make(chan error)
workers := make([]*worker, opts.Parallel)
for i := range workers {
var err error
workers[i], err = newWorker(c, dir, binPath, args, env)
if err != nil {
return err
}
}
for i := range workers {
w := workers[i]
go func() {
err := w.coordinate(fuzzCtx)
if fuzzCtx.Err() != nil || isInterruptError(err) {
err = nil
}
cleanErr := w.cleanup()
if err == nil {
err = cleanErr
}
errC <- err
}()
}
// Main event loop.
// Do not return until all workers have terminated. We avoid a deadlock by
// receiving messages from workers even after ctx is cancelled.
activeWorkers := len(workers)
statTicker := time.NewTicker(3 * time.Second)
defer statTicker.Stop()
defer c.logStats()
c.logStats()
for {
var inputC chan fuzzInput
input, ok := c.peekInput()
if ok && c.crashMinimizing == nil && !stopping {
inputC = c.inputC
}
var minimizeC chan fuzzMinimizeInput
minimizeInput, ok := c.peekMinimizeInput()
if ok && !stopping {
minimizeC = c.minimizeC
}
select {
case <-doneC:
// Interrupted, cancelled, or timed out.
// stop sets doneC to nil so we don't busy wait here.
stop(ctx.Err())
case err := <-errC:
// A worker terminated, possibly after encountering a fatal error.
stop(err)
activeWorkers--
if activeWorkers == 0 {
return fuzzErr
}
case result := <-c.resultC:
// Received response from worker.
if stopping {
break
}
c.updateStats(result)
if result.crasherMsg != "" {
if c.warmupRun() && result.entry.IsSeed {
target := filepath.Base(c.opts.CorpusDir)
fmt.Fprintf(c.opts.Log, "found a crash while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent))
stop(errors.New(result.crasherMsg))
break
}
if c.canMinimize() && result.canMinimize {
if c.crashMinimizing != nil {
// This crash is not minimized, and another crash is being minimized.
// Ignore this one and wait for the other one to finish.
break
}
// Found a crasher but haven't yet attempted to minimize it.
// Send it back to a worker for minimization. Disable inputC so
// other workers don't continue fuzzing.
c.crashMinimizing = &result
fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte crash file\n", len(result.entry.Data))
c.queueForMinimization(result, nil)
} else if !crashWritten {
// Found a crasher that's either minimized or not minimizable.
// Write to corpus and stop.
err := writeToCorpus(&result.entry, opts.CorpusDir)
if err == nil {
crashWritten = true
err = &crashError{
path: result.entry.Path,
err: errors.New(result.crasherMsg),
}
}
if shouldPrintDebugInfo() {
fmt.Fprintf(
c.opts.Log,
"DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n",
c.elapsed(),
result.entry.Path,
result.entry.Parent,
result.entry.Generation,
len(result.entry.Data),
result.entryDuration,
)
}
stop(err)
}
} else if result.coverageData != nil {
if c.warmupRun() {
if shouldPrintDebugInfo() {
fmt.Fprintf(
c.opts.Log,
"DEBUG processed an initial input, elapsed: %s, id: %s, new bits: %d, size: %d, exec time: %s\n",
c.elapsed(),
result.entry.Parent,
countBits(diffCoverage(c.coverageMask, result.coverageData)),
len(result.entry.Data),
result.entryDuration,
)
}
c.updateCoverage(result.coverageData)
c.warmupInputLeft--
if c.warmupInputLeft == 0 {
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel)
if shouldPrintDebugInfo() {
fmt.Fprintf(
c.opts.Log,
"DEBUG finished processing input corpus, elapsed: %s, entries: %d, initial coverage bits: %d\n",
c.elapsed(),
len(c.corpus.entries),
countBits(c.coverageMask),
)
}
}
} else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil {
// Found a value that expanded coverage.
// It's not a crasher, but we may want to add it to the on-disk
// corpus and prioritize it for future fuzzing.
// TODO(jayconrod, katiehockman): Prioritize fuzzing these
// values which expanded coverage, perhaps based on the
// number of new edges that this result expanded.
// TODO(jayconrod, katiehockman): Don't write a value that's already
// in the corpus.
if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil {
// Send back to workers to find a smaller value that preserves
// at least one new coverage bit.
c.queueForMinimization(result, keepCoverage)
} else {
// Update the coordinator's coverage mask and save the value.
inputSize := len(result.entry.Data)
if opts.CacheDir != "" {
// It is possible that the input that was discovered is already
// present in the corpus, but the worker produced a coverage map
// that still expanded our total coverage (this may happen due to
// flakiness in the coverage counters). In order to prevent adding
// duplicate entries to the corpus (and re-writing the file on
// disk), skip it if the on disk file already exists.
// TOOD(roland): this check is limited in that it will only be
// applied if we are using the CacheDir. Another option would be
// to iterate through the corpus and check if it is already present,
// which would catch cases where we are not caching entries.
// A slightly faster approach would be to keep some kind of map of
// entry hashes, which would allow us to avoid iterating through
// all entries.
_, err = os.Stat(result.entry.Path)
if err == nil {
continue
}
err := writeToCorpus(&result.entry, opts.CacheDir)
if err != nil {
stop(err)
}
result.entry.Data = nil
}
c.updateCoverage(keepCoverage)
c.corpus.entries = append(c.corpus.entries, result.entry)
c.inputQueue.enqueue(result.entry)
c.interestingCount++
if shouldPrintDebugInfo() {
fmt.Fprintf(
c.opts.Log,
"DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n",
c.elapsed(),
result.entry.Path,
result.entry.Parent,
result.entry.Generation,
countBits(keepCoverage),
countBits(c.coverageMask),
inputSize,
result.entryDuration,
)
}
}
} else {
if shouldPrintDebugInfo() {
fmt.Fprintf(
c.opts.Log,
"DEBUG worker reported interesting input that doesn't expand coverage, elapsed: %s, id: %s, parent: %s, canMinimize: %t\n",
c.elapsed(),
result.entry.Path,
result.entry.Parent,
result.canMinimize,
)
}
}
} else if c.warmupRun() {
// No error or coverage data was reported for this input during
// warmup, so continue processing results.
c.warmupInputLeft--
if c.warmupInputLeft == 0 {
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel)
if shouldPrintDebugInfo() {
fmt.Fprintf(
c.opts.Log,
"DEBUG finished testing-only phase, elapsed: %s, entries: %d\n",
time.Since(c.startTime),
len(c.corpus.entries),
)
}
}
}
// Once the result has been processed, stop the worker if we
// have reached the fuzzing limit.
if c.opts.Limit > 0 && c.count >= c.opts.Limit {
stop(nil)
}
case inputC <- input:
// Sent the next input to a worker.
c.sentInput(input)
case minimizeC <- minimizeInput:
// Sent the next input for minimization to a worker.
c.sentMinimizeInput(minimizeInput)
case <-statTicker.C:
c.logStats()
}
}
// TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus,
// write to the cache instead.
}
// crashError wraps a crasher written to the seed corpus. It saves the name
// of the file where the input causing the crasher was saved. The testing
// framework uses this to report a command to re-run that specific input.
type crashError struct {
path string
err error
}
func (e *crashError) Error() string {
return e.err.Error()
}
func (e *crashError) Unwrap() error {
return e.err
}
func (e *crashError) CrashPath() string {
return e.path
}
type corpus struct {
entries []CorpusEntry
}
// CorpusEntry represents an individual input for fuzzing.
//
// We must use an equivalent type in the testing and testing/internal/testdeps
// packages, but testing can't import this package directly, and we don't want
// to export this type from testing. Instead, we use the same struct type and
// use a type alias (not a defined type) for convenience.
type CorpusEntry = struct {
Parent string
// Path is the path of the corpus file, if the entry was loaded from disk.
// For other entries, including seed values provided by f.Add, Path is the
// name of the test, e.g. seed#0 or its hash.
Path string
// Data is the raw input data. Data should only be populated for seed
// values. For on-disk corpus files, Data will be nil, as it will be loaded
// from disk using Path.
Data []byte
// Values is the unmarshaled values from a corpus file.
Values []interface{}
Generation int
// IsSeed indicates whether this entry is part of the seed corpus.
IsSeed bool
}
// Data returns the raw input bytes, either from the data struct field,
// or from disk.
func CorpusEntryData(ce CorpusEntry) ([]byte, error) {
if ce.Data != nil {
return ce.Data, nil
}
return os.ReadFile(ce.Path)
}
type fuzzInput struct {
// entry is the value to test initially. The worker will randomly mutate
// values from this starting point.
entry CorpusEntry
// timeout is the time to spend fuzzing variations of this input,
// not including starting or cleaning up.
timeout time.Duration
// limit is the maximum number of calls to the fuzz function the worker may
// make. The worker may make fewer calls, for example, if it finds an
// error early. If limit is zero, there is no limit on calls to the
// fuzz function.
limit int64
// warmup indicates whether this is a warmup input before fuzzing begins. If
// true, the input should not be fuzzed.
warmup bool
// coverageData reflects the coordinator's current coverageMask.
coverageData []byte
}
type fuzzResult struct {
// entry is an interesting value or a crasher.
entry CorpusEntry
// crasherMsg is an error message from a crash. It's "" if no crash was found.
crasherMsg string
// canMinimize is true if the worker should attempt to minimize this result.
// It may be false because an attempt has already been made.
canMinimize bool
// coverageData is set if the worker found new coverage.
coverageData []byte
// limit is the number of values the coordinator asked the worker
// to test. 0 if there was no limit.
limit int64
// count is the number of values the worker actually tested.
count int64
// totalDuration is the time the worker spent testing inputs.
totalDuration time.Duration
// entryDuration is the time the worker spent execution an interesting result
entryDuration time.Duration
}
type fuzzMinimizeInput struct {
// entry is an interesting value or crasher to minimize.
entry CorpusEntry
// crasherMsg is an error message from a crash. It's "" if no crash was found.
// If set, the worker will attempt to find a smaller input that also produces
// an error, though not necessarily the same error.
crasherMsg string
// limit is the maximum number of calls to the fuzz function the worker may
// make. The worker may make fewer calls, for example, if it can't reproduce
// an error. If limit is zero, there is no limit on calls to the fuzz function.
limit int64
// timeout is the time to spend minimizing this input.
// A zero timeout means no limit.
timeout time.Duration
// keepCoverage is a set of coverage bits that entry found that were not in
// the coordinator's combined set. When minimizing, the worker should find an
// input that preserves at least one of these bits. keepCoverage is nil for
// crashing inputs.
keepCoverage []byte
}
// coordinator holds channels that workers can use to communicate with
// the coordinator.
type coordinator struct {
opts CoordinateFuzzingOpts
// startTime is the time we started the workers after loading the corpus.
// Used for logging.
startTime time.Time
// inputC is sent values to fuzz by the coordinator. Any worker may receive
// values from this channel. Workers send results to resultC.
inputC chan fuzzInput
// minimizeC is sent values to minimize by the coordinator. Any worker may
// receive values from this channel. Workers send results to resultC.
minimizeC chan fuzzMinimizeInput
// resultC is sent results of fuzzing by workers. The coordinator
// receives these. Multiple types of messages are allowed.
resultC chan fuzzResult
// count is the number of values fuzzed so far.
count int64
// countLastLog is the number of values fuzzed when the output was last
// logged.
countLastLog int64
// timeLastLog is the time at which the output was last logged.
timeLastLog time.Time
// interestingCount is the number of unique interesting values which have
// been found this execution.
interestingCount int64
// warmupInputCount is the count of all entries in the corpus which will
// need to be received from workers to run once during warmup, but not fuzz.
// This could be for coverage data, or only for the purposes of verifying
// that the seed corpus doesn't have any crashers. See warmupRun.
warmupInputCount int
// warmupInputLeft is the number of entries in the corpus which still need
// to be received from workers to run once during warmup, but not fuzz.
// See warmupInputLeft.
warmupInputLeft int
// duration is the time spent fuzzing inside workers, not counting time
// starting up or tearing down.
duration time.Duration
// countWaiting is the number of fuzzing executions the coordinator is
// waiting on workers to complete.
countWaiting int64
// corpus is a set of interesting values, including the seed corpus and
// generated values that workers reported as interesting.
corpus corpus
// minimizationAllowed is true if one or more of the types of fuzz
// function's parameters can be minimized.
minimizationAllowed bool
// inputQueue is a queue of inputs that workers should try fuzzing. This is
// initially populated from the seed corpus and cached inputs. More inputs
// may be added as new coverage is discovered.
inputQueue queue
// minimizeQueue is a queue of inputs that caused errors or exposed new
// coverage. Workers should attempt to find smaller inputs that do the
// same thing.
minimizeQueue queue
// crashMinimizing is the crash that is currently being minimized.
crashMinimizing *fuzzResult
// coverageMask aggregates coverage that was found for all inputs in the
// corpus. Each byte represents a single basic execution block. Each set bit
// within the byte indicates that an input has triggered that block at least
// 1 << n times, where n is the position of the bit in the byte. For example, a
// value of 12 indicates that separate inputs have triggered this block
// between 4-7 times and 8-15 times.
coverageMask []byte
}
func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) {
// Make sure all of the seed corpus has marshalled data.
for i := range opts.Seed {
if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil {
opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...)
}
}
corpus, err := readCache(opts.Seed, opts.Types, opts.CacheDir)
if err != nil {
return nil, err
}
c := &coordinator{
opts: opts,
startTime: time.Now(),
inputC: make(chan fuzzInput),
minimizeC: make(chan fuzzMinimizeInput),
resultC: make(chan fuzzResult),
corpus: corpus,
timeLastLog: time.Now(),
}
if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 {
for _, t := range opts.Types {
if isMinimizable(t) {
c.minimizationAllowed = true
break
}
}
}
covSize := len(coverage())
if covSize == 0 {
fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n")
// Even though a coverage-only run won't occur, we should still run all
// of the seed corpus to make sure there are no existing failures before
// we start fuzzing.
c.warmupInputCount = len(c.opts.Seed)
for _, e := range c.opts.Seed {
c.inputQueue.enqueue(e)
}
} else {
c.warmupInputCount = len(c.corpus.entries)
for _, e := range c.corpus.entries {
c.inputQueue.enqueue(e)
}
// Set c.coverageMask to a clean []byte full of zeros.
c.coverageMask = make([]byte, covSize)
}
c.warmupInputLeft = c.warmupInputCount
if len(c.corpus.entries) == 0 {
fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n")
var vals []interface{}
for _, t := range opts.Types {
vals = append(vals, zeroValue(t))
}
data := marshalCorpusFile(vals...)
h := sha256.Sum256(data)
name := fmt.Sprintf("%x", h[:4])
c.corpus.entries = append(c.corpus.entries, CorpusEntry{Path: name, Data: data})
}
return c, nil
}
func (c *coordinator) updateStats(result fuzzResult) {
c.count += result.count
c.countWaiting -= result.limit
c.duration += result.totalDuration
}
func (c *coordinator) logStats() {
now := time.Now()
if c.warmupRun() {
runSoFar := c.warmupInputCount - c.warmupInputLeft
if coverageEnabled {
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount)
} else {
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount)
}
} else if c.crashMinimizing != nil {
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed())
} else {
rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds()
if coverageEnabled {
interestingTotalCount := int64(c.warmupInputCount-len(c.opts.Seed)) + c.interestingCount
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, interestingTotalCount)
} else {
fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate)
}
}
c.countLastLog = c.count
c.timeLastLog = now
}
// peekInput returns the next value that should be sent to workers.
// If the number of executions is limited, the returned value includes
// a limit for one worker. If there are no executions left, peekInput returns
// a zero value and false.
//
// peekInput doesn't actually remove the input from the queue. The caller
// must call sentInput after sending the input.
//
// If the input queue is empty and the coverage/testing-only run has completed,
// queue refills it from the corpus.
func (c *coordinator) peekInput() (fuzzInput, bool) {
if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit {
// Already making the maximum number of calls to the fuzz function.
// Don't send more inputs right now.
return fuzzInput{}, false
}
if c.inputQueue.len == 0 {
if c.warmupRun() {
// Wait for coverage/testing-only run to finish before sending more
// inputs.
return fuzzInput{}, false
}
c.refillInputQueue()
}
entry, ok := c.inputQueue.peek()
if !ok {
panic("input queue empty after refill")
}
input := fuzzInput{
entry: entry.(CorpusEntry),
timeout: workerFuzzDuration,
warmup: c.warmupRun(),
}
if c.coverageMask != nil {
input.coverageData = make([]byte, len(c.coverageMask))
copy(input.coverageData, c.coverageMask)
}
if input.warmup {
// No fuzzing will occur, but it should count toward the limit set by
// -fuzztime.
input.limit = 1
return input, true
}
if c.opts.Limit > 0 {
input.limit = c.opts.Limit / int64(c.opts.Parallel)
if c.opts.Limit%int64(c.opts.Parallel) > 0 {
input.limit++
}
remaining := c.opts.Limit - c.count - c.countWaiting
if input.limit > remaining {
input.limit = remaining
}
}
return input, true
}
// sentInput updates internal counters after an input is sent to c.inputC.
func (c *coordinator) sentInput(input fuzzInput) {
c.inputQueue.dequeue()
c.countWaiting += input.limit
}
// refillInputQueue refills the input queue from the corpus after it becomes
// empty.
func (c *coordinator) refillInputQueue() {
for _, e := range c.corpus.entries {
c.inputQueue.enqueue(e)
}
}
// queueForMinimization creates a fuzzMinimizeInput from result and adds it
// to the minimization queue to be sent to workers.
func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) {
if result.crasherMsg != "" {
c.minimizeQueue.clear()
}
input := fuzzMinimizeInput{
entry: result.entry,
crasherMsg: result.crasherMsg,
keepCoverage: keepCoverage,
}
c.minimizeQueue.enqueue(input)
}
// peekMinimizeInput returns the next input that should be sent to workers for
// minimization.
func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) {
if !c.canMinimize() {
// Already making the maximum number of calls to the fuzz function.
// Don't send more inputs right now.
return fuzzMinimizeInput{}, false
}
v, ok := c.minimizeQueue.peek()
if !ok {
return fuzzMinimizeInput{}, false
}
input := v.(fuzzMinimizeInput)
if c.opts.MinimizeTimeout > 0 {
input.timeout = c.opts.MinimizeTimeout
}
if c.opts.MinimizeLimit > 0 {
input.limit = c.opts.MinimizeLimit
} else if c.opts.Limit > 0 {
if input.crasherMsg != "" {
input.limit = c.opts.Limit
} else {
input.limit = c.opts.Limit / int64(c.opts.Parallel)
if c.opts.Limit%int64(c.opts.Parallel) > 0 {
input.limit++
}
}
}
if c.opts.Limit > 0 {
remaining := c.opts.Limit - c.count - c.countWaiting
if input.limit > remaining {
input.limit = remaining
}
}
return input, true
}
// sentMinimizeInput removes an input from the minimization queue after it's
// sent to minimizeC.
func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) {
c.minimizeQueue.dequeue()
c.countWaiting += input.limit
}
// warmupRun returns true while the coordinator is running inputs without
// mutating them as a warmup before fuzzing. This could be to gather baseline
// coverage data for entries in the corpus, or to test all of the seed corpus
// for errors before fuzzing begins.
//
// The coordinator doesn't store coverage data in the cache with each input
// because that data would be invalid when counter offsets in the test binary
// change.
//
// When gathering coverage, the coordinator sends each entry to a worker to
// gather coverage for that entry only, without fuzzing or minimizing. This
// phase ends when all workers have finished, and the coordinator has a combined
// coverage map.
func (c *coordinator) warmupRun() bool {
return c.warmupInputLeft > 0
}
// updateCoverage sets bits in c.coverageMask that are set in newCoverage.
// updateCoverage returns the number of newly set bits. See the comment on
// coverageMask for the format.
func (c *coordinator) updateCoverage(newCoverage []byte) int {
if len(newCoverage) != len(c.coverageMask) {
panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask)))
}
newBitCount := 0
for i := range newCoverage {
diff := newCoverage[i] &^ c.coverageMask[i]
newBitCount += bits.OnesCount8(diff)
c.coverageMask[i] |= newCoverage[i]
}
return newBitCount
}
// canMinimize returns whether the coordinator should attempt to find smaller
// inputs that reproduce a crash or new coverage.
func (c *coordinator) canMinimize() bool {
return c.minimizationAllowed &&
(c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit)
}
func (c *coordinator) elapsed() time.Duration {
return time.Since(c.startTime).Round(1 * time.Second)
}
// readCache creates a combined corpus from seed values and values in the cache
// (in GOCACHE/fuzz).
//
// TODO(fuzzing): need a mechanism that can remove values that
// aren't useful anymore, for example, because they have the wrong type.
func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) {
var c corpus
c.entries = append(c.entries, seed...)
entries, err := ReadCorpus(cacheDir, types)
if err != nil {
if _, ok := err.(*MalformedCorpusError); !ok {
// It's okay if some files in the cache directory are malformed and
// are not included in the corpus, but fail if it's an I/O error.
return corpus{}, err
}
// TODO(jayconrod,katiehockman): consider printing some kind of warning
// indicating the number of files which were skipped because they are
// malformed.
}
c.entries = append(c.entries, entries...)
return c, nil
}
// MalformedCorpusError is an error found while reading the corpus from the
// filesystem. All of the errors are stored in the errs list. The testing
// framework uses this to report malformed files in testdata.
type MalformedCorpusError struct {
errs []error
}
func (e *MalformedCorpusError) Error() string {
var msgs []string
for _, s := range e.errs {
msgs = append(msgs, s.Error())
}
return strings.Join(msgs, "\n")
}
// ReadCorpus reads the corpus from the provided dir. The returned corpus
// entries are guaranteed to match the given types. Any malformed files will
// be saved in a MalformedCorpusError and returned, along with the most recent
// error.
func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) {
files, err := ioutil.ReadDir(dir)
if os.IsNotExist(err) {
return nil, nil // No corpus to read
} else if err != nil {
return nil, fmt.Errorf("reading seed corpus from testdata: %v", err)
}
var corpus []CorpusEntry
var errs []error
for _, file := range files {
// TODO(jayconrod,katiehockman): determine when a file is a fuzzing input
// based on its name. We should only read files created by writeToCorpus.
// If we read ALL files, we won't be able to change the file format by
// changing the extension. We also won't be able to add files like
// README.txt explaining why the directory exists.
if file.IsDir() {
continue
}
filename := filepath.Join(dir, file.Name())
data, err := ioutil.ReadFile(filename)
if err != nil {
return nil, fmt.Errorf("failed to read corpus file: %v", err)
}
var vals []interface{}
vals, err = readCorpusData(data, types)
if err != nil {
errs = append(errs, fmt.Errorf("%q: %v", filename, err))
continue
}
corpus = append(corpus, CorpusEntry{Path: filename, Values: vals})
}
if len(errs) > 0 {
return corpus, &MalformedCorpusError{errs: errs}
}
return corpus, nil
}
func readCorpusData(data []byte, types []reflect.Type) ([]interface{}, error) {
vals, err := unmarshalCorpusFile(data)
if err != nil {
return nil, fmt.Errorf("unmarshal: %v", err)
}
if err = CheckCorpus(vals, types); err != nil {
return nil, err
}
return vals, nil
}
// CheckCorpus verifies that the types in vals match the expected types
// provided.
func CheckCorpus(vals []interface{}, types []reflect.Type) error {
if len(vals) != len(types) {
return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types))
}
valsT := make([]reflect.Type, len(vals))
for valsI, v := range vals {
valsT[valsI] = reflect.TypeOf(v)
}
for i := range types {
if valsT[i] != types[i] {
return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types)
}
}
return nil
}
// writeToCorpus atomically writes the given bytes to a new file in testdata. If
// the directory does not exist, it will create one. If the file already exists,
// writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new
// file that was just written or an error if it failed.
func writeToCorpus(entry *CorpusEntry, dir string) (err error) {
sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))
entry.Path = filepath.Join(dir, sum)
if err := os.MkdirAll(dir, 0777); err != nil {
return err
}
if err := ioutil.WriteFile(entry.Path, entry.Data, 0666); err != nil {
os.Remove(entry.Path) // remove partially written file
return err
}
return nil
}
func testName(path string) string {
return filepath.Base(path)
}
func zeroValue(t reflect.Type) interface{} {
for _, v := range zeroVals {
if reflect.TypeOf(v) == t {
return v
}
}
panic(fmt.Sprintf("unsupported type: %v", t))
}
var zeroVals []interface{} = []interface{}{
[]byte(""),
string(""),
false,
byte(0),
rune(0),
float32(0),
float64(0),
int(0),
int8(0),
int16(0),
int32(0),
int64(0),
uint(0),
uint8(0),
uint16(0),
uint32(0),
uint64(0),
}
var (
debugInfo bool
debugInfoOnce sync.Once
)
func shouldPrintDebugInfo() bool {
debugInfoOnce.Do(func() {
debugInfo = godebug.Get("fuzzdebug") == "1"
})
return debugInfo
}