Skip to content

Commit

Permalink
internal/scan: add binary extract mode
Browse files Browse the repository at this point in the history
The extract mode spits out a json blob representing the minimal
representation of a Go binary needed for govulncheck vulnerability
detection. binary mode accepts both a Go binary and this representation
as an input.

The contents of extract should be regarded as a blob. The users of this
flag should not rely on its representation. It might change in the
future.

Change-Id: I81027062d34609fed7541ad2092d4cbe5df0d118
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/542035
Run-TryBot: Zvonimir Pavlinovic <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Ian Cottrell <[email protected]>
Reviewed-by: Maceo Thompson <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
  • Loading branch information
zpavlinovic committed Jan 22, 2024
1 parent 3072335 commit 8fb35e0
Show file tree
Hide file tree
Showing 20 changed files with 285 additions and 23 deletions.
5 changes: 5 additions & 0 deletions cmd/govulncheck/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ with the -mode=binary flag:
Govulncheck uses the binary's symbol information to find mentions of vulnerable
functions. Its output omits call stacks, which require source code analysis.
Govulncheck also supports -mode=extract on a Go binary for extraction of minimal
information needed to analyze the binary. This will produce a blob, typically much
smaller than the binary, that can also be passed to govulncheck as an argument with
-mode=binary. The users should not rely on the contents or representation of the blob.
Govulncheck exits successfully (exit code 0) if there are no vulnerabilities,
and exits unsuccessfully if there are. It also exits successfully if the -json flag
is provided, regardless of the number of detected vulnerabilities.
Expand Down
39 changes: 37 additions & 2 deletions cmd/govulncheck/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"unsafe"

"github.com/google/go-cmdtest"
"github.com/google/go-cmp/cmp"
"golang.org/x/vuln/internal/govulncheck"
"golang.org/x/vuln/internal/test"
"golang.org/x/vuln/internal/web"
Expand Down Expand Up @@ -153,7 +154,10 @@ func TestCommand(t *testing.T) {
varName := filepath.Base(md) + "_binary"
os.Setenv(varName, binary)
}
runTestSuite(t, filepath.Join(testDir, "testdata", "testfiles"), govulndbURI.String(), *update)
testFilesDir := filepath.Join(testDir, "testdata", "testfiles")
os.Setenv("testdir", testFilesDir)

runTestSuite(t, testFilesDir, govulndbURI.String(), *update)
if runtime.GOOS != "darwin" {
// Binaries are not stripped on darwin with go1.21 and earlier. See #61051.
runTestSuite(t, filepath.Join(testDir, "testdata", "strip"), govulndbURI.String(), *update)
Expand Down Expand Up @@ -196,7 +200,7 @@ func runTestSuite(t *testing.T, dir string, govulndb string, update bool) {
}
ts.DisableLogging = true

ts.Commands["govulncheck"] = func(args []string, inputFile string) ([]byte, error) {
govulncheckCmd := func(args []string, inputFile string) ([]byte, error) {
parallelLimiter <- struct{}{}
defer func() { <-parallelLimiter }()

Expand Down Expand Up @@ -250,6 +254,37 @@ func runTestSuite(t *testing.T, dir string, govulndb string, update bool) {
}
return out, err
}
ts.Commands["govulncheck"] = govulncheckCmd

// govulncheck-cmp is like govulncheck except that the last argument is a file
// whose contents are compared to the output of govulncheck. This command does
// not output anything: it succeeds when the output matches the file exactly
// and fails with exit code 1 (carrying a diff in the message) otherwise.
ts.Commands["govulncheck-cmp"] = func(args []string, inputFile string) ([]byte, error) {
	// The comparison file is mandatory. Fail loudly instead of letting a
	// malformed test line pass silently with no check performed.
	if len(args) == 0 {
		return nil, &cmdtest.ExitCodeErr{Msg: "govulncheck-cmp: missing file to compare the output against", Code: 1}
	}
	// The last argument names the expected-output file; everything before
	// it is forwarded to govulncheck unchanged.
	cmpArg := args[len(args)-1]
	gArgs := args[:len(args)-1]

	out, err := govulncheckCmd(gArgs, inputFile)
	if err != nil {
		return nil, &cmdtest.ExitCodeErr{Msg: err.Error(), Code: 1}
	}
	got := string(out)

	file, err := os.ReadFile(cmpArg)
	if err != nil {
		return nil, &cmdtest.ExitCodeErr{Msg: err.Error(), Code: 1}
	}
	want := string(file)

	if diff := cmp.Diff(want, got); diff != "" {
		return nil, &cmdtest.ExitCodeErr{Msg: "govulncheck output not matching the file contents:\n" + diff, Code: 1}
	}
	return nil, nil
}

if update {
ts.Run(t, true)
return
Expand Down
Binary file not shown.
46 changes: 46 additions & 0 deletions cmd/govulncheck/testdata/testfiles/extract/binary_extract.ct
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#####
# Test binary mode using the extracted binary blob.
$ govulncheck -mode=binary ${testdir}/extract/vuln.blob --> FAIL 3
Scanning your binary for known vulnerabilities...

Vulnerability #1: GO-2021-0265
A maliciously crafted path can cause Get and other query functions to
consume excessive amounts of CPU and time.
More info: https://pkg.go.dev/vuln/GO-2021-0265
Module: github.com/tidwall/gjson
Found in: github.com/tidwall/[email protected]
Fixed in: github.com/tidwall/[email protected]
Example traces found:
#1: gjson.Get
#2: gjson.Result.Get

Vulnerability #2: GO-2021-0113
Due to improper index calculation, an incorrectly formatted language tag can
cause Parse to panic via an out of bounds read. If Parse is used to process
untrusted user inputs, this may be used as a vector for a denial of service
attack.
More info: https://pkg.go.dev/vuln/GO-2021-0113
Module: golang.org/x/text
Found in: golang.org/x/[email protected]
Fixed in: golang.org/x/[email protected]
Example traces found:
#1: language.Parse

Vulnerability #3: GO-2021-0054
Due to improper bounds checking, maliciously crafted JSON objects can cause
an out-of-bounds panic. If parsing user input, this may be used as a denial
of service vector.
More info: https://pkg.go.dev/vuln/GO-2021-0054
Module: github.com/tidwall/gjson
Found in: github.com/tidwall/[email protected]
Fixed in: github.com/tidwall/[email protected]
Example traces found:
#1: gjson.Result.ForEach

Your code is affected by 3 vulnerabilities from 2 modules.

Share feedback at https://go.dev/s/govulncheck-feedback.

# Test extract mode. Due to the size of the blob even for the smallest programs,
# we directly compare its output to the target vuln.blob file.
$ govulncheck-cmp -mode=extract ${moddir}/vuln/vuln_dont_run_me ${testdir}/extract/vuln.blob
2 changes: 2 additions & 0 deletions cmd/govulncheck/testdata/testfiles/extract/vuln.blob

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"govulncheck-extract","version":"0.1.0"}{"modules":[]}{"name":"govulncheck-extract","version":"0.1.0"}
49 changes: 47 additions & 2 deletions cmd/govulncheck/testdata/testfiles/failures/binary_fail.ct
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,54 @@ $ govulncheck -mode=binary notafile --> FAIL 2
"notafile" is not a file

#####
# Test of passing a non-binary file to -mode=binary
# Test of passing a non-binary and non-blob file to -mode=binary
$ govulncheck -mode=binary ${moddir}/vuln/go.mod --> FAIL 1
govulncheck: could not parse provided binary: unrecognized file format
govulncheck: unrecognized binary format

#####
# Test of passing a blob with invalid header name
$ govulncheck -mode=binary ${testdir}/failures/invalid_header_name.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing a blob with invalid header version
$ govulncheck -mode=binary ${testdir}/failures/invalid_header_version.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing a blob with no header
$ govulncheck -mode=binary ${testdir}/failures/no_header.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing a blob with invalid header, i.e., no header
$ govulncheck -mode=binary ${testdir}/failures/no_header.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing a blob with no body
$ govulncheck -mode=binary ${testdir}/failures/no_body.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing an empty blob/file
$ govulncheck -mode=binary ${testdir}/failures/empty.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing an empty blob message
$ govulncheck -mode=binary ${testdir}/failures/empty_message.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing blob message with multiple headers
$ govulncheck -mode=binary ${testdir}/failures/multi_header.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of passing blob message with something after the body
$ govulncheck -mode=binary ${testdir}/failures/multi_header.blob --> FAIL 1
govulncheck: unrecognized binary format

#####
# Test of trying to analyze multiple binaries
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
4 changes: 4 additions & 0 deletions cmd/govulncheck/testdata/testfiles/failures/extract_fail.ct
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#####
# Test extraction of an unsupported file format
$ govulncheck -mode=extract ${moddir}/vuln/go.mod --> FAIL 1
govulncheck: unrecognized binary format
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"invalid-name","protocol":"0.1.0"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"invalid-name","version":"0.1.0"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"govulncheck-extract","version":"8.8.8"}{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"govulncheck-extract","version":"0.1.0"}{"name":"govulncheck-extract","version":"0.1.0"}{"modules":[]}
1 change: 1 addition & 0 deletions cmd/govulncheck/testdata/testfiles/failures/no_body.blob
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"govulncheck-extract","version":"0.1.0"}
1 change: 1 addition & 0 deletions cmd/govulncheck/testdata/testfiles/failures/no_header.blob
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"modules":[{"Path":"github.com/tidwall/gjson","Version":"v1.6.5","Replace":null,"Time":null,"Main":false,"Indirect":false,"Dir":"","GoMod":"","GoVersion":"","Error":null}]}
72 changes: 53 additions & 19 deletions internal/scan/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ package scan

import (
"context"
"fmt"
"encoding/json"
"errors"
"io"
"os"
"runtime/debug"
Expand All @@ -21,17 +22,11 @@ import (
"golang.org/x/vuln/internal/vulncheck"
)

// runBinary detects presence of vulnerable symbols in an executable.
// runBinary detects presence of vulnerable symbols in an executable or its minimal blob representation.
func runBinary(ctx context.Context, handler govulncheck.Handler, cfg *config, client *client.Client) (err error) {
defer derrors.Wrap(&err, "govulncheck")

exe, err := os.Open(cfg.patterns[0])
if err != nil {
return err
}
defer exe.Close()

bin, err := createBin(exe)
bin, err := createBin(cfg.patterns[0])
if err != nil {
return err
}
Expand All @@ -43,18 +38,57 @@ func runBinary(ctx context.Context, handler govulncheck.Handler, cfg *config, cl
return vulncheck.Binary(ctx, handler, bin, &cfg.Config, client)
}

func createBin(exe io.ReaderAt) (*vulncheck.Bin, error) {
mods, packageSymbols, bi, err := buildinfo.ExtractPackagesAndSymbols(exe)
func createBin(path string) (*vulncheck.Bin, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("could not parse provided binary: %v", err)
return nil, err
}
defer f.Close()

// First check if the path points to a Go binary. Otherwise, blob
// parsing might json decode a Go binary which takes time.
//
// TODO(#64716): use fingerprinting to make this precise, clean, and fast.
mods, packageSymbols, bi, err := buildinfo.ExtractPackagesAndSymbols(f)
if err == nil {
return &vulncheck.Bin{
Modules: mods,
PkgSymbols: packageSymbols,
GoVersion: bi.GoVersion,
GOOS: findSetting("GOOS", bi),
GOARCH: findSetting("GOARCH", bi),
}, nil
}

// Otherwise, see if the path points to a valid blob.
bin := parseBlob(f)
if bin != nil {
return bin, nil
}

return nil, errors.New("unrecognized binary format")
}

// parseBlob extracts vulncheck.Bin from a valid blob. If it
// cannot recognize a valid blob, returns nil.
func parseBlob(from io.Reader) *vulncheck.Bin {
dec := json.NewDecoder(from)

var h header
if err := dec.Decode(&h); err != nil {
return nil // no header
} else if h.Name != extractModeID || h.Version != extractModeVersion {
return nil // invalid header
}

var b vulncheck.Bin
if err := dec.Decode(&b); err != nil {
return nil // no body
}
if dec.More() {
return nil // we want just header and body, nothing else
}
return &vulncheck.Bin{
Modules: mods,
PkgSymbols: packageSymbols,
GoVersion: bi.GoVersion,
GOOS: findSetting("GOOS", bi),
GOARCH: findSetting("GOARCH", bi),
}, nil
return &b
}

// findSetting returns value of setting from bi if present.
Expand Down
63 changes: 63 additions & 0 deletions internal/scan/extract.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build go1.18
// +build go1.18

package scan

import (
"encoding/json"
"fmt"
"io"
"sort"

"golang.org/x/vuln/internal/derrors"
"golang.org/x/vuln/internal/vulncheck"
)

const (
// extractModeID is the unique name of the extract mode protocol. It is
// written to the "name" field of the blob header.
extractModeID = "govulncheck-extract"
// extractModeVersion is the version of the extract mode protocol. It is
// written to the "version" field of the blob header; parseBlob rejects
// blobs whose header name or version differ from these constants.
extractModeVersion = "0.1.0"
)

// header is the first of the two JSON messages that make up a blob. It
// identifies the protocol and the protocol version that produced the blob.
type header struct {
// Name is the protocol name; runExtract sets it to extractModeID.
Name string `json:"name"`
// Version is the protocol version; runExtract sets it to extractModeVersion.
Version string `json:"version"`
}

// runExtract dumps the extracted abstraction of the binary at cfg.patterns[0]
// to out. It prints out exactly two blob messages, one with the header and one
// with the vulncheck.Bin as the body, each JSON-encoded on its own line.
//
// The input is resolved by createBin. Any returned error is annotated with
// "govulncheck" by the deferred derrors.Wrap, and encoding failures wrap the
// underlying error so callers can still inspect it with errors.Is/As.
//
// NOTE(review): assumes cfg.patterns has at least one element — presumably
// validated by the caller; confirm.
func runExtract(cfg *config, out io.Writer) (err error) {
	defer derrors.Wrap(&err, "govulncheck")

	bin, err := createBin(cfg.patterns[0])
	if err != nil {
		return err
	}
	sortBin(bin) // sort for easier testing and validation

	// Named hdr rather than header so the local does not shadow the type.
	hdr := header{
		Name:    extractModeID,
		Version: extractModeVersion,
	}

	enc := json.NewEncoder(out)
	if err := enc.Encode(hdr); err != nil {
		return fmt.Errorf("marshaling blob header: %w", err)
	}
	if err := enc.Encode(bin); err != nil {
		return fmt.Errorf("marshaling blob body: %w", err)
	}
	return nil
}

// sortBin stably sorts bin.PkgSymbols in place by the string "<Pkg>.<Name>"
// so that the extracted blob is deterministic. Only PkgSymbols is reordered.
func sortBin(bin *vulncheck.Bin) {
	syms := bin.PkgSymbols
	// qualified builds the comparison key for the symbol at position idx.
	qualified := func(idx int) string {
		return syms[idx].Pkg + "." + syms[idx].Name
	}
	sort.SliceStable(syms, func(a, b int) bool {
		return qualified(a) < qualified(b)
	})
}
Loading

0 comments on commit 8fb35e0

Please sign in to comment.