Skip to content

Commit

Permalink
cmd/vulnreport: simplify the duplicates process
Browse files Browse the repository at this point in the history
Remove the "possible duplicate" label and instead
label all suspected duplicates as "duplicate" and
post a comment of the form "Duplicate of #NNN" to
the issue.

Update the instructions for the triager.

This is OK because the duplicate-finding check is
almost always correct.

Change-Id: I9d036f3a0490564000a13d783353608cde39880a
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/606236
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Damien Neil <[email protected]>
  • Loading branch information
tatianab committed Aug 16, 2024
1 parent 7f3ffd5 commit 0efc140
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 50 deletions.
23 changes: 9 additions & 14 deletions cmd/vulnreport/creator.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ func skip(iss *issues.Issue, x *xrefer) string {
return "existing report needs alias"
}

if iss.HasLabel(labelPossibleDuplicate) {
return "possible duplicate"
}

if iss.HasLabel(labelPossiblyNotGo) {
return "possibly not Go"
}
Expand Down Expand Up @@ -278,16 +274,15 @@ func (c *creator) write(ctx context.Context, r *yamlReport) error {
}

const (
labelDuplicate = "duplicate"
labelDirect = "Direct External Report"
labelSuggestedEdit = "Suggested Edit"
labelNeedsAlias = "NeedsAlias"
labelTriaged = "triaged"
labelHighPriority = "high priority"
labelFirstParty = "first party"
labelPossibleDuplicate = "possible duplicate"
labelPossiblyNotGo = "possibly not Go"
labelOutOfScope = "excluded: OUT_OF_SCOPE"
labelDuplicate = "duplicate"
labelDirect = "Direct External Report"
labelSuggestedEdit = "Suggested Edit"
labelNeedsAlias = "NeedsAlias"
labelTriaged = "triaged"
labelHighPriority = "high priority"
labelFirstParty = "first party"
labelPossiblyNotGo = "possibly not Go"
labelOutOfScope = "excluded: OUT_OF_SCOPE"
)

func excludedReason(iss *issues.Issue) report.ExcludedReason {
Expand Down
14 changes: 14 additions & 0 deletions cmd/vulnreport/issue_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"golang.org/x/exp/maps"
"golang.org/x/tools/txtar"
"golang.org/x/vulndb/cmd/vulnreport/log"
"golang.org/x/vulndb/internal/issues"
"gopkg.in/yaml.v3"
)
Expand All @@ -20,6 +21,7 @@ type issueClient interface {
Issues(context.Context, issues.IssuesOptions) ([]*issues.Issue, error)
Issue(context.Context, int) (*issues.Issue, error)
SetLabels(context.Context, int, []string) error
AddComments(context.Context, int, []string) error
Reference(int) string
}

Expand Down Expand Up @@ -78,6 +80,18 @@ func (m *memIC) SetLabels(_ context.Context, n int, labels []string) error {
return fmt.Errorf("issue %d not found", n)
}

func (m *memIC) AddComments(_ context.Context, n int, comments []string) error {
if iss, ok := m.is[n]; ok {
for _, comment := range comments {
// TODO(tatianabradley): Store this instead of just printing it out.
log.Outf("posted comment to issue %d: %s", iss.Number, comment)
}
return nil
}

return fmt.Errorf("issue %d not found", n)
}

func (*memIC) Reference(n int) string {
return fmt.Sprintf("test-issue-tracker/%d", n)
}
4 changes: 4 additions & 0 deletions cmd/vulnreport/testdata/TestTriage/all.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@ command: "vulnreport triage "
-- out --
issue test-issue-tracker/7 is likely duplicate
- #7 shares alias(es) CVE-9999-0005 with data/reports/GO-9999-0005.yaml
posted comment to issue 7: Duplicate of #5
issue test-issue-tracker/10 is high priority
- golang.org/x/vuln has 101 importers (>= 100) and as many reviewed (0) as likely-binary reports (0)
issue test-issue-tracker/11 is possibly not Go
- more than 20 percent of reports (1 of 1) with this module are NOT_GO_CODE
issue test-issue-tracker/12 is likely duplicate
- #12 shares alias(es) CVE-1999-0002, GHSA-xxxx-yyyy-0002, GHSA-xxxx-yyyy-0003 with test-issue-tracker/13
- #12 shares alias(es) CVE-1999-0002, GHSA-xxxx-yyyy-0002, GHSA-xxxx-yyyy-0003 with test-issue-tracker/14
posted comment to issue 12: Duplicate of #13
posted comment to issue 12: Duplicate of #14
issue test-issue-tracker/13 is likely duplicate
- #13 shares alias(es) CVE-1999-0002, GHSA-xxxx-yyyy-0002, GHSA-xxxx-yyyy-0003 with test-issue-tracker/14
posted comment to issue 13: Duplicate of #14
triaged 6 issues:
- 1 high priority
- 5 low priority
Expand Down
106 changes: 78 additions & 28 deletions cmd/vulnreport/triage.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"golang.org/x/exp/slices"
"golang.org/x/vulndb/cmd/vulnreport/log"
"golang.org/x/vulndb/internal/issues"
"golang.org/x/vulndb/internal/report"
"golang.org/x/vulndb/internal/triage/priority"
)

Expand Down Expand Up @@ -78,7 +79,7 @@ func (t *triage) setup(ctx context.Context, env environment) error {
for _, a := range aliases {
t.addAlias(a, iss.Number)
}
if iss.HasLabel(labelPossibleDuplicate) || iss.HasLabel(labelDuplicate) {
if iss.HasLabel(labelDuplicate) {
t.duplicates[iss.Number] = true
}
}
Expand Down Expand Up @@ -111,32 +112,27 @@ func (t *triage) run(ctx context.Context, input any) (err error) {

func (t *triage) triage(ctx context.Context, iss *issues.Issue) {
labels := []string{labelTriaged}
comments := []string{}
defer func() {
// Preserve any existing labels.
labels = append(labels, iss.Labels...)
slices.Sort(labels)
labels = slices.Compact(labels)
if *dry {
log.Infof("issue #%d: would set labels: [%s]", iss.Number, strings.Join(labels, ", "))
} else {
if err := t.ic.SetLabels(ctx, iss.Number, labels); err != nil {
log.Warnf("issue #%d: could not auto-set label(s) %s\n\t%v", iss.Number, labels, err)
}
}
t.editIssue(ctx, iss, labels, comments)
t.addStat(iss, statTriaged, "")
}()

xrefs := t.findDuplicates(ctx, iss)
if len(xrefs) != 0 {
dupes := t.findDuplicates(ctx, iss)
if len(dupes) != 0 {
var strs []string
for ref, aliases := range xrefs {
for d, aliases := range dupes {
ref := t.ic.Reference(d.iss)
if d.fname != "" {
ref = filepath.ToSlash(d.fname)
}
strs = append(strs, fmt.Sprintf("#%d shares alias(es) %s with %s", iss.Number,
strings.Join(aliases, ", "),
filepath.ToSlash(ref)))
strings.Join(aliases, ", "), ref))
comments = append(comments, fmt.Sprintf("Duplicate of #%d", d.iss))
}
slices.Sort(strs)
t.addStat(iss, statDuplicate, strings.Join(strs, listItem))
labels = append(labels, labelPossibleDuplicate)
labels = append(labels, labelDuplicate)
}

mp := t.canonicalModule(modulePath(iss))
Expand All @@ -153,6 +149,38 @@ func (t *triage) triage(ctx context.Context, iss *issues.Issue) {
}
}

func (t *triage) editIssue(ctx context.Context, iss *issues.Issue, labels, comments []string) {
// Preserve any existing labels.
labels = append(labels, iss.Labels...)

// Sort and de-duplicate.
slices.Sort(labels)
labels = slices.Compact(labels)
slices.Sort(comments)
comments = slices.Compact(comments)

if *dry {
if len(labels) != 0 {
log.Infof("issue #%d: would set labels: [%s]", iss.Number, strings.Join(labels, ", "))
}
if len(comments) != 0 {
log.Infof("issue #%d: would add comments: [%s]", iss.Number, strings.Join(comments, ", "))
}
return
}

if err := t.ic.SetLabels(ctx, iss.Number, labels); err != nil {
log.Warnf("issue #%d: could not auto-set label(s) %s\n\t%v", iss.Number, labels, err)
}

// TODO(tatianabradley): Read existing comments to ensure we aren't
// posting the same comment twice.
// (This requires an extra Github API request.)
if err := t.ic.AddComments(ctx, iss.Number, comments); err != nil {
log.Warnf("issue #%d: could not add comment(s) %s\n\t%v", iss.Number, comments, err)
}
}

func toStat(p priority.Priority) int {
switch p {
case priority.Unknown:
Expand All @@ -174,43 +202,65 @@ func (t *triage) aliases(ctx context.Context, iss *issues.Issue) []string {
return t.allAliases(ctx, aliases)
}

func (t *triage) findDuplicates(ctx context.Context, iss *issues.Issue) map[string][]string {
type vuln struct {
// The issue number for this vulnerability.
iss int
// The filename of the report for this issue.
fname string
}

// findDuplicates returns a map of duplicate issues to the aliases
// that they share with the given issue.
func (t *triage) findDuplicates(ctx context.Context, iss *issues.Issue) map[vuln][]string {
aliases := t.aliases(ctx, iss)
if len(aliases) == 0 {
log.Infof("issue #%d: skipping duplicate search (no aliases found)", iss.Number)
return nil
}

xrefs := make(map[string][]string)
duplicates := make(map[vuln][]string)
for _, a := range aliases {
// Find existing reports with this alias.
if reports := t.rc.ReportsByAlias(a); len(reports) != 0 {
for _, r := range reports {
fname, err := r.YAMLFilename()
if err != nil {
fname = r.ID
log.Warnf("could not get filename of duplicate report: %s", err)
continue
}
_, _, iss, err := report.ParseFilepath(fname)
if err != nil {
log.Warnf("could not parse duplicate report: %s", err)
continue
}
xrefs[fname] = append(xrefs[fname], a)
d := vuln{
iss: iss,
fname: fname,
}
duplicates[d] = append(duplicates[d], a)
}
}

// Find other open issues with this alias.
for _, dup := range t.lookupAlias(a) {
if iss.Number == dup {
for _, issNum := range t.lookupAlias(a) {
if iss.Number == issNum {
continue
}
// If the other issue is already marked as a duplicate,
// we don't need to mark this one.
if t.duplicates[dup] {
if t.duplicates[issNum] {
continue
}
ref := t.ic.Reference(dup)
xrefs[ref] = append(xrefs[ref], a)
d := vuln{
iss: issNum,
// no report yet
}
duplicates[d] = append(duplicates[d], a)
t.duplicates[iss.Number] = true
}
}

return xrefs
return duplicates
}

func (t *triage) lookupAlias(a string) []int {
Expand Down
8 changes: 4 additions & 4 deletions doc/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ Other useful docs:

### Check for duplicates and not Go code

1. For all reports marked `possible duplicate`, determine if
the label is correct.
1. For all reports marked `duplicate`, quickly double-check if the label is correct (it usually is).

* If correct: replace the `possible duplicate` label with the `duplicate` label, add a comment exactly of the form "Duplicate of #NNN" where #NNN is number of the issue this is a duplicate of, and close the issue.
* If incorrect: remove the `possible duplicate` label and ensure the `triaged` label is present.
* If correct: close the issue.
* If incorrect: remove the `duplicate` label, delete the duplicate comment, and ensure the
`triaged` label is present.

2. For all reports marked `possibly not Go`, determine if the label is correct by investigating the report to see if the vulnerability affects Go code.

Expand Down
4 changes: 2 additions & 2 deletions doc/triage.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Any open issue should be in one of the following states:
- triaged
- standard priority (no additional label)
- high priority
- possible duplicate
- duplicate
- possibly not Go
- excluded
- needs-review
Expand All @@ -62,7 +62,7 @@ The issue has been auto-triaged.
The states are:
- `high priority`: the issue needs a REVIEWED report
- standard priority (no label): the issue needs an UNREVIEWED report
- `possible duplicate`: we need to check if the issue is a duplicate
- `duplicate`: we need to double-check if the issue is a duplicate
- `possibly not Go`: we need to check if the issue does not affect Go code

### excluded
Expand Down
2 changes: 1 addition & 1 deletion doc/vulnreport.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ $ vulnreport triage
This command looks at all untriaged issues to find and label:

* High-priority issues (label: `high priority`) - issues that affect modules with >= 100 importers
* Possible duplicates (label: `possible duplicate`) - issues
* Possible duplicates (label: `duplicate`) - issues
that may be duplicates of another issue because they share a CVE/GHSA
* Possibly not Go (label: `possibly Not Go`) - issues that possibly do not affect Go at all. This is applied to modules
for which more than 20% of current reports are marked `excluded: NOT_GO_CODE`.
Expand Down
17 changes: 16 additions & 1 deletion internal/issues/issues.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func (c *Client) CreateIssue(ctx context.Context, iss *Issue) (number int, err e
}

func (c *Client) SetLabels(ctx context.Context, issNum int, labels []string) (err error) {
defer derrors.Wrap(&err, "AddLabels(%d, %s)", issNum, labels)
defer derrors.Wrap(&err, "SetLabels(%d, %s)", issNum, labels)

req := &github.IssueRequest{
Labels: &labels,
Expand All @@ -217,6 +217,21 @@ func (c *Client) SetLabels(ctx context.Context, issNum int, labels []string) (er
return nil
}

func (c *Client) AddComments(ctx context.Context, issNum int, comments []string) (err error) {
defer derrors.Wrap(&err, "AddComments(%d, %s)", issNum, comments)

for _, comment := range comments {
req := &github.IssueComment{
Body: &comment,
}
_, _, err = c.GitHub.Issues.CreateComment(ctx, c.Owner, c.Repo, issNum, req)
if err != nil {
return err
}
}
return nil
}

// NewGoID creates a Go advisory ID based on the issue number
// and time of issue creation.
func (iss *Issue) NewGoID() string {
Expand Down

0 comments on commit 0efc140

Please sign in to comment.