// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|||
|
|
|
|||
|
|
package sarif
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"encoding/json"
|
|||
|
|
"fmt"
|
|||
|
|
"io"
|
|||
|
|
"path/filepath"
|
|||
|
|
"sort"
|
|||
|
|
|
|||
|
|
"golang.org/x/vuln/internal"
|
|||
|
|
"golang.org/x/vuln/internal/govulncheck"
|
|||
|
|
"golang.org/x/vuln/internal/osv"
|
|||
|
|
"golang.org/x/vuln/internal/traces"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// handler for sarif output.
|
|||
|
|
type handler struct {
|
|||
|
|
w io.Writer
|
|||
|
|
cfg *govulncheck.Config
|
|||
|
|
osvs map[string]*osv.Entry
|
|||
|
|
// findings contains same-level findings for an
|
|||
|
|
// OSV at the most precise level of granularity
|
|||
|
|
// available. This means, for instance, that if
|
|||
|
|
// an osv is indeed called, then all findings for
|
|||
|
|
// the osv will have call stack info.
|
|||
|
|
findings map[string][]*govulncheck.Finding
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func NewHandler(w io.Writer) *handler {
|
|||
|
|
return &handler{
|
|||
|
|
w: w,
|
|||
|
|
osvs: make(map[string]*osv.Entry),
|
|||
|
|
findings: make(map[string][]*govulncheck.Finding),
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func (h *handler) Config(c *govulncheck.Config) error {
|
|||
|
|
h.cfg = c
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func (h *handler) Progress(p *govulncheck.Progress) error {
|
|||
|
|
return nil // not needed by sarif
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func (h *handler) SBOM(s *govulncheck.SBOM) error {
|
|||
|
|
return nil // not needed by sarif
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func (h *handler) OSV(e *osv.Entry) error {
|
|||
|
|
h.osvs[e.ID] = e
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// moreSpecific favors a call finding over a non-call
|
|||
|
|
// finding and a package finding over a module finding.
|
|||
|
|
func moreSpecific(f1, f2 *govulncheck.Finding) int {
|
|||
|
|
if len(f1.Trace) > 1 && len(f2.Trace) > 1 {
|
|||
|
|
// Both are call stack findings.
|
|||
|
|
return 0
|
|||
|
|
}
|
|||
|
|
if len(f1.Trace) > 1 {
|
|||
|
|
return -1
|
|||
|
|
}
|
|||
|
|
if len(f2.Trace) > 1 {
|
|||
|
|
return 1
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
fr1, fr2 := f1.Trace[0], f2.Trace[0]
|
|||
|
|
if fr1.Function != "" && fr2.Function == "" {
|
|||
|
|
return -1
|
|||
|
|
}
|
|||
|
|
if fr1.Function == "" && fr2.Function != "" {
|
|||
|
|
return 1
|
|||
|
|
}
|
|||
|
|
if fr1.Package != "" && fr2.Package == "" {
|
|||
|
|
return -1
|
|||
|
|
}
|
|||
|
|
if fr1.Package == "" && fr2.Package != "" {
|
|||
|
|
return -1
|
|||
|
|
}
|
|||
|
|
return 0 // findings always have module info
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func (h *handler) Finding(f *govulncheck.Finding) error {
|
|||
|
|
fs := h.findings[f.OSV]
|
|||
|
|
if len(fs) == 0 {
|
|||
|
|
fs = []*govulncheck.Finding{f}
|
|||
|
|
} else {
|
|||
|
|
if ms := moreSpecific(f, fs[0]); ms == -1 {
|
|||
|
|
// The new finding is more specific, so we need
|
|||
|
|
// to erase existing findings and add the new one.
|
|||
|
|
fs = []*govulncheck.Finding{f}
|
|||
|
|
} else if ms == 0 {
|
|||
|
|
// The new finding is equal to an existing one and
|
|||
|
|
// because of the invariant on h.findings, it is
|
|||
|
|
// also equal to all existing ones.
|
|||
|
|
fs = append(fs, f)
|
|||
|
|
}
|
|||
|
|
// Otherwise, the new finding is at a less precise level.
|
|||
|
|
}
|
|||
|
|
h.findings[f.OSV] = fs
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Flush is used to print out to w the sarif json output.
|
|||
|
|
// This is needed as sarif is not streamed.
|
|||
|
|
func (h *handler) Flush() error {
|
|||
|
|
sLog := toSarif(h)
|
|||
|
|
s, err := json.MarshalIndent(sLog, "", " ")
|
|||
|
|
if err != nil {
|
|||
|
|
return err
|
|||
|
|
}
|
|||
|
|
h.w.Write(s)
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func toSarif(h *handler) Log {
|
|||
|
|
cfg := h.cfg
|
|||
|
|
r := Run{
|
|||
|
|
Tool: Tool{
|
|||
|
|
Driver: Driver{
|
|||
|
|
Name: cfg.ScannerName,
|
|||
|
|
Version: cfg.ScannerVersion,
|
|||
|
|
InformationURI: "https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck",
|
|||
|
|
Properties: *cfg,
|
|||
|
|
Rules: rules(h),
|
|||
|
|
},
|
|||
|
|
},
|
|||
|
|
Results: results(h),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return Log{
|
|||
|
|
Version: "2.1.0",
|
|||
|
|
Schema: "https://json.schemastore.org/sarif-2.1.0.json",
|
|||
|
|
Runs: []Run{r},
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func rules(h *handler) []Rule {
|
|||
|
|
rs := make([]Rule, 0, len(h.findings)) // must not be nil
|
|||
|
|
for id := range h.findings {
|
|||
|
|
osv := h.osvs[id]
|
|||
|
|
// s is either summary if it exists, or details
|
|||
|
|
// otherwise. Govulncheck text does the same.
|
|||
|
|
s := osv.Summary
|
|||
|
|
if s == "" {
|
|||
|
|
s = osv.Details
|
|||
|
|
}
|
|||
|
|
rs = append(rs, Rule{
|
|||
|
|
ID: osv.ID,
|
|||
|
|
ShortDescription: Description{Text: fmt.Sprintf("[%s] %s", osv.ID, s)},
|
|||
|
|
FullDescription: Description{Text: s},
|
|||
|
|
HelpURI: fmt.Sprintf("https://pkg.go.dev/vuln/%s", osv.ID),
|
|||
|
|
Help: Description{Text: osv.Details},
|
|||
|
|
Properties: RuleTags{Tags: tags(osv)},
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
sort.SliceStable(rs, func(i, j int) bool { return rs[i].ID < rs[j].ID })
|
|||
|
|
return rs
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// tags returns an slice of zero or
|
|||
|
|
// more aliases of o.
|
|||
|
|
func tags(o *osv.Entry) []string {
|
|||
|
|
if len(o.Aliases) > 0 {
|
|||
|
|
return o.Aliases
|
|||
|
|
}
|
|||
|
|
return []string{} // must not be nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func results(h *handler) []Result {
|
|||
|
|
results := make([]Result, 0, len(h.findings)) // must not be nil
|
|||
|
|
for osv, fs := range h.findings {
|
|||
|
|
var locs []Location
|
|||
|
|
if h.cfg.ScanMode != govulncheck.ScanModeBinary {
|
|||
|
|
// Attach result to the go.mod file for source analysis.
|
|||
|
|
// But there is no such place for binaries.
|
|||
|
|
locs = []Location{{PhysicalLocation: PhysicalLocation{
|
|||
|
|
ArtifactLocation: ArtifactLocation{
|
|||
|
|
URI: "go.mod",
|
|||
|
|
URIBaseID: SrcRootID,
|
|||
|
|
},
|
|||
|
|
Region: Region{StartLine: 1}, // for now, point to the first line
|
|||
|
|
},
|
|||
|
|
Message: Description{Text: fmt.Sprintf("Findings for vulnerability %s", osv)}, // not having a message here results in an invalid sarif
|
|||
|
|
}}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
res := Result{
|
|||
|
|
RuleID: osv,
|
|||
|
|
Level: level(fs[0], h.cfg),
|
|||
|
|
Message: Description{Text: resultMessage(fs, h.cfg)},
|
|||
|
|
Stacks: stacks(h, fs),
|
|||
|
|
CodeFlows: codeFlows(h, fs),
|
|||
|
|
Locations: locs,
|
|||
|
|
}
|
|||
|
|
results = append(results, res)
|
|||
|
|
}
|
|||
|
|
sort.SliceStable(results, func(i, j int) bool { return results[i].RuleID < results[j].RuleID }) // for deterministic output
|
|||
|
|
return results
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func resultMessage(findings []*govulncheck.Finding, cfg *govulncheck.Config) string {
|
|||
|
|
// We can infer the findings' level by just looking at the
|
|||
|
|
// top trace frame of any finding.
|
|||
|
|
frame := findings[0].Trace[0]
|
|||
|
|
uniqueElems := make(map[string]bool)
|
|||
|
|
if frame.Function == "" && frame.Package == "" { // module level findings
|
|||
|
|
for _, f := range findings {
|
|||
|
|
uniqueElems[f.Trace[0].Module] = true
|
|||
|
|
}
|
|||
|
|
} else { // symbol and package level findings
|
|||
|
|
for _, f := range findings {
|
|||
|
|
uniqueElems[f.Trace[0].Package] = true
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
var elems []string
|
|||
|
|
for e := range uniqueElems {
|
|||
|
|
elems = append(elems, e)
|
|||
|
|
}
|
|||
|
|
sort.Strings(elems)
|
|||
|
|
|
|||
|
|
l := len(elems)
|
|||
|
|
elemList := list(elems)
|
|||
|
|
main, addition := "", ""
|
|||
|
|
const runCallAnalysis = "Run the call-level analysis to understand whether your code actually calls the vulnerabilities."
|
|||
|
|
switch {
|
|||
|
|
case frame.Function != "":
|
|||
|
|
main = fmt.Sprintf("calls vulnerable functions in %d package%s (%s).", l, choose("", "s", l == 1), elemList)
|
|||
|
|
case frame.Package != "":
|
|||
|
|
main = fmt.Sprintf("imports %d vulnerable package%s (%s)", l, choose("", "s", l == 1), elemList)
|
|||
|
|
addition = choose(", but doesn’t appear to call any of the vulnerable symbols.", ". "+runCallAnalysis, cfg.ScanLevel.WantSymbols())
|
|||
|
|
default:
|
|||
|
|
main = fmt.Sprintf("depends on %d vulnerable module%s (%s)", l, choose("", "s", l == 1), elemList)
|
|||
|
|
informational := ", but doesn't appear to " + choose("call", "import", cfg.ScanLevel.WantSymbols()) + " any of the vulnerable symbols."
|
|||
|
|
addition = choose(informational, ". "+runCallAnalysis, cfg.ScanLevel.WantPackages())
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return fmt.Sprintf("Your code %s%s", main, addition)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Values that level returns for the Level of a sarif result.
const (
	// errorLevel is the value "error".
	errorLevel = "error"
	// warningLevel is the value "warning".
	warningLevel = "warning"
	// informationalLevel is the value "note".
	informationalLevel = "note"
)
|
|||
|
|
|
|||
|
|
func level(f *govulncheck.Finding, cfg *govulncheck.Config) string {
|
|||
|
|
fr := f.Trace[0]
|
|||
|
|
switch {
|
|||
|
|
case cfg.ScanLevel.WantSymbols():
|
|||
|
|
if fr.Function != "" {
|
|||
|
|
return errorLevel
|
|||
|
|
}
|
|||
|
|
if fr.Package != "" {
|
|||
|
|
return warningLevel
|
|||
|
|
}
|
|||
|
|
return informationalLevel
|
|||
|
|
case cfg.ScanLevel.WantPackages():
|
|||
|
|
if fr.Package != "" {
|
|||
|
|
return errorLevel
|
|||
|
|
}
|
|||
|
|
return warningLevel
|
|||
|
|
default:
|
|||
|
|
return errorLevel
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func stacks(h *handler, fs []*govulncheck.Finding) []Stack {
|
|||
|
|
if fs[0].Trace[0].Function == "" { // not call level findings
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
var stacks []Stack
|
|||
|
|
for _, f := range fs {
|
|||
|
|
stacks = append(stacks, stack(h, f))
|
|||
|
|
}
|
|||
|
|
// Sort stacks for deterministic output. We sort by message
|
|||
|
|
// which is effectively sorting by full symbol name. The
|
|||
|
|
// performance should not be an issue here.
|
|||
|
|
sort.SliceStable(stacks, func(i, j int) bool { return stacks[i].Message.Text < stacks[j].Message.Text })
|
|||
|
|
return stacks
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// stack transforms call stack in f to a sarif stack.
|
|||
|
|
func stack(h *handler, f *govulncheck.Finding) Stack {
|
|||
|
|
trace := f.Trace
|
|||
|
|
top := trace[len(trace)-1] // belongs to top level module
|
|||
|
|
|
|||
|
|
frames := make([]Frame, 0, len(trace)) // must not be nil
|
|||
|
|
for i := len(trace) - 1; i >= 0; i-- { // vulnerable symbol is at the top frame
|
|||
|
|
frame := trace[i]
|
|||
|
|
pos := govulncheck.Position{Line: 1, Column: 1}
|
|||
|
|
if frame.Position != nil {
|
|||
|
|
pos = *frame.Position
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
sf := Frame{
|
|||
|
|
Module: frame.Module + "@" + frame.Version,
|
|||
|
|
Location: Location{Message: Description{Text: symbol(frame)}}, // show the (full) symbol name
|
|||
|
|
}
|
|||
|
|
file, base := fileURIInfo(pos.Filename, top.Module, frame.Module, frame.Version)
|
|||
|
|
if h.cfg.ScanMode != govulncheck.ScanModeBinary {
|
|||
|
|
sf.Location.PhysicalLocation = PhysicalLocation{
|
|||
|
|
ArtifactLocation: ArtifactLocation{
|
|||
|
|
URI: file,
|
|||
|
|
URIBaseID: base,
|
|||
|
|
},
|
|||
|
|
Region: Region{
|
|||
|
|
StartLine: pos.Line,
|
|||
|
|
StartColumn: pos.Column,
|
|||
|
|
},
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
frames = append(frames, sf)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return Stack{
|
|||
|
|
Frames: frames,
|
|||
|
|
Message: Description{Text: fmt.Sprintf("A call stack for vulnerable function %s", symbol(trace[0]))},
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func codeFlows(h *handler, fs []*govulncheck.Finding) []CodeFlow {
|
|||
|
|
if fs[0].Trace[0].Function == "" { // not call level findings
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// group call stacks per symbol. There should
|
|||
|
|
// be one call stack currently per symbol, but
|
|||
|
|
// this might change in the future.
|
|||
|
|
m := make(map[govulncheck.Frame][]*govulncheck.Finding)
|
|||
|
|
for _, f := range fs {
|
|||
|
|
// fr.Position is currently the position
|
|||
|
|
// of the definition of the vuln symbol
|
|||
|
|
fr := *f.Trace[0]
|
|||
|
|
m[fr] = append(m[fr], f)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
var codeFlows []CodeFlow
|
|||
|
|
for fr, fs := range m {
|
|||
|
|
tfs := threadFlows(h, fs)
|
|||
|
|
codeFlows = append(codeFlows, CodeFlow{
|
|||
|
|
ThreadFlows: tfs,
|
|||
|
|
// TODO: should we instead show the message from govulncheck text output?
|
|||
|
|
Message: Description{Text: fmt.Sprintf("A summarized code flow for vulnerable function %s", symbol(&fr))},
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
// Sort flows for deterministic output. We sort by message
|
|||
|
|
// which is effectively sorting by full symbol name. The
|
|||
|
|
// performance should not be an issue here.
|
|||
|
|
sort.SliceStable(codeFlows, func(i, j int) bool { return codeFlows[i].Message.Text < codeFlows[j].Message.Text })
|
|||
|
|
return codeFlows
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func threadFlows(h *handler, fs []*govulncheck.Finding) []ThreadFlow {
|
|||
|
|
tfs := make([]ThreadFlow, 0, len(fs)) // must not be nil
|
|||
|
|
for _, f := range fs {
|
|||
|
|
trace := traces.Compact(f)
|
|||
|
|
top := trace[len(trace)-1] // belongs to top level module
|
|||
|
|
|
|||
|
|
var tf []ThreadFlowLocation
|
|||
|
|
for i := len(trace) - 1; i >= 0; i-- { // vulnerable symbol is at the top frame
|
|||
|
|
// TODO: should we, similar to govulncheck text output, only
|
|||
|
|
// mention three elements of the compact trace?
|
|||
|
|
frame := trace[i]
|
|||
|
|
pos := govulncheck.Position{Line: 1, Column: 1}
|
|||
|
|
if frame.Position != nil {
|
|||
|
|
pos = *frame.Position
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
tfl := ThreadFlowLocation{
|
|||
|
|
Module: frame.Module + "@" + frame.Version,
|
|||
|
|
Location: Location{Message: Description{Text: symbol(frame)}}, // show the (full) symbol name
|
|||
|
|
}
|
|||
|
|
file, base := fileURIInfo(pos.Filename, top.Module, frame.Module, frame.Version)
|
|||
|
|
if h.cfg.ScanMode != govulncheck.ScanModeBinary {
|
|||
|
|
tfl.Location.PhysicalLocation = PhysicalLocation{
|
|||
|
|
ArtifactLocation: ArtifactLocation{
|
|||
|
|
URI: file,
|
|||
|
|
URIBaseID: base,
|
|||
|
|
},
|
|||
|
|
Region: Region{
|
|||
|
|
StartLine: pos.Line,
|
|||
|
|
StartColumn: pos.Column,
|
|||
|
|
},
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
tf = append(tf, tfl)
|
|||
|
|
}
|
|||
|
|
tfs = append(tfs, ThreadFlow{Locations: tf})
|
|||
|
|
}
|
|||
|
|
return tfs
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func fileURIInfo(filename, top, module, version string) (string, string) {
|
|||
|
|
if top == module {
|
|||
|
|
return filename, SrcRootID
|
|||
|
|
}
|
|||
|
|
if module == internal.GoStdModulePath {
|
|||
|
|
return filename, GoRootID
|
|||
|
|
}
|
|||
|
|
return filepath.ToSlash(filepath.Join(module+"@"+version, filename)), GoModCacheID
|
|||
|
|
}
|