gvisor/tools/nogo/nogo.go

648 lines
19 KiB
Go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package nogo implements binary analysis similar to bazel's nogo,
// or the unitchecker package. It exists in order to provide additional
// facilities for analysis, namely plumbing through the output from
// dumping the generated binary (to analyze actual produced code).
package nogo
import (
"bytes"
"encoding/gob"
"errors"
"fmt"
"go/ast"
"go/build"
"go/parser"
"go/token"
"go/types"
"io"
"io/ioutil"
"log"
"os"
"path"
"path/filepath"
"reflect"
"sort"
"strings"
"golang.org/x/tools/go/analysis"
"golang.org/x/tools/go/analysis/internal/facts"
"golang.org/x/tools/go/gcexportdata"
"golang.org/x/tools/go/types/objectpath"
// Special case: flags live here and change overall behavior.
"gvisor.dev/gvisor/tools/nogo/objdump"
"gvisor.dev/gvisor/tools/worker"
)
// StdlibConfig is serialized as the configuration.
//
// This contains everything required for stdlib analysis.
type StdlibConfig struct {
	// Srcs are the paths of all standard library source files.
	Srcs []string
	// GOOS is the target operating system (e.g. "linux").
	GOOS string
	// GOARCH is the target architecture (e.g. "amd64").
	GOARCH string
	// Tags are the build tags applied when matching files.
	Tags []string
}
// PackageConfig is serialized as the configuration.
//
// This contains everything required for single package analysis.
type PackageConfig struct {
	// ImportPath is the canonical import path of the package.
	ImportPath string
	// GoFiles are the Go source files of the package.
	GoFiles []string
	// NonGoFiles are the non-Go (e.g. assembly) files of the package.
	NonGoFiles []string
	// Tags are the build tags applied when matching files.
	Tags []string
	// GOOS is the target operating system.
	GOOS string
	// GOARCH is the target architecture.
	GOARCH string
	// ImportMap maps import paths to files containing export data.
	ImportMap map[string]string
	// FactMap maps import paths to files containing serialized facts.
	FactMap map[string]string
	// StdlibFacts is the path of the serialized stdlib facts bundle.
	StdlibFacts string
}
// loader is a fact-loader function: given a package path, it returns the
// serialized fact data for that package.
type loader func(string) ([]byte, error)

// saver is a fact-saver function: it persists serialized fact data.
type saver func([]byte) error
// stdlibFact is used for serialiation.
type stdlibFact struct {
Package string
Facts []byte
}
// stdlibFacts is a set of standard library facts.
type stdlibFacts map[string][]byte
// Size implements worker.Sizer.Size.
func (sf stdlibFacts) Size() int64 {
size := int64(0)
for filename, data := range sf {
size += int64(len(filename))
size += int64(len(data))
}
return size
}
// EncodeTo serializes stdlibFacts.
func (sf stdlibFacts) EncodeTo(w io.Writer) error {
stdlibFactsSorted := make([]stdlibFact, 0, len(sf))
for pkg, facts := range sf {
stdlibFactsSorted = append(stdlibFactsSorted, stdlibFact{
Package: pkg,
Facts: facts,
})
}
sort.Slice(stdlibFactsSorted, func(i, j int) bool {
return stdlibFactsSorted[i].Package < stdlibFactsSorted[j].Package
})
enc := gob.NewEncoder(w)
if err := enc.Encode(stdlibFactsSorted); err != nil {
return err
}
return nil
}
// DecodeFrom deserializes stdlibFacts.
func (sf stdlibFacts) DecodeFrom(r io.Reader) error {
var stdlibFactsSorted []stdlibFact
dec := gob.NewDecoder(r)
if err := dec.Decode(&stdlibFactsSorted); err != nil {
return err
}
for _, stdlibFact := range stdlibFactsSorted {
sf[stdlibFact.Package] = stdlibFact.Facts
}
return nil
}
var (
	// cachedFacts caches per-package fact files by filename (just byte
	// data; see factLoader).
	cachedFacts = worker.NewCache("facts")

	// stdlibCachedFacts caches the decoded standard library facts bundle
	// (stdlibFacts), keyed by the bundle's filename.
	stdlibCachedFacts = worker.NewCache("stdlib")
)
// factLoader loads facts for the given package path.
//
// Facts for mapped dependencies are read from the per-package file named in
// FactMap; any path not present in FactMap is assumed to belong to the
// standard library and is resolved via the single serialized stdlib facts
// bundle. Both reads are memoized through the worker caches. Read/decode
// failures are reported through the named return err, captured by the
// cache-fill closures.
func (c *PackageConfig) factLoader(path string) (data []byte, err error) {
	filename, ok := c.FactMap[path]
	if ok {
		// Key the cache entry by the fact file's name. (The original
		// code passed an undefined identifier here, which cannot
		// compile; the filename is the correct, unique key.)
		cb := cachedFacts.Lookup([]string{filename}, func() worker.Sizer {
			data, readErr := ioutil.ReadFile(filename)
			if readErr != nil {
				err = fmt.Errorf("error loading %q: %w", filename, readErr)
				return nil
			}
			return worker.CacheBytes(data)
		})
		if cb != nil {
			return []byte(cb.(worker.CacheBytes)), err
		}
		return nil, err
	}
	// Not in the fact map: fall back to the stdlib facts bundle, decoded
	// once and cached keyed on the bundle's filename.
	cb := stdlibCachedFacts.Lookup([]string{c.StdlibFacts}, func() worker.Sizer {
		r, openErr := os.Open(c.StdlibFacts)
		if openErr != nil {
			err = fmt.Errorf("error loading stdlib facts from %q: %w", c.StdlibFacts, openErr)
			return nil
		}
		defer r.Close()
		sf := make(stdlibFacts)
		if readErr := sf.DecodeFrom(r); readErr != nil {
			err = fmt.Errorf("error loading stdlib facts: %w", readErr)
			return nil
		}
		return sf
	})
	if cb != nil {
		return (cb.(stdlibFacts))[path], err
	}
	return nil, err
}
// shouldInclude indicates whether the file should be included.
//
// NOTE: This does only basic parsing of tags.
func (c *PackageConfig) shouldInclude(path string) (bool, error) {
	// Start from the default build context and overlay the configured
	// target OS, architecture and build tags before matching the file.
	bctx := build.Default
	bctx.GOOS = c.GOOS
	bctx.GOARCH = c.GOARCH
	bctx.BuildTags = c.Tags
	dir, base := filepath.Dir(path), filepath.Base(path)
	return bctx.MatchFile(dir, base)
}
// importer is an implementation of go/types.Importer.
//
// This wraps a configuration, which provides the map of package names to
// files, and the facts. Note that this importer implementation will always
// pass when a given package is not available.
type importer struct {
	*PackageConfig
	// fset is the file set used for parsing and position lookup.
	fset *token.FileSet
	// cache holds packages already (fully or partially) loaded, keyed by
	// import path; shared with gcexportdata.Read.
	cache map[string]*types.Package
	// lastErr records the most recent import failure (see Import and
	// its use in CheckPackage).
	lastErr error
	// callback, if non-nil, is invoked before each import. This is used
	// to resolve loading order for the standard library; see CheckStdlib.
	callback func(string) error
}
// Import implements types.Importer.Import.
func (i *importer) Import(path string) (*types.Package, error) {
	if path == "unsafe" {
		// Special case: go/types has pre-defined type information for
		// unsafe. We ensure that this package is correct, in case any
		// analyzers are specifically looking for this.
		return types.Unsafe, nil
	}
	// Call the internal callback. This is used to resolve loading order
	// for the standard library. See checkStdlib.
	if i.callback != nil {
		if cbErr := i.callback(path); cbErr != nil {
			i.lastErr = cbErr
			return nil, cbErr
		}
	}
	// Serve from the cache when the package was already fully loaded.
	if cached, ok := i.cache[path]; ok && cached.Complete() {
		return cached, nil
	}
	// Resolve the export data: mapped packages are read from their files,
	// while anything unmapped is assumed to live in the standard library.
	var (
		rc  io.ReadCloser
		err error
	)
	if realPath, ok := i.ImportMap[path]; ok {
		rc, err = os.Open(realPath)
	} else {
		rc, err = findStdPkg(i.GOOS, i.GOARCH, path)
	}
	if err != nil {
		i.lastErr = err
		return nil, err
	}
	defer rc.Close()
	// Unwrap and parse the export data itself.
	r, err := gcexportdata.NewReader(rc)
	if err != nil {
		return nil, err
	}
	return gcexportdata.Read(r, i.fset, i.cache, path)
}
// ErrSkip indicates the package should be skipped. It is returned by import
// callbacks and checked via importer.lastErr in CheckPackage.
var ErrSkip = errors.New("skipped")
// CheckStdlib checks the standard library.
//
// This constructs a synthetic package configuration for each library in the
// standard library sources, and calls CheckPackage repeatedly.
//
// Note that not all parts of the source are expected to build. We skip
// obvious test files, and cmd files, which should not be dependencies.
func CheckStdlib(config *StdlibConfig, analyzers []*analysis.Analyzer) (allFindings FindingSet, facts []byte, err error) {
	if len(config.Srcs) == 0 {
		return nil, nil, nil
	}
	// Ensure all paths are normalized.
	for i := 0; i < len(config.Srcs); i++ {
		config.Srcs[i] = path.Clean(config.Srcs[i])
	}
	// Calculate the root source directory. This is always a directory
	// named 'src', of which we simply take the first we find. This is a
	// bit fragile, but works for all currently known Go source
	// configurations.
	//
	// Note that there may be extra files outside of the root source
	// directory; we simply ignore those.
	rootSrcPrefix := ""
	for _, file := range config.Srcs {
		const src = "/src/"
		i := strings.Index(file, src)
		if i == -1 {
			// Superfluous file.
			continue
		}
		// Index of first character after /src/.
		i += len(src)
		rootSrcPrefix = file[:i]
		break
	}
	// Aggregate all files by directory.
	packages := make(map[string]*PackageConfig)
	for _, file := range config.Srcs {
		if !strings.HasPrefix(file, rootSrcPrefix) {
			// Superfluous file.
			continue
		}
		d := path.Dir(file)
		if len(rootSrcPrefix) >= len(d) {
			continue // Not a file.
		}
		pkg := d[len(rootSrcPrefix):]
		// Skip cmd packages and obvious test files: see above.
		if strings.HasPrefix(pkg, "cmd/") || strings.HasSuffix(file, "_test.go") {
			continue
		}
		c, ok := packages[pkg]
		if !ok {
			c = &PackageConfig{
				ImportPath: pkg,
				GOOS:       config.GOOS,
				GOARCH:     config.GOARCH,
				Tags:       config.Tags,
			}
			packages[pkg] = c
		}
		// Add the files appropriately. Note that they will be further
		// filtered by architecture and build tags below, so this need
		// not be done immediately.
		if strings.HasSuffix(file, ".go") {
			c.GoFiles = append(c.GoFiles, file)
		} else {
			c.NonGoFiles = append(c.NonGoFiles, file)
		}
	}
	// Closure to check a single package.
	localStdlibFacts := make(stdlibFacts)
	localStdlibErrs := make(map[string]error)
	// Seed the stdlib cache so that factLoader lookups (keyed on an empty
	// StdlibFacts filename) resolve to the facts being built here.
	stdlibCachedFacts.Lookup([]string{""}, func() worker.Sizer {
		return localStdlibFacts
	})
	var checkOne func(pkg string) error // Recursive.
	checkOne = func(pkg string) error {
		// Is this already done?
		if _, ok := localStdlibFacts[pkg]; ok {
			return nil
		}
		// Did this fail previously?
		if _, ok := localStdlibErrs[pkg]; ok {
			return nil
		}
		// Lookup the configuration.
		config, ok := packages[pkg]
		if !ok {
			return nil // Not known.
		}
		// Find the binary package, and provide to objdump.
		rc, err := findStdPkg(config.GOOS, config.GOARCH, pkg)
		if err != nil {
			// If there's no binary for this package, it is likely
			// not built with the distribution. That's fine, we can
			// just skip analysis.
			localStdlibErrs[pkg] = err
			return nil
		}
		// Provide the input.
		oldReader := objdump.Reader
		objdump.Reader = rc // For analysis.
		defer func() {
			rc.Close()
			objdump.Reader = oldReader // Restore.
		}()
		// Run the analysis.
		findings, factData, err := CheckPackage(config, analyzers, checkOne)
		if err != nil {
			// If we can't analyze a package from the standard
			// library, then we skip it. It will simply not have
			// any findings.
			localStdlibErrs[pkg] = err
			return nil
		}
		localStdlibFacts[pkg] = factData
		allFindings = append(allFindings, findings...)
		return nil
	}
	// Check all packages.
	//
	// Note that this may call checkOne recursively, so it's not guaranteed
	// to evaluate in the order provided here. We do ensure however, that
	// all packages are evaluated.
	for pkg := range packages {
		if err := checkOne(pkg); err != nil {
			return nil, nil, err
		}
	}
	// Sanity check.
	if len(localStdlibFacts) == 0 {
		return nil, nil, fmt.Errorf("no stdlib facts found: misconfiguration?")
	}
	// Write out all findings.
	buf := bytes.NewBuffer(nil)
	if err := localStdlibFacts.EncodeTo(buf); err != nil {
		return nil, nil, fmt.Errorf("error serializing stdlib facts: %v", err)
	}
	// Write out all errors.
	for pkg, err := range localStdlibErrs {
		log.Printf("WARNING: error while processing %v: %v", pkg, err)
	}
	// Return all findings.
	return allFindings, buf.Bytes(), nil
}
// sanityCheckScope checks that every object in astScope resolves to the
// correct object in binaryTypes. Note that we don't check whether the two
// sets are identical; we only care about the fidelity of objects present in
// astScope.
//
// When a counterpart is found in the binary scope, the translation is
// recorded in astToBinary so that facts can later be re-exported against the
// binary object (working around golang.org/issue/44195). Mismatched objects
// are only logged; the returned error is reserved for scope-level failures.
func sanityCheckScope(astScope *types.Scope, binaryTypes *types.Package, binaryScope *types.Scope, astToBinary map[types.Object]types.Object) error {
	for _, name := range astScope.Names() {
		astObj := astScope.Lookup(name)
		objPath, err := objectpath.For(astObj)
		if err != nil {
			// Not an encoded object.
			continue
		}
		resolved, err := objectpath.Object(binaryTypes, objPath)
		if err != nil {
			// May be unused, see below.
			continue
		}
		if astObj.Id() != resolved.Id() {
			// These types are incompatible. This means that when
			// this objectpath is loading from the binaryTypes (for
			// dependencies) it will resolve to a fact for that
			// type. We don't actually care about this error since
			// we do the rewrite, but may as well alert.
			log.Printf("WARNING: Object %s is a victim of go/issues/44195.", astObj.Id())
		}
		binObj := binaryScope.Lookup(name)
		if binObj == nil {
			// The fact may not be exported in the objectdata, if
			// it is package internal. This is fine, as nothing out
			// of this package can use these symbols.
			continue
		}
		// Save the translation.
		astToBinary[astObj] = binObj
	}
	// Recurse into all nested scopes.
	for i := 0; i < astScope.NumChildren(); i++ {
		if err := sanityCheckScope(astScope.Child(i), binaryTypes, binaryScope, astToBinary); err != nil {
			return err
		}
	}
	return nil
}
// sanityCheckTypes checks that the AST-derived package maps cleanly onto the
// binary package, recording object translations in astToBinary. Mismatches
// are logged rather than returned; a non-nil error indicates a scope-level
// failure (see sanityCheckScope).
func sanityCheckTypes(astTypes, binaryTypes *types.Package, astToBinary map[types.Object]types.Object) error {
	return sanityCheckScope(astTypes.Scope(), binaryTypes, binaryTypes.Scope(), astToBinary)
}
// CheckPackage runs all given analyzers.
//
// The implementation was adapted from [1], which was in turn adapted from [2].
// This returns a list of matching analysis issues, or an error if the
// analysis could not be completed.
//
// [1] bazelbuild/rules_go/tools/builders/nogo_main.go
// [2] golang.org/x/tools/go/checker/internal/checker
func CheckPackage(config *PackageConfig, analyzers []*analysis.Analyzer, importCallback func(string) error) (findings []Finding, factData []byte, err error) {
	imp := &importer{
		PackageConfig: config,
		fset:          token.NewFileSet(),
		cache:         make(map[string]*types.Package),
		callback:      importCallback,
	}
	// Load all source files, skipping any excluded by build tags or
	// target OS/architecture (see shouldInclude).
	var syntax []*ast.File
	for _, file := range config.GoFiles {
		include, err := config.shouldInclude(file)
		if err != nil {
			return nil, nil, fmt.Errorf("error evaluating file %q: %v", file, err)
		}
		if !include {
			continue
		}
		s, err := parser.ParseFile(imp.fset, file, nil, parser.ParseComments)
		if err != nil {
			return nil, nil, fmt.Errorf("error parsing file %q: %v", file, err)
		}
		syntax = append(syntax, s)
	}
	// Check type information. A type error is tolerated only when the
	// import callback signalled ErrSkip (no local analysis is wanted).
	typesSizes := types.SizesFor("gc", config.GOARCH)
	typeConfig := types.Config{Importer: imp}
	typesInfo := &types.Info{
		Types:      make(map[ast.Expr]types.TypeAndValue),
		Uses:       make(map[*ast.Ident]types.Object),
		Defs:       make(map[*ast.Ident]types.Object),
		Implicits:  make(map[ast.Node]types.Object),
		Scopes:     make(map[ast.Node]*types.Scope),
		Selections: make(map[*ast.SelectorExpr]*types.Selection),
	}
	astTypes, err := typeConfig.Check(config.ImportPath, imp.fset, syntax, typesInfo)
	if err != nil && imp.lastErr != ErrSkip {
		return nil, nil, fmt.Errorf("error checking types: %w", err)
	}
	// Load all facts using the astTypes, although it may need reconciling
	// later on. See the fact functions below.
	astFacts, err := facts.Decode(astTypes, config.factLoader)
	if err != nil {
		return nil, nil, fmt.Errorf("error decoding facts: %w", err)
	}
	// Sanity check all types and record metadata to prevent
	// https://github.com/golang/go/issues/44195.
	//
	// This block loads the binary types, whose encoding will be well
	// defined and aligned with any downstream consumers. Below in the fact
	// functions for the analysis, we serialize types to both the astFacts
	// and the binaryFacts if available. The binaryFacts are the final
	// encoded facts in order to ensure compatibility. We keep the
	// intermediate astTypes in order to allow exporting and importing
	// within the local package under analysis.
	var (
		astToBinary = make(map[types.Object]types.Object)
		binaryFacts *facts.Set
	)
	if _, ok := config.ImportMap[config.ImportPath]; ok {
		// The package under analysis has export data of its own:
		// import it and build the ast-to-binary object translation.
		binaryTypes, err := imp.Import(config.ImportPath)
		if err != nil {
			return nil, nil, fmt.Errorf("error loading self: %w", err)
		}
		if err := sanityCheckTypes(astTypes, binaryTypes, astToBinary); err != nil {
			return nil, nil, fmt.Errorf("error sanity checking types: %w", err)
		}
		binaryFacts, err = facts.Decode(binaryTypes, config.factLoader)
		if err != nil {
			return nil, nil, fmt.Errorf("error decoding facts: %w", err)
		}
	}
	// Register fact types and establish dependencies between analyzers.
	// The visit closure will execute recursively, and populate results
	// with all required analysis results.
	results := make(map[*analysis.Analyzer]interface{})
	var visit func(*analysis.Analyzer) error // For recursion.
	visit = func(a *analysis.Analyzer) error {
		if _, ok := results[a]; ok {
			return nil
		}
		// Run recursively for all dependencies.
		for _, req := range a.Requires {
			if err := visit(req); err != nil {
				return err
			}
		}
		// Run the analysis. localFactsFilter restricts AllPackageFacts
		// and AllObjectFacts to the fact types this analyzer declares.
		localFactsFilter := make(map[reflect.Type]bool)
		for _, f := range a.FactTypes {
			localFactsFilter[reflect.TypeOf(f)] = true
		}
		p := &analysis.Pass{
			Analyzer:  a,
			Fset:      imp.fset,
			Files:     syntax,
			Pkg:       astTypes,
			TypesInfo: typesInfo,
			ResultOf:  results, // All results.
			Report: func(d analysis.Diagnostic) {
				findings = append(findings, Finding{
					Category: AnalyzerName(a.Name),
					Position: imp.fset.Position(d.Pos),
					Message:  d.Message,
				})
			},
			ImportPackageFact: astFacts.ImportPackageFact,
			ExportPackageFact: func(fact analysis.Fact) {
				// Export to both fact sets; see the block
				// comment above binaryFacts.
				astFacts.ExportPackageFact(fact)
				if binaryFacts != nil {
					binaryFacts.ExportPackageFact(fact)
				}
			},
			ImportObjectFact: astFacts.ImportObjectFact,
			ExportObjectFact: func(obj types.Object, fact analysis.Fact) {
				astFacts.ExportObjectFact(obj, fact)
				// Note that if no object is recorded in
				// astToBinary and binaryFacts != nil, then the
				// object doesn't appear in the exported data.
				// It was likely an internal object to the
				// package, and there is no meaningful
				// downstream consumer of the fact.
				if binaryObj, ok := astToBinary[obj]; ok && binaryFacts != nil {
					binaryFacts.ExportObjectFact(binaryObj, fact)
				}
			},
			AllPackageFacts: func() []analysis.PackageFact { return astFacts.AllPackageFacts(localFactsFilter) },
			AllObjectFacts:  func() []analysis.ObjectFact { return astFacts.AllObjectFacts(localFactsFilter) },
			TypesSizes:      typesSizes,
		}
		result, err := a.Run(p)
		if err != nil {
			return fmt.Errorf("error running analysis %s: %v", a, err)
		}
		// Sanity check & save the result.
		if got, want := reflect.TypeOf(result), a.ResultType; got != want {
			return fmt.Errorf("error: analyzer %s returned a result of type %v, but declared ResultType %v", a, got, want)
		}
		results[a] = result
		return nil // Success.
	}
	// Visit all analyzers recursively.
	for _, a := range analyzers {
		if imp.lastErr == ErrSkip {
			continue // No local analysis.
		}
		if err := visit(a); err != nil {
			return nil, nil, err // Already has context.
		}
	}
	// Return all findings. Note that we have a preference to returning the
	// binary facts if available, so that downstream consumers of these
	// facts will find the export aligns with the internal type details.
	// See the block above with the call to sanityCheckTypes.
	if binaryFacts != nil {
		return findings, binaryFacts.Encode(), nil
	}
	return findings, astFacts.Encode(), nil
}
func init() {
	// Register the concrete stdlibFact type so that gob can encode and
	// decode it (see stdlibFacts.EncodeTo/DecodeFrom).
	gob.Register((*stdlibFact)(nil))
}