go-marshal: Improve collision detection of import statments.

Previously, the import statement collision detection mechanism aborted
go-marshal whenever it detected two imports in any package that has
the same local name. Consider this trivial package, defined by the the
following two source files:

file1.go:

package example
import (
        path/a/to/foo
)
...

file2.go:

package example
import (
       another/package/with/final/component/foo
)
...

Go-marshal previously couldn't handle generating code for the the
above package, even if none of the types marked for marshalling used
either of the imported foo packages. This turns out to be too
restrictive as we run into this a lot in practice. Examples include
"encoding/binary" vs "gvisor/pkg/binary/binary", and "sync" vs
"gvisor/pkg/sync/sync".

This change allows go-marshal to proceed with marshalling, and only
abort if the code generated by go-marshal references any such
ambiguous import names.

PiperOrigin-RevId: 304131190
This commit is contained in:
Rahat Mahmood 2020-04-01 00:42:34 -07:00 committed by gVisor bot
parent 840980aeba
commit 507f997213
4 changed files with 141 additions and 41 deletions

View File

@ -326,7 +326,7 @@ func (g *Generator) collectImports(a *ast.File, f *token.FileSet) map[string]imp
// Make sure we have an import that doesn't use any local names that // Make sure we have an import that doesn't use any local names that
// would conflict with identifiers in the generated code. // would conflict with identifiers in the generated code.
if len(i.name) == 1 { if len(i.name) == 1 && i.name != "_" {
abortAt(f.Position(spec.Pos()), fmt.Sprintf("Import has a single character local name '%s'; this may conflict with code generated by go_marshal, use a multi-character import alias", i.name)) abortAt(f.Position(spec.Pos()), fmt.Sprintf("Import has a single character local name '%s'; this may conflict with code generated by go_marshal, use a multi-character import alias", i.name))
} }
if _, ok := badIdentsMap[i.name]; ok { if _, ok := badIdentsMap[i.name]; ok {
@ -421,7 +421,7 @@ func (g *Generator) Run() error {
// the list of imports we need to copy to the generated code. // the list of imports we need to copy to the generated code.
for name, _ := range impl.is { for name, _ := range impl.is {
if !g.imports.markUsed(name) { if !g.imports.markUsed(name) {
panic(fmt.Sprintf("Generated code for '%s' referenced a non-existent import with local name '%s'", impl.typeName(), name)) panic(fmt.Sprintf("Generated code for '%s' referenced a non-existent import with local name '%s'. Either go-marshal needs to add an import to the generated file, or a package in an input source file has a package name differ from the final component of its path, which go-marshal doesn't know how to detect; use an import alias to work around this limitation.", impl.typeName(), name))
} }
} }
ts = append(ts, g.generateOneTestSuite(t)) ts = append(ts, g.generateOneTestSuite(t))

View File

@ -72,7 +72,6 @@ func (g *interfaceGenerator) recordUsedMarshallable(m string) {
func (g *interfaceGenerator) recordUsedImport(i string) { func (g *interfaceGenerator) recordUsedImport(i string) {
g.is[i] = struct{}{} g.is[i] = struct{}{}
} }
func (g *interfaceGenerator) recordPotentiallyNonPackedField(fieldName string) { func (g *interfaceGenerator) recordPotentiallyNonPackedField(fieldName string) {

View File

@ -152,7 +152,7 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
g.shift("dst", len) g.shift("dst", len)
} else { } else {
// We can't use shiftDynamic here because we don't have // We can't use shiftDynamic here because we don't have
// an instance of the dynamic type we can referece here // an instance of the dynamic type we can reference here
// (since the version in this struct is anonymous). Use // (since the version in this struct is anonymous). Use
// a typed nil pointer to call SizeBytes() instead. // a typed nil pointer to call SizeBytes() instead.
g.emit("dst = dst[(*%s)(nil).SizeBytes():]\n", t.Name) g.emit("dst = dst[(*%s)(nil).SizeBytes():]\n", t.Name)
@ -162,6 +162,11 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
g.marshalScalar(g.fieldAccessor(n), t.Name, "dst") g.marshalScalar(g.fieldAccessor(n), t.Name, "dst")
}, },
selector: func(n, tX, tSel *ast.Ident) { selector: func(n, tX, tSel *ast.Ident) {
if n.Name == "_" {
g.emit("// Padding: dst[:sizeof(%s)] ~= %s(0)\n", tX.Name, tSel.Name)
g.emit("dst = dst[(*%s.%s)(nil).SizeBytes():]\n", tX.Name, tSel.Name)
return
}
g.marshalScalar(g.fieldAccessor(n), fmt.Sprintf("%s.%s", tX.Name, tSel.Name), "dst") g.marshalScalar(g.fieldAccessor(n), fmt.Sprintf("%s.%s", tX.Name, tSel.Name), "dst")
}, },
array: func(n, t *ast.Ident, size int) { array: func(n, t *ast.Ident, size int) {
@ -199,11 +204,11 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
if len, dynamic := g.scalarSize(t); !dynamic { if len, dynamic := g.scalarSize(t); !dynamic {
g.shift("src", len) g.shift("src", len)
} else { } else {
// We can't use shiftDynamic here because we don't have // We don't have an instance of the dynamic type we can
// an instance of the dynamic type we can reference here // reference here (since the version in this struct is
// (since the version in this struct is anonymous). Use // anonymous). Use a typed nil pointer to call
// a typed nil pointer to call SizeBytes() instead. // SizeBytes() instead.
g.emit("src = src[(*%s)(nil).SizeBytes():]\n", t.Name) g.shiftDynamic("src", fmt.Sprintf("(*%s)(nil)", t.Name))
g.recordPotentiallyNonPackedField(fmt.Sprintf("(*%s)(nil)", t.Name)) g.recordPotentiallyNonPackedField(fmt.Sprintf("(*%s)(nil)", t.Name))
} }
return return
@ -211,6 +216,12 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
g.unmarshalScalar(g.fieldAccessor(n), t.Name, "src") g.unmarshalScalar(g.fieldAccessor(n), t.Name, "src")
}, },
selector: func(n, tX, tSel *ast.Ident) { selector: func(n, tX, tSel *ast.Ident) {
if n.Name == "_" {
g.emit("// Padding: %s ~= src[:sizeof(%s.%s)]\n", g.fieldAccessor(n), tX.Name, tSel.Name)
g.emit("src = src[(*%s.%s)(nil).SizeBytes():]\n", tX.Name, tSel.Name)
g.recordPotentiallyNonPackedField(fmt.Sprintf("(*%s.%s)(nil)", tX.Name, tSel.Name))
return
}
g.unmarshalScalar(g.fieldAccessor(n), fmt.Sprintf("%s.%s", tX.Name, tSel.Name), "src") g.unmarshalScalar(g.fieldAccessor(n), fmt.Sprintf("%s.%s", tX.Name, tSel.Name), "src")
}, },
array: func(n, t *ast.Ident, size int) { array: func(n, t *ast.Ident, size int) {

View File

@ -285,6 +285,11 @@ type importStmt struct {
aliased bool aliased bool
// Indicates whether this import was referenced by generated code. // Indicates whether this import was referenced by generated code.
used bool used bool
// AST node and file set representing the import statement, if any. These
// are only non-nil if the import statement originates from an input source
// file.
spec *ast.ImportSpec
fset *token.FileSet
} }
func newImport(p string) *importStmt { func newImport(p string) *importStmt {
@ -310,14 +315,27 @@ func newImportFromSpec(spec *ast.ImportSpec, f *token.FileSet) *importStmt {
name: name, name: name,
path: p, path: p,
aliased: spec.Name != nil, aliased: spec.Name != nil,
spec: spec,
fset: f,
} }
} }
// String implements fmt.Stringer.String. This generates a string for the import
// statement appropriate for writing directly to generated code.
func (i *importStmt) String() string { func (i *importStmt) String() string {
if i.aliased { if i.aliased {
return fmt.Sprintf("%s \"%s\"", i.name, i.path) return fmt.Sprintf("%s %q", i.name, i.path)
} }
return fmt.Sprintf("\"%s\"", i.path) return fmt.Sprintf("%q", i.path)
}
// debugString returns a debug string representing an import statement. This
// representation is not valid golang code and is used for debugging output.
func (i *importStmt) debugString() string {
if i.spec != nil && i.fset != nil {
return fmt.Sprintf("%s: %s", i.fset.Position(i.spec.Path.Pos()), i)
}
return fmt.Sprintf("(go-marshal import): %s", i)
} }
func (i *importStmt) markUsed() { func (i *importStmt) markUsed() {
@ -329,43 +347,78 @@ func (i *importStmt) equivalent(other *importStmt) bool {
} }
// importTable represents a collection of importStmts. // importTable represents a collection of importStmts.
//
// An importTable may contain multiple import statements referencing the same
// local name. All import statements aliasing to the same local name are
// technically ambiguous, as if such an import name is used in the generated
// code, it's not clear which import statement it refers to. We ignore any
// potential collisions until actually writing the import table to the generated
// source file. See importTable.write.
//
// Given the following import statements across all the files comprising a
// package marshalled:
//
// "sync"
// "pkg/sync"
// "pkg/sentry/kernel"
// ktime "pkg/sentry/kernel/time"
//
// An importTable representing them would look like this:
//
// importTable {
// is: map[string][]*importStmt {
// "sync": []*importStmt{
// importStmt{name:"sync", path:"sync", aliased:false}
// importStmt{name:"sync", path:"pkg/sync", aliased:false}
// },
// "kernel": []*importStmt{importStmt{
// name: "kernel",
// path: "pkg/sentry/kernel",
// aliased: false
// }},
// "ktime": []*importStmt{importStmt{
// name: "ktime",
// path: "pkg/sentry/kernel/time",
// aliased: true,
// }},
// }
// }
//
// Note that the local name "sync" is assigned to two different import
// statements. This is possible if the import statements are from different
// source files in the same package.
//
// Since go-marshal generates a single output file per package regardless of the
// number of input files, if "sync" is referenced by any generated code, it's
// unclear which import statement "sync" refers to. While it's theoretically
// possible to resolve this by assigning a unique local alias to each instance
// of the sync package, go-marshal currently aborts when it encounters such an
// ambiguity.
//
// TODO(b/151478251): importTable considers the final component of an import
// path to be the package name, but this is only a convention. The actual
// package name is determined by the package statement in the source files for
// the package.
type importTable struct { type importTable struct {
// Map of imports and whether they should be copied to the output. // Map of imports and whether they should be copied to the output.
is map[string]*importStmt is map[string][]*importStmt
} }
func newImportTable() *importTable { func newImportTable() *importTable {
return &importTable{ return &importTable{
is: make(map[string]*importStmt), is: make(map[string][]*importStmt),
} }
} }
// Merges import statements from other into i. Collisions in import statements // Merges import statements from other into i.
// result in a panic.
func (i *importTable) merge(other *importTable) { func (i *importTable) merge(other *importTable) {
for name, im := range other.is { for name, ims := range other.is {
dup, ok := i.is[name] i.is[name] = append(i.is[name], ims...)
if ok {
// When merging two imports, if either are marked used, the merged entry
// should also be marked used.
im.used = im.used || dup.used
if !dup.equivalent(im) {
panic(fmt.Sprintf("Found colliding import statements: ours: %+v, other's: %+v", dup, im))
}
}
i.is[name] = im
} }
} }
func (i *importTable) addStmt(s *importStmt) *importStmt { func (i *importTable) addStmt(s *importStmt) *importStmt {
if old, ok := i.is[s.name]; ok && !old.equivalent(s) { i.is[s.name] = append(i.is[s.name], s)
// We could theoretically handle the collision by assigning a local name
// to one of the imports. However, this is a non-trivial transformation.
// Given that collisions should be rare, simply panic on collision.
panic(fmt.Sprintf("Import collision: old: %s as %v; new: %v as %v", old.path, old.name, s.path, s.name))
}
i.is[s.name] = s
return s return s
} }
@ -381,16 +434,20 @@ func (i *importTable) addFromSpec(spec *ast.ImportSpec, f *token.FileSet) *impor
// Marks the import named n as used. If no such import is in the table, returns // Marks the import named n as used. If no such import is in the table, returns
// false. // false.
func (i *importTable) markUsed(n string) bool { func (i *importTable) markUsed(n string) bool {
if n, ok := i.is[n]; ok { if ns, ok := i.is[n]; ok {
n.markUsed() for _, n := range ns {
n.markUsed()
}
return true return true
} }
return false return false
} }
func (i *importTable) clear() { func (i *importTable) clear() {
for _, i := range i.is { for _, is := range i.is {
i.used = false for _, i := range is {
i.used = false
}
} }
} }
@ -401,9 +458,42 @@ func (i *importTable) write(out io.Writer) error {
} }
imports := make([]string, 0, len(i.is)) imports := make([]string, 0, len(i.is))
for _, i := range i.is { for name, is := range i.is {
if i.used { var lastUsed *importStmt
imports = append(imports, i.String()) var ambiguous bool
for _, i := range is {
if i.used {
if lastUsed != nil {
if !i.equivalent(lastUsed) {
ambiguous = true
}
}
lastUsed = i
}
}
if ambiguous {
// We have two or more import statements across the different source
// files that share a local name, and at least one of these imports
// are used by the generated code. This ambiguity can't be resolved
// by go-marshal and requires the user intervention. Dump a list of
// the colliding import statements and let the user modify the input
// files as appropriate.
var b strings.Builder
fmt.Fprintf(&b, "The imported name %q is used by one of the types marked for marshalling, and which import statement the code refers to is ambiguous. Perhaps give the imports unique local names?\n\n", name)
fmt.Fprintf(&b, "The following %d import statements are ambiguous for the local name %q:\n", len(is), name)
// Note: len(is) is guaranteed to be 1 or greater or ambiguous can't
// be true. Therefore the slicing below is safe.
for _, i := range is[:len(is)-1] {
fmt.Fprintf(&b, " %v\n", i.debugString())
}
fmt.Fprintf(&b, " %v", is[len(is)-1].debugString())
panic(b.String())
}
if lastUsed != nil {
imports = append(imports, lastUsed.String())
} }
} }
sort.Strings(imports) sort.Strings(imports)