Prevent CAP_NET_RAW from appearing in exec

'docker exec' was getting CAP_NET_RAW even when --net-raw=false
because the capability was not filtered out when copying the
container's capabilities.

PiperOrigin-RevId: 272260451
This commit is contained in:
Fabricio Voznika 2019-10-01 11:48:24 -07:00 committed by gVisor bot
parent 53cc72da90
commit 0b02c3d5e5
10 changed files with 192 additions and 46 deletions

View File

@ -105,11 +105,11 @@ func (ex *Exec) SetFlags(f *flag.FlagSet) {
// Execute implements subcommands.Command.Execute. It starts a process in an
// already created container.
func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
e, id, err := ex.parseArgs(f)
conf := args[0].(*boot.Config)
e, id, err := ex.parseArgs(f, conf.EnableRaw)
if err != nil {
Fatalf("parsing process spec: %v", err)
}
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
c, err := container.Load(conf.RootDir, id)
@ -117,6 +117,9 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("loading sandbox: %v", err)
}
log.Debugf("Exec arguments: %+v", e)
log.Debugf("Exec capablities: %+v", e.Capabilities)
// Replace empty settings with defaults from container.
if e.WorkingDirectory == "" {
e.WorkingDirectory = c.Spec.Process.Cwd
@ -129,14 +132,11 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
if e.Capabilities == nil {
// enableRaw is set to true to prevent the filtering out of
// CAP_NET_RAW. This is the opposite of Create() because exec
// requires the capability to be set explicitly, while 'docker
// run' sets it by default.
e.Capabilities, err = specutils.Capabilities(true /* enableRaw */, c.Spec.Process.Capabilities)
e.Capabilities, err = specutils.Capabilities(conf.EnableRaw, c.Spec.Process.Capabilities)
if err != nil {
Fatalf("creating capabilities: %v", err)
}
log.Infof("Using exec capabilities from container: %+v", e.Capabilities)
}
// containerd expects an actual process to represent the container being
@ -283,14 +283,14 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
// parseArgs parses exec information from the command line or a JSON file
// depending on whether the --process flag was used. Returns an ExecArgs and
// the ID of the container to be used.
func (ex *Exec) parseArgs(f *flag.FlagSet) (*control.ExecArgs, string, error) {
func (ex *Exec) parseArgs(f *flag.FlagSet, enableRaw bool) (*control.ExecArgs, string, error) {
if ex.processPath == "" {
// Requires at least a container ID and command.
if f.NArg() < 2 {
f.Usage()
return nil, "", fmt.Errorf("both a container-id and command are required")
}
e, err := ex.argsFromCLI(f.Args()[1:])
e, err := ex.argsFromCLI(f.Args()[1:], enableRaw)
return e, f.Arg(0), err
}
// Requires only the container ID.
@ -298,11 +298,11 @@ func (ex *Exec) parseArgs(f *flag.FlagSet) (*control.ExecArgs, string, error) {
f.Usage()
return nil, "", fmt.Errorf("a container-id is required")
}
e, err := ex.argsFromProcessFile()
e, err := ex.argsFromProcessFile(enableRaw)
return e, f.Arg(0), err
}
func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
func (ex *Exec) argsFromCLI(argv []string, enableRaw bool) (*control.ExecArgs, error) {
extraKGIDs := make([]auth.KGID, 0, len(ex.extraKGIDs))
for _, s := range ex.extraKGIDs {
kgid, err := strconv.Atoi(s)
@ -315,7 +315,7 @@ func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
var caps *auth.TaskCapabilities
if len(ex.caps) > 0 {
var err error
caps, err = capabilities(ex.caps)
caps, err = capabilities(ex.caps, enableRaw)
if err != nil {
return nil, fmt.Errorf("capabilities error: %v", err)
}
@ -333,7 +333,7 @@ func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
}, nil
}
func (ex *Exec) argsFromProcessFile() (*control.ExecArgs, error) {
func (ex *Exec) argsFromProcessFile(enableRaw bool) (*control.ExecArgs, error) {
f, err := os.Open(ex.processPath)
if err != nil {
return nil, fmt.Errorf("error opening process file: %s, %v", ex.processPath, err)
@ -343,21 +343,21 @@ func (ex *Exec) argsFromProcessFile() (*control.ExecArgs, error) {
if err := json.NewDecoder(f).Decode(&p); err != nil {
return nil, fmt.Errorf("error parsing process file: %s, %v", ex.processPath, err)
}
return argsFromProcess(&p)
return argsFromProcess(&p, enableRaw)
}
// argsFromProcess performs all the non-IO conversion from the Process struct
// to ExecArgs.
func argsFromProcess(p *specs.Process) (*control.ExecArgs, error) {
func argsFromProcess(p *specs.Process, enableRaw bool) (*control.ExecArgs, error) {
// Create capabilities.
var caps *auth.TaskCapabilities
if p.Capabilities != nil {
var err error
// enableRaw is set to true to prevent the filtering out of
// CAP_NET_RAW. This is the opposite of Create() because exec
// requires the capability to be set explicitly, while 'docker
// run' sets it by default.
caps, err = specutils.Capabilities(true /* enableRaw */, p.Capabilities)
// Starting from Docker 19, capabilities are explicitly set for exec (instead
// of nil like before). So we can't distinguish 'exec' from
// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
// CAP_NET_RAW in the same way as container start.
caps, err = specutils.Capabilities(enableRaw, p.Capabilities)
if err != nil {
return nil, fmt.Errorf("error creating capabilities: %v", err)
}
@ -410,7 +410,7 @@ func resolveEnvs(envs ...[]string) ([]string, error) {
// capabilities takes a list of capabilities as strings and returns an
// auth.TaskCapabilities struct with those capabilities in every capability set.
// This mimics runc's behavior.
func capabilities(cs []string) (*auth.TaskCapabilities, error) {
func capabilities(cs []string, enableRaw bool) (*auth.TaskCapabilities, error) {
var specCaps specs.LinuxCapabilities
for _, cap := range cs {
specCaps.Ambient = append(specCaps.Ambient, cap)
@ -419,11 +419,11 @@ func capabilities(cs []string) (*auth.TaskCapabilities, error) {
specCaps.Inheritable = append(specCaps.Inheritable, cap)
specCaps.Permitted = append(specCaps.Permitted, cap)
}
// enableRaw is set to true to prevent the filtering out of
// CAP_NET_RAW. This is the opposite of Create() because exec requires
// the capability to be set explicitly, while 'docker run' sets it by
// default.
return specutils.Capabilities(true /* enableRaw */, &specCaps)
// Starting from Docker 19, capabilities are explicitly set for exec (instead
// of nil like before). So we can't distinguish 'exec' from
// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
// CAP_NET_RAW in the same way as container start.
return specutils.Capabilities(enableRaw, &specCaps)
}
// stringSlice allows a flag to be used multiple times, where each occurrence

View File

@ -91,7 +91,7 @@ func TestCLIArgs(t *testing.T) {
}
for _, tc := range testCases {
e, err := tc.ex.argsFromCLI(tc.argv)
e, err := tc.ex.argsFromCLI(tc.argv, true)
if err != nil {
t.Errorf("argsFromCLI(%+v): got error: %+v", tc.ex, err)
} else if !cmp.Equal(*e, tc.expected, cmpopts.IgnoreUnexported(os.File{})) {
@ -144,7 +144,7 @@ func TestJSONArgs(t *testing.T) {
}
for _, tc := range testCases {
e, err := argsFromProcess(&tc.p)
e, err := argsFromProcess(&tc.p, true)
if err != nil {
t.Errorf("argsFromProcess(%+v): got error: %+v", tc.p, err)
} else if !cmp.Equal(*e, tc.expected, cmpopts.IgnoreUnexported(os.File{})) {

View File

@ -47,6 +47,7 @@ go_test(
],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/kernel",

View File

@ -34,6 +34,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@ -2049,6 +2050,30 @@ func TestMountSymlink(t *testing.T) {
}
}
// Check that --net-raw disables the CAP_NET_RAW capability.
func TestNetRaw(t *testing.T) {
capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10)
app, err := testutil.FindFile("runsc/container/test_app/test_app")
if err != nil {
t.Fatal("error finding test_app:", err)
}
for _, enableRaw := range []bool{true, false} {
conf := testutil.TestConfig()
conf.EnableRaw = enableRaw
test := "--enabled"
if !enableRaw {
test = "--disabled"
}
spec := testutil.NewSpecWithArgs(app, "capability", test, capNetRaw)
if err := run(spec, conf); err != nil {
t.Fatalf("Error running container: %v", err)
}
}
}
// executeSync synchronously executes a new process.
func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
pid, err := cont.Execute(args)

View File

@ -19,10 +19,12 @@ package main
import (
"context"
"fmt"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"regexp"
"strconv"
sys "syscall"
"time"
@ -35,6 +37,7 @@ import (
func main() {
subcommands.Register(subcommands.HelpCommand(), "")
subcommands.Register(subcommands.FlagsCommand(), "")
subcommands.Register(new(capability), "")
subcommands.Register(new(fdReceiver), "")
subcommands.Register(new(fdSender), "")
subcommands.Register(new(forkBomb), "")
@ -287,3 +290,65 @@ func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interfac
}
return subcommands.ExitSuccess
}
// capability is a subcommand that compares the process's effective
// capabilities against two bitmasks supplied on the command line.
type capability struct {
	// enabled holds the value of --enabled and disabled the value of
	// --disabled; both default to 0.
	enabled, disabled uint64
}

// Name implements subcommands.Command.
func (c *capability) Name() string {
	return "capability"
}

// Synopsis implements subcommands.Command.
func (c *capability) Synopsis() string {
	return "checks if effective capabilities are set/unset"
}

// Usage implements subcommands.Command.
func (c *capability) Usage() string {
	return "capability [--enabled=number] [--disabled=number]"
}

// SetFlags implements subcommands.Command. It registers the --enabled and
// --disabled uint64 flags, in that order, each defaulting to 0 with an empty
// usage string.
func (c *capability) SetFlags(f *flag.FlagSet) {
	masks := []struct {
		dst  *uint64
		name string
	}{
		{&c.enabled, "enabled"},
		{&c.disabled, "disabled"},
	}
	for _, m := range masks {
		f.Uint64Var(m.dst, m.name, 0, "")
	}
}
// Execute implements subcommands.Command.
//
// It reads the CapEff field from /proc/self/status and checks it against the
// masks given by --enabled and --disabled: every bit in enabled must be set
// and no bit in disabled may be set. It returns ExitUsageError when both masks
// are zero, ExitFailure when the status file cannot be read or parsed or when
// a check fails, and ExitSuccess otherwise. Diagnostics are written to stdout.
func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
	// Single source of truth for the path so that the error message always
	// names the file that was actually read.
	const statusPath = "/proc/self/status"

	if c.enabled == 0 && c.disabled == 0 {
		fmt.Println("One of the flags must be set")
		return subcommands.ExitUsageError
	}

	status, err := ioutil.ReadFile(statusPath)
	if err != nil {
		fmt.Printf("Error reading %q: %v\n", statusPath, err)
		return subcommands.ExitFailure
	}

	re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n")
	matches := re.FindStringSubmatch(string(status))
	// A failed match yields a nil slice whose length is 0, so the length
	// check alone covers the no-match case.
	if len(matches) != 2 {
		fmt.Printf("Effective capabilities not found in\n%s\n", status)
		return subcommands.ExitFailure
	}

	// The captured group is the hexadecimal capability bitmask.
	caps, err := strconv.ParseUint(matches[1], 16, 64)
	if err != nil {
		fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err)
		return subcommands.ExitFailure
	}

	// A zero mask trivially passes both checks, so no separate guard is
	// needed for an unset flag.
	if caps&c.enabled != c.enabled {
		fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps)
		return subcommands.ExitFailure
	}
	if caps&c.disabled != 0 {
		fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps)
		return subcommands.ExitFailure
	}

	return subcommands.ExitSuccess
}

View File

@ -282,7 +282,14 @@ func (d *Docker) Logs() (string, error) {
// Exec calls 'docker exec' with the arguments provided. It is a shorthand for
// ExecWithFlags with no extra flags.
func (d *Docker) Exec(args ...string) (string, error) {
	return d.ExecWithFlags(nil, args...)
}
// ExecWithFlags calls 'docker exec <flags> name <args>'. The command line is
// assembled in that order and handed to do, whose result is returned as-is.
func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) {
	// "exec" and the container name account for the two extra slots.
	cmd := make([]string, 0, len(flags)+len(args)+2)
	cmd = append(cmd, "exec")
	cmd = append(cmd, flags...)
	cmd = append(cmd, d.Name)
	cmd = append(cmd, args...)
	return do(cmd...)
}

View File

@ -13,6 +13,7 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
"//pkg/log",
"//pkg/sentry/kernel/auth",
"@com_github_cenkalti_backoff//:go_default_library",

View File

@ -31,6 +31,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
@ -241,6 +242,15 @@ func AllCapabilities() *specs.LinuxCapabilities {
}
}
// AllCapabilitiesUint64 returns a bitmask containing all capabilities set. The
// mask is the OR of bits.MaskOf64 over every capability listed in capFromName.
func AllCapabilitiesUint64() uint64 {
	var mask uint64
	for _, c := range capFromName {
		mask |= bits.MaskOf64(int(c))
	}
	return mask
}
var capFromName = map[string]linux.Capability{
"CAP_CHOWN": linux.CAP_CHOWN,
"CAP_DAC_OVERRIDE": linux.CAP_DAC_OVERRIDE,

View File

@ -19,7 +19,9 @@ go_test(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
"//runsc/dockerutil",
"//runsc/specutils",
"//runsc/testutil",
],
)

View File

@ -30,14 +30,17 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bits"
"gvisor.dev/gvisor/runsc/dockerutil"
"gvisor.dev/gvisor/runsc/specutils"
)
// Test that exec uses the exact same capability set as the container.
func TestExecCapabilities(t *testing.T) {
if err := dockerutil.Pull("alpine"); err != nil {
t.Fatalf("docker pull failed: %v", err)
}
d := dockerutil.MakeDocker("exec-test")
d := dockerutil.MakeDocker("exec-capabilities-test")
// Start the container.
if err := d.Run("alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil {
@ -52,27 +55,59 @@ func TestExecCapabilities(t *testing.T) {
if len(matches) != 2 {
t.Fatalf("There should be a match for the whole line and the capability bitmask")
}
capString := matches[1]
t.Log("Root capabilities:", capString)
// CAP_NET_RAW was in the capability set for the container, but was
// removed. However, `exec` does not remove it. Verify that it's not
// set in the container, then re-add it for comparison.
caps, err := strconv.ParseUint(capString, 16, 64)
if err != nil {
t.Fatalf("failed to convert capabilities %q: %v", capString, err)
}
if caps&(1<<uint64(linux.CAP_NET_RAW)) != 0 {
t.Fatalf("CAP_NET_RAW should be filtered, but is set in the container: %x", caps)
}
caps |= 1 << uint64(linux.CAP_NET_RAW)
want := fmt.Sprintf("CapEff:\t%016x\n", caps)
want := fmt.Sprintf("CapEff:\t%s\n", matches[1])
t.Log("Root capabilities:", want)
// Now check that exec'd process capabilities match the root.
got, err := d.Exec("grep", "CapEff:", "/proc/self/status")
if err != nil {
t.Fatalf("docker exec failed: %v", err)
}
t.Logf("CapEff: %v", got)
if got != want {
t.Errorf("wrong capabilities, got: %q, want: %q", got, want)
}
}
// Test that 'exec --privileged' adds all capabilities, except for CAP_NET_RAW
// which is removed from the container when --net-raw=false.
func TestExecPrivileged(t *testing.T) {
if err := dockerutil.Pull("alpine"); err != nil {
t.Fatalf("docker pull failed: %v", err)
}
d := dockerutil.MakeDocker("exec-privileged-test")
// Start the container with all capabilities dropped.
if err := d.Run("--cap-drop=all", "alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
defer d.CleanUp()
// Check that all capabilities were dropped from the container.
matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second)
if err != nil {
t.Fatalf("WaitForOutputSubmatch() timeout: %v", err)
}
if len(matches) != 2 {
t.Fatalf("There should be a match for the whole line and the capability bitmask")
}
containerCaps, err := strconv.ParseUint(matches[1], 16, 64)
if err != nil {
t.Fatalf("failed to convert capabilities %q: %v", matches[1], err)
}
t.Logf("Container capabilities: %#x", containerCaps)
if containerCaps != 0 {
t.Fatalf("Container should have no capabilities: %x", containerCaps)
}
// Check that 'exec --privileged' adds all capabilities, except
// for CAP_NET_RAW.
got, err := d.ExecWithFlags([]string{"--privileged"}, "grep", "CapEff:", "/proc/self/status")
if err != nil {
t.Fatalf("docker exec failed: %v", err)
}
t.Logf("Exec CapEff: %v", got)
want := fmt.Sprintf("CapEff:\t%016x\n", specutils.AllCapabilitiesUint64()&^bits.MaskOf64(int(linux.CAP_NET_RAW)))
if got != want {
t.Errorf("wrong capabilities, got: %q, want: %q", got, want)
}
@ -184,7 +219,7 @@ func TestExecEnvHasHome(t *testing.T) {
if err := dockerutil.Pull("alpine"); err != nil {
t.Fatalf("docker pull failed: %v", err)
}
d := dockerutil.MakeDocker("exec-env-test")
d := dockerutil.MakeDocker("exec-env-home-test")
// We will check that HOME is set for root user, and also for a new
// non-root user we will create.