Allow 'runsc do' to run without root

'--rootless' flag lets a non-root user execute 'runsc do'.
The drawback is that the sandbox and gofer processes will
run as root inside a user namespace that is mapped to the
caller's user, intead of nobody. And network is defaulted
to '--network=host' inside the root network namespace. On
the bright side, it's very convenient for testing:

runsc --rootless do ls
runsc --rootless do curl www.google.com

PiperOrigin-RevId: 252840970
This commit is contained in:
Fabricio Voznika 2019-06-12 09:40:50 -07:00 committed by Shentubot
parent df110ad4fe
commit 356d1be140
15 changed files with 214 additions and 156 deletions

View File

@ -226,6 +226,12 @@ type Config struct {
// to the same underlying network device. This allows netstack to better
// scale for high throughput use cases.
NumNetworkChannels int
// Rootless allows the sandbox to be started with a user that is not root.
// Defense is depth measures are weaker with rootless. Specifically, the
// sandbox and Gofer process run as root inside a user namespace with root
// mapped to the caller's user.
Rootless bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@ -250,6 +256,7 @@ func (c *Config) ToFlags() []string {
"--profile=" + strconv.FormatBool(c.ProfileEnable),
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
"--rootless=" + strconv.FormatBool(c.Rootless),
}
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Only include if set since it is never to be used by users.

View File

@ -130,6 +130,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Ensure that if there is a panic, all goroutine stacks are printed.
debug.SetTraceback("all")
conf := args[0].(*boot.Config)
if b.setUpRoot {
if err := setUpChroot(b.pidns); err != nil {
Fatalf("error setting up chroot: %v", err)
@ -143,14 +145,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
args = append(args, arg)
}
}
// Note that we've already read the spec from the spec FD, and
// we will read it again after the exec call. This works
// because the ReadSpecFromFile function seeks to the beginning
// of the file before reading.
if err := callSelfAsNobody(args); err != nil {
Fatalf("%v", err)
if !conf.Rootless {
// Note that we've already read the spec from the spec FD, and
// we will read it again after the exec call. This works
// because the ReadSpecFromFile function seeks to the beginning
// of the file before reading.
if err := callSelfAsNobody(args); err != nil {
Fatalf("%v", err)
}
panic("callSelfAsNobody must never return success")
}
panic("callSelfAsNobody must never return success")
}
}
@ -163,9 +167,6 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
specutils.LogSpec(spec)
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
if b.applyCaps {
caps := spec.Process.Capabilities
if caps == nil {
@ -251,6 +252,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
ws := l.WaitExit()
log.Infof("application exiting with %+v", ws)
waitStatus := args[1].(*syscall.WaitStatus)
*waitStatus = syscall.WaitStatus(ws.Status())
l.Destroy()
return subcommands.ExitSuccess

View File

@ -116,6 +116,6 @@ func TestCapabilities(t *testing.T) {
}
func TestMain(m *testing.M) {
testutil.RunAsRoot()
specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}

View File

@ -82,13 +82,17 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
id := f.Arg(0)
conf := args[0].(*boot.Config)
if conf.Rootless {
return Errorf("Rootless mode not supported with %q", c.Name())
}
bundleDir := c.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
Fatalf("reading spec: %v", err)
return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
@ -96,7 +100,7 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
// container unless the metadata specifies that it should be run in an
// existing container.
if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
Fatalf("creating container: %v", err)
return Errorf("creating container: %v", err)
}
return subcommands.ExitSuccess
}

View File

@ -39,10 +39,9 @@ import (
// Do implements subcommands.Command for the "do" command. It sets up a simple
// sandbox and executes the command inside it. See Usage() for more details.
type Do struct {
root string
cwd string
ip string
networkNamespace bool
root string
cwd string
ip string
}
// Name implements subcommands.Command.Name.
@ -72,7 +71,6 @@ func (c *Do) SetFlags(f *flag.FlagSet) {
f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace")
}
// Execute implements subcommands.Command.Execute.
@ -85,15 +83,21 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
// Map the entire host file system, but make it readonly with a writable
// overlay on top (ignore --overlay option).
conf.Overlay = true
if conf.Rootless {
if err := specutils.MaybeRunAsRoot(); err != nil {
return Errorf("Error executing inside namespace: %v", err)
}
// Execution will continue here if no more capabilities are needed...
}
hostname, err := os.Hostname()
if err != nil {
return Errorf("Error to retrieve hostname: %v", err)
}
// Map the entire host file system, but make it readonly with a writable
// overlay on top (ignore --overlay option).
conf.Overlay = true
absRoot, err := resolvePath(c.root)
if err != nil {
return Errorf("Error resolving root: %v", err)
@ -119,11 +123,22 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
specutils.LogSpec(spec)
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
if !c.networkNamespace {
if conf.Network != boot.NetworkHost {
Fatalf("The current network namespace can be used only if --network=host is set", nil)
if conf.Network == boot.NetworkNone {
netns := specs.LinuxNamespace{
Type: specs.NetworkNamespace,
}
} else if conf.Network != boot.NetworkNone {
if spec.Linux != nil {
panic("spec.Linux is not nil")
}
spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}}
} else if conf.Rootless {
if conf.Network == boot.NetworkSandbox {
fmt.Println("*** Rootless requires changing network type to host ***")
conf.Network = boot.NetworkHost
}
} else {
clean, err := c.setupNet(cid, spec)
if err != nil {
return Errorf("Error setting up network: %v", err)

View File

@ -80,25 +80,29 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
if conf.Rootless {
return Errorf("Rootless mode not supported with %q", r.Name())
}
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
Fatalf("reading spec: %v", err)
return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
if r.imagePath == "" {
Fatalf("image-path flag must be provided")
return Errorf("image-path flag must be provided")
}
conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
if err != nil {
Fatalf("running container: %v", err)
return Errorf("running container: %v", err)
}
*waitStatus = ws

View File

@ -67,19 +67,23 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
conf := args[0].(*boot.Config)
waitStatus := args[1].(*syscall.WaitStatus)
if conf.Rootless {
return Errorf("Rootless mode not supported with %q", r.Name())
}
bundleDir := r.bundleDir
if bundleDir == "" {
bundleDir = getwdOrDie()
}
spec, err := specutils.ReadSpec(bundleDir)
if err != nil {
Fatalf("reading spec: %v", err)
return Errorf("reading spec: %v", err)
}
specutils.LogSpec(spec)
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
if err != nil {
Fatalf("running container: %v", err)
return Errorf("running container: %v", err)
}
*waitStatus = ws

View File

@ -36,6 +36,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/runsc/boot"
"gvisor.googlesource.com/gvisor/runsc/specutils"
"gvisor.googlesource.com/gvisor/runsc/test/testutil"
)
@ -1853,7 +1854,7 @@ func TestMain(m *testing.M) {
if err := testutil.ConfigureExePath(); err != nil {
panic(err.Error())
}
testutil.RunAsRoot()
specutils.MaybeRunAsRoot()
os.Exit(m.Run())
}

View File

@ -61,16 +61,19 @@ var (
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
// Flags that control sandbox runtime behavior.
platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
gso = flag.Bool("gso", true, "enable generic segmenation offload")
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
gso = flag.Bool("gso", true, "enable generic segmenation offload")
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
// Test flags, not to be used outside tests, ever.
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
)
@ -166,26 +169,28 @@ func main() {
// Create a new Config from the flags.
conf := &boot.Config{
RootDir: *rootDir,
Debug: *debug,
LogFilename: *logFilename,
LogFormat: *logFormat,
DebugLog: *debugLog,
DebugLogFormat: *debugLogFormat,
FileAccess: fsAccess,
Overlay: *overlay,
Network: netType,
GSO: *gso,
LogPackets: *logPackets,
Platform: platformType,
Strace: *strace,
StraceLogSize: *straceLogSize,
WatchdogAction: wa,
PanicSignal: *panicSignal,
ProfileEnable: *profile,
EnableRaw: *netRaw,
RootDir: *rootDir,
Debug: *debug,
LogFilename: *logFilename,
LogFormat: *logFormat,
DebugLog: *debugLog,
DebugLogFormat: *debugLogFormat,
FileAccess: fsAccess,
Overlay: *overlay,
Network: netType,
GSO: *gso,
LogPackets: *logPackets,
Platform: platformType,
Strace: *strace,
StraceLogSize: *straceLogSize,
WatchdogAction: wa,
PanicSignal: *panicSignal,
ProfileEnable: *profile,
EnableRaw: *netRaw,
NumNetworkChannels: *numNetworkChannels,
Rootless: *rootless,
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
NumNetworkChannels: *numNetworkChannels,
}
if len(*straceSyscalls) != 0 {
conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")

View File

@ -515,46 +515,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
// root for itself, so it has to have the CAP_SYS_ADMIN
// capability.
//
// FIXME(b/122554829): The current implementations of
// os/exec doesn't allow to set ambient capabilities if
// a process is started in a new user namespace. As a
// workaround, we start the sandbox process with the 0
// UID and then it constructs a chroot and sets UID to
// nobody. https://github.com/golang/go/issues/2315
const nobody = 65534
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: int(0),
HostID: int(nobody - 1),
Size: int(1),
},
{
ContainerID: int(nobody),
HostID: int(nobody),
Size: int(1),
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: int(nobody),
HostID: int(nobody),
Size: int(1),
},
}
// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{
Uid: 0,
Gid: nobody,
}
cmd.Args = append(cmd.Args, "--setup-root")
if conf.Rootless {
log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getgid(),
Size: 1,
},
}
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
// root for itself, so it has to have the CAP_SYS_ADMIN
// capability.
//
// FIXME(b/122554829): The current implementations of
// os/exec doesn't allow to set ambient capabilities if
// a process is started in a new user namespace. As a
// workaround, we start the sandbox process with the 0
// UID and then it constructs a chroot and sets UID to
// nobody. https://github.com/golang/go/issues/2315
const nobody = 65534
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: nobody - 1,
Size: 1,
},
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}

View File

@ -10,10 +10,7 @@ go_library(
"specutils.go",
],
importpath = "gvisor.googlesource.com/gvisor/runsc/specutils",
visibility = [
"//runsc:__subpackages__",
"//test:__subpackages__",
],
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/log",

View File

@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool {
}
return true
}
// MaybeRunAsRoot ensures the process runs with capabilities needed to create a
// sandbox, e.g. CAP_SYS_ADMIN, CAP_SYS_CHROOT, etc. If capabilities are needed,
// it will create a new user namespace and re-execute the process as root
// inside the namespace with the same arguments and environment.
//
// This function returns immediately when no new capability is needed. If
// another process is executed, it returns straight from here with the same exit
// code as the child.
func MaybeRunAsRoot() error {
if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) {
return nil
}
// Current process doesn't have required capabilities, create user namespace
// and run as root inside the namespace to acquire capabilities.
log.Infof("*** Re-running as root in new user namespace ***")
cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
// Set current user/group as root inside the namespace. Since we may not
// have CAP_SETUID/CAP_SETGID, just map root to the current user/group.
UidMappings: []syscall.SysProcIDMap{
{ContainerID: 0, HostID: os.Getuid(), Size: 1},
},
GidMappings: []syscall.SysProcIDMap{
{ContainerID: 0, HostID: os.Getgid(), Size: 1},
},
Credential: &syscall.Credential{Uid: 0, Gid: 0},
GidMappingsEnableSetgroups: false,
}
cmd.Env = os.Environ()
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
if exit, ok := err.(*exec.ExitError); ok {
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
os.Exit(ws.ExitStatus())
}
log.Warningf("No wait status provided, exiting with -1: %v", err)
os.Exit(-1)
}
return fmt.Errorf("re-executing self: %v", err)
}
// Child completed with success.
os.Exit(0)
panic("unreachable")
}

View File

@ -18,6 +18,5 @@ go_library(
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_kr_pty//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
"@com_github_syndtr_gocapability//capability:go_default_library",
],
)

View File

@ -30,7 +30,6 @@ import (
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strings"
"sync"
"sync/atomic"
@ -39,7 +38,6 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
"gvisor.googlesource.com/gvisor/runsc/boot"
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
@ -284,54 +282,6 @@ func WaitForHTTP(port int, timeout time.Duration) error {
return Poll(cb, timeout)
}
// RunAsRoot ensures the test runs with CAP_SYS_ADMIN and CAP_SYS_CHROOT. If
// needed it will create a new user namespace and re-execute the test as root
// inside of the namespace. This function returns when it's running as root. If
// it needs to create another process, it will exit from there and not return.
func RunAsRoot() {
if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) {
return
}
fmt.Println("*** Re-running test as root in new user namespace ***")
// Current process doesn't have CAP_SYS_ADMIN, create user namespace and run
// as root inside that namespace to get it.
runtime.LockOSThread()
defer runtime.UnlockOSThread()
cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
cmd.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
// Set current user/group as root inside the namespace.
UidMappings: []syscall.SysProcIDMap{
{ContainerID: 0, HostID: os.Getuid(), Size: 1},
},
GidMappings: []syscall.SysProcIDMap{
{ContainerID: 0, HostID: os.Getgid(), Size: 1},
},
GidMappingsEnableSetgroups: false,
Credential: &syscall.Credential{
Uid: 0,
Gid: 0,
},
}
cmd.Env = os.Environ()
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
if exit, ok := err.(*exec.ExitError); ok {
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
os.Exit(ws.ExitStatus())
}
os.Exit(-1)
}
panic(fmt.Sprint("error running child process:", err.Error()))
}
os.Exit(0)
}
// Reaper reaps child processes.
type Reaper struct {
// mu protects ch, which will be nil if the reaper is not running.

View File

@ -212,8 +212,8 @@ run_runsc_do_tests() {
local runsc=$(find bazel-bin/runsc -type f -executable -name "runsc" | head -n1)
# run runsc do without root privileges.
unshare -Ur ${runsc} --network=none --TESTONLY-unsafe-nonroot do true
unshare -Ur ${runsc} --TESTONLY-unsafe-nonroot --network=host do --netns=false true
${runsc} --rootless do true
${runsc} --rootless --network=none do true
# run runsc do with root privileges.
sudo -n -E ${runsc} do true