Allow 'runsc do' to run without root
The '--rootless' flag lets a non-root user execute 'runsc do'. The drawback is that the sandbox and gofer processes will run as root inside a user namespace that is mapped to the caller's user, instead of nobody. In addition, the network defaults to '--network=host' inside the root network namespace. On the bright side, it's very convenient for testing, e.g. 'runsc --rootless do ls' or 'runsc --rootless do curl www.google.com'. PiperOrigin-RevId: 252840970
This commit is contained in:
parent
df110ad4fe
commit
356d1be140
|
@ -226,6 +226,12 @@ type Config struct {
|
||||||
// to the same underlying network device. This allows netstack to better
|
// to the same underlying network device. This allows netstack to better
|
||||||
// scale for high throughput use cases.
|
// scale for high throughput use cases.
|
||||||
NumNetworkChannels int
|
NumNetworkChannels int
|
||||||
|
|
||||||
|
// Rootless allows the sandbox to be started with a user that is not root.
|
||||||
|
// Defense in depth measures are weaker with rootless. Specifically, the
|
||||||
|
// sandbox and Gofer process run as root inside a user namespace with root
|
||||||
|
// mapped to the caller's user.
|
||||||
|
Rootless bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToFlags returns a slice of flags that correspond to the given Config.
|
// ToFlags returns a slice of flags that correspond to the given Config.
|
||||||
|
@ -250,6 +256,7 @@ func (c *Config) ToFlags() []string {
|
||||||
"--profile=" + strconv.FormatBool(c.ProfileEnable),
|
"--profile=" + strconv.FormatBool(c.ProfileEnable),
|
||||||
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
|
"--net-raw=" + strconv.FormatBool(c.EnableRaw),
|
||||||
"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
|
"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
|
||||||
|
"--rootless=" + strconv.FormatBool(c.Rootless),
|
||||||
}
|
}
|
||||||
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
|
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
|
||||||
// Only include if set since it is never to be used by users.
|
// Only include if set since it is never to be used by users.
|
||||||
|
|
|
@ -130,6 +130,8 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
||||||
// Ensure that if there is a panic, all goroutine stacks are printed.
|
// Ensure that if there is a panic, all goroutine stacks are printed.
|
||||||
debug.SetTraceback("all")
|
debug.SetTraceback("all")
|
||||||
|
|
||||||
|
conf := args[0].(*boot.Config)
|
||||||
|
|
||||||
if b.setUpRoot {
|
if b.setUpRoot {
|
||||||
if err := setUpChroot(b.pidns); err != nil {
|
if err := setUpChroot(b.pidns); err != nil {
|
||||||
Fatalf("error setting up chroot: %v", err)
|
Fatalf("error setting up chroot: %v", err)
|
||||||
|
@ -143,14 +145,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
||||||
args = append(args, arg)
|
args = append(args, arg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Note that we've already read the spec from the spec FD, and
|
if !conf.Rootless {
|
||||||
// we will read it again after the exec call. This works
|
// Note that we've already read the spec from the spec FD, and
|
||||||
// because the ReadSpecFromFile function seeks to the beginning
|
// we will read it again after the exec call. This works
|
||||||
// of the file before reading.
|
// because the ReadSpecFromFile function seeks to the beginning
|
||||||
if err := callSelfAsNobody(args); err != nil {
|
// of the file before reading.
|
||||||
Fatalf("%v", err)
|
if err := callSelfAsNobody(args); err != nil {
|
||||||
|
Fatalf("%v", err)
|
||||||
|
}
|
||||||
|
panic("callSelfAsNobody must never return success")
|
||||||
}
|
}
|
||||||
panic("callSelfAsNobody must never return success")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,9 +167,6 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
||||||
}
|
}
|
||||||
specutils.LogSpec(spec)
|
specutils.LogSpec(spec)
|
||||||
|
|
||||||
conf := args[0].(*boot.Config)
|
|
||||||
waitStatus := args[1].(*syscall.WaitStatus)
|
|
||||||
|
|
||||||
if b.applyCaps {
|
if b.applyCaps {
|
||||||
caps := spec.Process.Capabilities
|
caps := spec.Process.Capabilities
|
||||||
if caps == nil {
|
if caps == nil {
|
||||||
|
@ -251,6 +252,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
||||||
|
|
||||||
ws := l.WaitExit()
|
ws := l.WaitExit()
|
||||||
log.Infof("application exiting with %+v", ws)
|
log.Infof("application exiting with %+v", ws)
|
||||||
|
waitStatus := args[1].(*syscall.WaitStatus)
|
||||||
*waitStatus = syscall.WaitStatus(ws.Status())
|
*waitStatus = syscall.WaitStatus(ws.Status())
|
||||||
l.Destroy()
|
l.Destroy()
|
||||||
return subcommands.ExitSuccess
|
return subcommands.ExitSuccess
|
||||||
|
|
|
@ -116,6 +116,6 @@ func TestCapabilities(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
testutil.RunAsRoot()
|
specutils.MaybeRunAsRoot()
|
||||||
os.Exit(m.Run())
|
os.Exit(m.Run())
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,13 +82,17 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
|
||||||
id := f.Arg(0)
|
id := f.Arg(0)
|
||||||
conf := args[0].(*boot.Config)
|
conf := args[0].(*boot.Config)
|
||||||
|
|
||||||
|
if conf.Rootless {
|
||||||
|
return Errorf("Rootless mode not supported with %q", c.Name())
|
||||||
|
}
|
||||||
|
|
||||||
bundleDir := c.bundleDir
|
bundleDir := c.bundleDir
|
||||||
if bundleDir == "" {
|
if bundleDir == "" {
|
||||||
bundleDir = getwdOrDie()
|
bundleDir = getwdOrDie()
|
||||||
}
|
}
|
||||||
spec, err := specutils.ReadSpec(bundleDir)
|
spec, err := specutils.ReadSpec(bundleDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Fatalf("reading spec: %v", err)
|
return Errorf("reading spec: %v", err)
|
||||||
}
|
}
|
||||||
specutils.LogSpec(spec)
|
specutils.LogSpec(spec)
|
||||||
|
|
||||||
|
@ -96,7 +100,7 @@ func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}
|
||||||
// container unless the metadata specifies that it should be run in an
|
// container unless the metadata specifies that it should be run in an
|
||||||
// existing container.
|
// existing container.
|
||||||
if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
|
if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
|
||||||
Fatalf("creating container: %v", err)
|
return Errorf("creating container: %v", err)
|
||||||
}
|
}
|
||||||
return subcommands.ExitSuccess
|
return subcommands.ExitSuccess
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,10 +39,9 @@ import (
|
||||||
// Do implements subcommands.Command for the "do" command. It sets up a simple
|
// Do implements subcommands.Command for the "do" command. It sets up a simple
|
||||||
// sandbox and executes the command inside it. See Usage() for more details.
|
// sandbox and executes the command inside it. See Usage() for more details.
|
||||||
type Do struct {
|
type Do struct {
|
||||||
root string
|
root string
|
||||||
cwd string
|
cwd string
|
||||||
ip string
|
ip string
|
||||||
networkNamespace bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Name implements subcommands.Command.Name.
|
// Name implements subcommands.Command.Name.
|
||||||
|
@ -72,7 +71,6 @@ func (c *Do) SetFlags(f *flag.FlagSet) {
|
||||||
f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
|
f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
|
||||||
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
|
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
|
||||||
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
|
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
|
||||||
f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute implements subcommands.Command.Execute.
|
// Execute implements subcommands.Command.Execute.
|
||||||
|
@ -85,15 +83,21 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
|
||||||
conf := args[0].(*boot.Config)
|
conf := args[0].(*boot.Config)
|
||||||
waitStatus := args[1].(*syscall.WaitStatus)
|
waitStatus := args[1].(*syscall.WaitStatus)
|
||||||
|
|
||||||
// Map the entire host file system, but make it readonly with a writable
|
if conf.Rootless {
|
||||||
// overlay on top (ignore --overlay option).
|
if err := specutils.MaybeRunAsRoot(); err != nil {
|
||||||
conf.Overlay = true
|
return Errorf("Error executing inside namespace: %v", err)
|
||||||
|
}
|
||||||
|
// Execution will continue here if no more capabilities are needed...
|
||||||
|
}
|
||||||
|
|
||||||
hostname, err := os.Hostname()
|
hostname, err := os.Hostname()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Errorf("Error to retrieve hostname: %v", err)
|
return Errorf("Error to retrieve hostname: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map the entire host file system, but make it readonly with a writable
|
||||||
|
// overlay on top (ignore --overlay option).
|
||||||
|
conf.Overlay = true
|
||||||
absRoot, err := resolvePath(c.root)
|
absRoot, err := resolvePath(c.root)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Errorf("Error resolving root: %v", err)
|
return Errorf("Error resolving root: %v", err)
|
||||||
|
@ -119,11 +123,22 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
|
||||||
specutils.LogSpec(spec)
|
specutils.LogSpec(spec)
|
||||||
|
|
||||||
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
|
cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
|
||||||
if !c.networkNamespace {
|
if conf.Network == boot.NetworkNone {
|
||||||
if conf.Network != boot.NetworkHost {
|
netns := specs.LinuxNamespace{
|
||||||
Fatalf("The current network namespace can be used only if --network=host is set", nil)
|
Type: specs.NetworkNamespace,
|
||||||
}
|
}
|
||||||
} else if conf.Network != boot.NetworkNone {
|
if spec.Linux != nil {
|
||||||
|
panic("spec.Linux is not nil")
|
||||||
|
}
|
||||||
|
spec.Linux = &specs.Linux{Namespaces: []specs.LinuxNamespace{netns}}
|
||||||
|
|
||||||
|
} else if conf.Rootless {
|
||||||
|
if conf.Network == boot.NetworkSandbox {
|
||||||
|
fmt.Println("*** Rootless requires changing network type to host ***")
|
||||||
|
conf.Network = boot.NetworkHost
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
clean, err := c.setupNet(cid, spec)
|
clean, err := c.setupNet(cid, spec)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Errorf("Error setting up network: %v", err)
|
return Errorf("Error setting up network: %v", err)
|
||||||
|
|
|
@ -80,25 +80,29 @@ func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{
|
||||||
conf := args[0].(*boot.Config)
|
conf := args[0].(*boot.Config)
|
||||||
waitStatus := args[1].(*syscall.WaitStatus)
|
waitStatus := args[1].(*syscall.WaitStatus)
|
||||||
|
|
||||||
|
if conf.Rootless {
|
||||||
|
return Errorf("Rootless mode not supported with %q", r.Name())
|
||||||
|
}
|
||||||
|
|
||||||
bundleDir := r.bundleDir
|
bundleDir := r.bundleDir
|
||||||
if bundleDir == "" {
|
if bundleDir == "" {
|
||||||
bundleDir = getwdOrDie()
|
bundleDir = getwdOrDie()
|
||||||
}
|
}
|
||||||
spec, err := specutils.ReadSpec(bundleDir)
|
spec, err := specutils.ReadSpec(bundleDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Fatalf("reading spec: %v", err)
|
return Errorf("reading spec: %v", err)
|
||||||
}
|
}
|
||||||
specutils.LogSpec(spec)
|
specutils.LogSpec(spec)
|
||||||
|
|
||||||
if r.imagePath == "" {
|
if r.imagePath == "" {
|
||||||
Fatalf("image-path flag must be provided")
|
return Errorf("image-path flag must be provided")
|
||||||
}
|
}
|
||||||
|
|
||||||
conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
|
conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
|
||||||
|
|
||||||
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
|
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Fatalf("running container: %v", err)
|
return Errorf("running container: %v", err)
|
||||||
}
|
}
|
||||||
*waitStatus = ws
|
*waitStatus = ws
|
||||||
|
|
||||||
|
|
|
@ -67,19 +67,23 @@ func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s
|
||||||
conf := args[0].(*boot.Config)
|
conf := args[0].(*boot.Config)
|
||||||
waitStatus := args[1].(*syscall.WaitStatus)
|
waitStatus := args[1].(*syscall.WaitStatus)
|
||||||
|
|
||||||
|
if conf.Rootless {
|
||||||
|
return Errorf("Rootless mode not supported with %q", r.Name())
|
||||||
|
}
|
||||||
|
|
||||||
bundleDir := r.bundleDir
|
bundleDir := r.bundleDir
|
||||||
if bundleDir == "" {
|
if bundleDir == "" {
|
||||||
bundleDir = getwdOrDie()
|
bundleDir = getwdOrDie()
|
||||||
}
|
}
|
||||||
spec, err := specutils.ReadSpec(bundleDir)
|
spec, err := specutils.ReadSpec(bundleDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Fatalf("reading spec: %v", err)
|
return Errorf("reading spec: %v", err)
|
||||||
}
|
}
|
||||||
specutils.LogSpec(spec)
|
specutils.LogSpec(spec)
|
||||||
|
|
||||||
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
|
ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Fatalf("running container: %v", err)
|
return Errorf("running container: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
*waitStatus = ws
|
*waitStatus = ws
|
||||||
|
|
|
@ -36,6 +36,7 @@ import (
|
||||||
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
|
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
|
||||||
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
|
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
|
||||||
"gvisor.googlesource.com/gvisor/runsc/boot"
|
"gvisor.googlesource.com/gvisor/runsc/boot"
|
||||||
|
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
||||||
"gvisor.googlesource.com/gvisor/runsc/test/testutil"
|
"gvisor.googlesource.com/gvisor/runsc/test/testutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1853,7 +1854,7 @@ func TestMain(m *testing.M) {
|
||||||
if err := testutil.ConfigureExePath(); err != nil {
|
if err := testutil.ConfigureExePath(); err != nil {
|
||||||
panic(err.Error())
|
panic(err.Error())
|
||||||
}
|
}
|
||||||
testutil.RunAsRoot()
|
specutils.MaybeRunAsRoot()
|
||||||
|
|
||||||
os.Exit(m.Run())
|
os.Exit(m.Run())
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,16 +61,19 @@ var (
|
||||||
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
|
straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs")
|
||||||
|
|
||||||
// Flags that control sandbox runtime behavior.
|
// Flags that control sandbox runtime behavior.
|
||||||
platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
|
platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm")
|
||||||
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
|
network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
|
||||||
gso = flag.Bool("gso", true, "enable generic segmenation offload")
|
gso = flag.Bool("gso", true, "enable generic segmenation offload")
|
||||||
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
|
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
|
||||||
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
|
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
|
||||||
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
|
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
|
||||||
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
|
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
|
||||||
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
|
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
|
||||||
netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
|
netRaw = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
|
||||||
numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
|
numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
|
||||||
|
rootless = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
|
||||||
|
|
||||||
|
// Test flags, not to be used outside tests, ever.
|
||||||
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
|
testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -166,26 +169,28 @@ func main() {
|
||||||
|
|
||||||
// Create a new Config from the flags.
|
// Create a new Config from the flags.
|
||||||
conf := &boot.Config{
|
conf := &boot.Config{
|
||||||
RootDir: *rootDir,
|
RootDir: *rootDir,
|
||||||
Debug: *debug,
|
Debug: *debug,
|
||||||
LogFilename: *logFilename,
|
LogFilename: *logFilename,
|
||||||
LogFormat: *logFormat,
|
LogFormat: *logFormat,
|
||||||
DebugLog: *debugLog,
|
DebugLog: *debugLog,
|
||||||
DebugLogFormat: *debugLogFormat,
|
DebugLogFormat: *debugLogFormat,
|
||||||
FileAccess: fsAccess,
|
FileAccess: fsAccess,
|
||||||
Overlay: *overlay,
|
Overlay: *overlay,
|
||||||
Network: netType,
|
Network: netType,
|
||||||
GSO: *gso,
|
GSO: *gso,
|
||||||
LogPackets: *logPackets,
|
LogPackets: *logPackets,
|
||||||
Platform: platformType,
|
Platform: platformType,
|
||||||
Strace: *strace,
|
Strace: *strace,
|
||||||
StraceLogSize: *straceLogSize,
|
StraceLogSize: *straceLogSize,
|
||||||
WatchdogAction: wa,
|
WatchdogAction: wa,
|
||||||
PanicSignal: *panicSignal,
|
PanicSignal: *panicSignal,
|
||||||
ProfileEnable: *profile,
|
ProfileEnable: *profile,
|
||||||
EnableRaw: *netRaw,
|
EnableRaw: *netRaw,
|
||||||
|
NumNetworkChannels: *numNetworkChannels,
|
||||||
|
Rootless: *rootless,
|
||||||
|
|
||||||
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
|
TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
|
||||||
NumNetworkChannels: *numNetworkChannels,
|
|
||||||
}
|
}
|
||||||
if len(*straceSyscalls) != 0 {
|
if len(*straceSyscalls) != 0 {
|
||||||
conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
|
conf.StraceSyscalls = strings.Split(*straceSyscalls, ",")
|
||||||
|
|
|
@ -515,46 +515,64 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
|
||||||
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
|
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
|
||||||
log.Infof("Sandbox will be started in new user namespace")
|
log.Infof("Sandbox will be started in new user namespace")
|
||||||
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
|
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
|
||||||
|
|
||||||
// Map nobody in the new namespace to nobody in the parent namespace.
|
|
||||||
//
|
|
||||||
// A sandbox process will construct an empty
|
|
||||||
// root for itself, so it has to have the CAP_SYS_ADMIN
|
|
||||||
// capability.
|
|
||||||
//
|
|
||||||
// FIXME(b/122554829): The current implementations of
|
|
||||||
// os/exec doesn't allow to set ambient capabilities if
|
|
||||||
// a process is started in a new user namespace. As a
|
|
||||||
// workaround, we start the sandbox process with the 0
|
|
||||||
// UID and then it constructs a chroot and sets UID to
|
|
||||||
// nobody. https://github.com/golang/go/issues/2315
|
|
||||||
const nobody = 65534
|
|
||||||
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
|
|
||||||
{
|
|
||||||
ContainerID: int(0),
|
|
||||||
HostID: int(nobody - 1),
|
|
||||||
Size: int(1),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
ContainerID: int(nobody),
|
|
||||||
HostID: int(nobody),
|
|
||||||
Size: int(1),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
|
|
||||||
{
|
|
||||||
ContainerID: int(nobody),
|
|
||||||
HostID: int(nobody),
|
|
||||||
Size: int(1),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set credentials to run as user and group nobody.
|
|
||||||
cmd.SysProcAttr.Credential = &syscall.Credential{
|
|
||||||
Uid: 0,
|
|
||||||
Gid: nobody,
|
|
||||||
}
|
|
||||||
cmd.Args = append(cmd.Args, "--setup-root")
|
cmd.Args = append(cmd.Args, "--setup-root")
|
||||||
|
|
||||||
|
if conf.Rootless {
|
||||||
|
log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
|
||||||
|
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
|
||||||
|
{
|
||||||
|
ContainerID: 0,
|
||||||
|
HostID: os.Getuid(),
|
||||||
|
Size: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
|
||||||
|
{
|
||||||
|
ContainerID: 0,
|
||||||
|
HostID: os.Getgid(),
|
||||||
|
Size: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Map nobody in the new namespace to nobody in the parent namespace.
|
||||||
|
//
|
||||||
|
// A sandbox process will construct an empty
|
||||||
|
// root for itself, so it has to have the CAP_SYS_ADMIN
|
||||||
|
// capability.
|
||||||
|
//
|
||||||
|
// FIXME(b/122554829): The current implementations of
|
||||||
|
// os/exec doesn't allow to set ambient capabilities if
|
||||||
|
// a process is started in a new user namespace. As a
|
||||||
|
// workaround, we start the sandbox process with the 0
|
||||||
|
// UID and then it constructs a chroot and sets UID to
|
||||||
|
// nobody. https://github.com/golang/go/issues/2315
|
||||||
|
const nobody = 65534
|
||||||
|
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
|
||||||
|
{
|
||||||
|
ContainerID: 0,
|
||||||
|
HostID: nobody - 1,
|
||||||
|
Size: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ContainerID: nobody,
|
||||||
|
HostID: nobody,
|
||||||
|
Size: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
|
||||||
|
{
|
||||||
|
ContainerID: nobody,
|
||||||
|
HostID: nobody,
|
||||||
|
Size: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set credentials to run as user and group nobody.
|
||||||
|
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
|
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,10 +10,7 @@ go_library(
|
||||||
"specutils.go",
|
"specutils.go",
|
||||||
],
|
],
|
||||||
importpath = "gvisor.googlesource.com/gvisor/runsc/specutils",
|
importpath = "gvisor.googlesource.com/gvisor/runsc/specutils",
|
||||||
visibility = [
|
visibility = ["//:sandbox"],
|
||||||
"//runsc:__subpackages__",
|
|
||||||
"//test:__subpackages__",
|
|
||||||
],
|
|
||||||
deps = [
|
deps = [
|
||||||
"//pkg/abi/linux",
|
"//pkg/abi/linux",
|
||||||
"//pkg/log",
|
"//pkg/log",
|
||||||
|
|
|
@ -220,3 +220,55 @@ func HasCapabilities(cs ...capability.Cap) bool {
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MaybeRunAsRoot ensures the process runs with capabilities needed to create a
|
||||||
|
// sandbox, e.g. CAP_SYS_ADMIN, CAP_SYS_CHROOT, etc. If capabilities are needed,
|
||||||
|
// it will create a new user namespace and re-execute the process as root
|
||||||
|
// inside the namespace with the same arguments and environment.
|
||||||
|
//
|
||||||
|
// This function returns immediately when no new capability is needed. If
|
||||||
|
// another process is executed, it exits straight from here with the same exit
|
||||||
|
// code as the child.
|
||||||
|
func MaybeRunAsRoot() error {
|
||||||
|
if HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT, capability.CAP_SETUID, capability.CAP_SETGID) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Current process doesn't have required capabilities, create user namespace
|
||||||
|
// and run as root inside the namespace to acquire capabilities.
|
||||||
|
log.Infof("*** Re-running as root in new user namespace ***")
|
||||||
|
|
||||||
|
cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
|
||||||
|
|
||||||
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||||
|
Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
|
||||||
|
// Set current user/group as root inside the namespace. Since we may not
|
||||||
|
// have CAP_SETUID/CAP_SETGID, just map root to the current user/group.
|
||||||
|
UidMappings: []syscall.SysProcIDMap{
|
||||||
|
{ContainerID: 0, HostID: os.Getuid(), Size: 1},
|
||||||
|
},
|
||||||
|
GidMappings: []syscall.SysProcIDMap{
|
||||||
|
{ContainerID: 0, HostID: os.Getgid(), Size: 1},
|
||||||
|
},
|
||||||
|
Credential: &syscall.Credential{Uid: 0, Gid: 0},
|
||||||
|
GidMappingsEnableSetgroups: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd.Env = os.Environ()
|
||||||
|
cmd.Stdin = os.Stdin
|
||||||
|
cmd.Stdout = os.Stdout
|
||||||
|
cmd.Stderr = os.Stderr
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
if exit, ok := err.(*exec.ExitError); ok {
|
||||||
|
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
|
||||||
|
os.Exit(ws.ExitStatus())
|
||||||
|
}
|
||||||
|
log.Warningf("No wait status provided, exiting with -1: %v", err)
|
||||||
|
os.Exit(-1)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("re-executing self: %v", err)
|
||||||
|
}
|
||||||
|
// Child completed with success.
|
||||||
|
os.Exit(0)
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
||||||
|
|
|
@ -18,6 +18,5 @@ go_library(
|
||||||
"@com_github_cenkalti_backoff//:go_default_library",
|
"@com_github_cenkalti_backoff//:go_default_library",
|
||||||
"@com_github_kr_pty//:go_default_library",
|
"@com_github_kr_pty//:go_default_library",
|
||||||
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
|
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
|
||||||
"@com_github_syndtr_gocapability//capability:go_default_library",
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
@ -30,7 +30,6 @@ import (
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
@ -39,7 +38,6 @@ import (
|
||||||
|
|
||||||
"github.com/cenkalti/backoff"
|
"github.com/cenkalti/backoff"
|
||||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/syndtr/gocapability/capability"
|
|
||||||
"gvisor.googlesource.com/gvisor/runsc/boot"
|
"gvisor.googlesource.com/gvisor/runsc/boot"
|
||||||
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
||||||
)
|
)
|
||||||
|
@ -284,54 +282,6 @@ func WaitForHTTP(port int, timeout time.Duration) error {
|
||||||
return Poll(cb, timeout)
|
return Poll(cb, timeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunAsRoot ensures the test runs with CAP_SYS_ADMIN and CAP_SYS_CHROOT. If
|
|
||||||
// needed it will create a new user namespace and re-execute the test as root
|
|
||||||
// inside of the namespace. This function returns when it's running as root. If
|
|
||||||
// it needs to create another process, it will exit from there and not return.
|
|
||||||
func RunAsRoot() {
|
|
||||||
if specutils.HasCapabilities(capability.CAP_SYS_ADMIN, capability.CAP_SYS_CHROOT) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Println("*** Re-running test as root in new user namespace ***")
|
|
||||||
|
|
||||||
// Current process doesn't have CAP_SYS_ADMIN, create user namespace and run
|
|
||||||
// as root inside that namespace to get it.
|
|
||||||
runtime.LockOSThread()
|
|
||||||
defer runtime.UnlockOSThread()
|
|
||||||
|
|
||||||
cmd := exec.Command("/proc/self/exe", os.Args[1:]...)
|
|
||||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
||||||
Cloneflags: syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS,
|
|
||||||
// Set current user/group as root inside the namespace.
|
|
||||||
UidMappings: []syscall.SysProcIDMap{
|
|
||||||
{ContainerID: 0, HostID: os.Getuid(), Size: 1},
|
|
||||||
},
|
|
||||||
GidMappings: []syscall.SysProcIDMap{
|
|
||||||
{ContainerID: 0, HostID: os.Getgid(), Size: 1},
|
|
||||||
},
|
|
||||||
GidMappingsEnableSetgroups: false,
|
|
||||||
Credential: &syscall.Credential{
|
|
||||||
Uid: 0,
|
|
||||||
Gid: 0,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
cmd.Env = os.Environ()
|
|
||||||
cmd.Stdin = os.Stdin
|
|
||||||
cmd.Stdout = os.Stdout
|
|
||||||
cmd.Stderr = os.Stderr
|
|
||||||
if err := cmd.Run(); err != nil {
|
|
||||||
if exit, ok := err.(*exec.ExitError); ok {
|
|
||||||
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
|
|
||||||
os.Exit(ws.ExitStatus())
|
|
||||||
}
|
|
||||||
os.Exit(-1)
|
|
||||||
}
|
|
||||||
panic(fmt.Sprint("error running child process:", err.Error()))
|
|
||||||
}
|
|
||||||
os.Exit(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reaper reaps child processes.
|
// Reaper reaps child processes.
|
||||||
type Reaper struct {
|
type Reaper struct {
|
||||||
// mu protects ch, which will be nil if the reaper is not running.
|
// mu protects ch, which will be nil if the reaper is not running.
|
||||||
|
|
|
@ -212,8 +212,8 @@ run_runsc_do_tests() {
|
||||||
local runsc=$(find bazel-bin/runsc -type f -executable -name "runsc" | head -n1)
|
local runsc=$(find bazel-bin/runsc -type f -executable -name "runsc" | head -n1)
|
||||||
|
|
||||||
# run runsc do without root privileges.
|
# run runsc do without root privileges.
|
||||||
unshare -Ur ${runsc} --network=none --TESTONLY-unsafe-nonroot do true
|
${runsc} --rootless do true
|
||||||
unshare -Ur ${runsc} --TESTONLY-unsafe-nonroot --network=host do --netns=false true
|
${runsc} --rootless --network=none do true
|
||||||
|
|
||||||
# run runsc do with root privileges.
|
# run runsc do with root privileges.
|
||||||
sudo -n -E ${runsc} do true
|
sudo -n -E ${runsc} do true
|
||||||
|
|
Loading…
Reference in New Issue