From 4e695adcd0c739101c3d50431ca18b1b911c9238 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 31 Jan 2019 15:17:50 -0800 Subject: [PATCH] gvisor/gofer: Use pivot_root instead of chroot PiperOrigin-RevId: 231864273 Change-Id: I8545b72b615f5c2945df374b801b80be64ec3e13 --- runsc/cmd/chroot.go | 45 +++++++++++----------- runsc/cmd/gofer.go | 69 +++++++++++++++++++++++++++------- runsc/container/container.go | 46 ++++++++++++++++++++--- runsc/main.go | 4 +- runsc/test/root/chroot_test.go | 5 +-- runsc/test/testutil/docker.go | 9 ----- 6 files changed, 124 insertions(+), 54 deletions(-) diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go index c1acbf26b..ed1dafef1 100644 --- a/runsc/cmd/chroot.go +++ b/runsc/cmd/chroot.go @@ -36,6 +36,29 @@ func mountInChroot(chroot, src, dst, typ string, flags uint32) error { return nil } +func pivotRoot(root string) error { + if err := os.Chdir(root); err != nil { + return fmt.Errorf("error changing working directory: %v", err) + } + // pivot_root(new_root, put_old) moves the root filesystem (old_root) + // of the calling process to the directory put_old and makes new_root + // the new root filesystem of the calling process. + // + // pivot_root(".", ".") makes a mount of the working directory the new + // root filesystem, so it will be moved in "/" and then the old_root + // will be moved to "/" too. The parent mount of the old_root will be + // new_root, so after umounting the old_root, we will see only + // the new_root in "/". + if err := syscall.PivotRoot(".", "."); err != nil { + return fmt.Errorf("error changing root filesystem: %v", err) + } + + if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { + return fmt.Errorf("error umounting the old root file system: %v", err) + } + return nil +} + // setUpChroot creates an empty directory with runsc mounted at /runsc and proc // mounted at /proc. 
func setUpChroot(pidns bool) error { @@ -66,29 +89,9 @@ func setUpChroot(pidns bool) error { } } - if err := os.Chdir(chroot); err != nil { - return fmt.Errorf("error changing working directory: %v", err) - } - if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil { return fmt.Errorf("error remounting chroot in read-only: %v", err) } - // pivot_root(new_root, put_old) moves the root filesystem (old_root) - // of the calling process to the directory put_old and makes new_root - // the new root filesystem of the calling process. - // - // pivot_root(".", ".") makes a mount of the working directory the new - // root filesystem, so it will be moved in "/" and then the old_root - // will be moved to "/" too. The parent mount of the old_root will be - // new_root, so after umounting the old_root, we will see only - // the new_root in "/". - if err := syscall.PivotRoot(".", "."); err != nil { - return fmt.Errorf("error changing root filesystem: %v", err) - } - if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { - return fmt.Errorf("error umounting the old root file system: %v", err) - } - - return nil + return pivotRoot(chroot) } diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 43286a2e5..6f9711518 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -26,6 +26,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/p9" "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.googlesource.com/gvisor/runsc/boot" "gvisor.googlesource.com/gvisor/runsc/fsgofer" "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter" "gvisor.googlesource.com/gvisor/runsc/specutils" @@ -54,8 +55,10 @@ type Gofer struct { bundleDir string ioFDs intFlags applyCaps bool + setUpRoot bool panicOnWrite bool + specFD int } // Name implements subcommands.Command. @@ -79,43 +82,83 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) { f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. 
They must follow this order: root first, then mounts as defined in the spec") f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do") f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected") + f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process") + f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") } // Execute implements subcommands.Command. func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { - if g.bundleDir == "" || len(g.ioFDs) < 1 { + if g.bundleDir == "" || len(g.ioFDs) < 1 || g.specFD < 0 { f.Usage() return subcommands.ExitUsageError } + specFile := os.NewFile(uintptr(g.specFD), "spec file") + defer specFile.Close() + spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile) + if err != nil { + Fatalf("reading spec: %v", err) + } + + // Find what path is going to be served by this gofer. + root := spec.Root.Path + + conf := args[0].(*boot.Config) + + if g.setUpRoot && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // Convert all shared mounts into slave to be sure that nothing will be + // propagated outside of our namespace. + if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + Fatalf("error converting mounts: %v", err) + } + + // FIXME: runsc can't be re-executed without + // /proc, so we create a tmpfs mount, mount ./proc and ./root + // there, then move this mount to the root and after + // setCapsAndCallSelf, runsc will chroot into /root. + // + // We need a directory to construct a new root and we know that + // runsc can't start without /proc, so we can use it for this. 
+ flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) + if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { + Fatalf("error mounting tmpfs: %v", err) + } + os.Mkdir("/proc/proc", 0755) + os.Mkdir("/proc/root", 0755) + if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { + Fatalf("error mounting proc: %v", err) + } + if err := syscall.Mount(root, "/proc/root", "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + Fatalf("error mounting root: %v", err) + } + if err := pivotRoot("/proc"); err != nil { + Fatalf("faild to change the root file system: %v", err) + } + if err := os.Chdir("/"); err != nil { + Fatalf("failed to change working directory") + } + } + if g.applyCaps { // Disable caps when calling myself again. // Note: minimal argument handling for the default case to keep it simple. args := os.Args - args = append(args, "--apply-caps=false") + args = append(args, "--apply-caps=false", "--setup-root=false") if err := setCapsAndCallSelf(args, goferCaps); err != nil { Fatalf("Unable to apply caps: %v", err) } panic("unreachable") } - specFile, err := specutils.OpenCleanSpec(g.bundleDir) - if err != nil { - Fatalf("opening spec: %v", err) - } - spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile) - specFile.Close() - if err != nil { - Fatalf("reading spec: %v", err) - } specutils.LogSpec(spec) // fsgofer should run with a umask of 0, because we want to preserve file // modes exactly as sent by the sandbox, which will have applied its own umask. syscall.Umask(0) - // Find what path is going to be served by this gofer. 
- root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + root = "/root" + } if err := syscall.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 37969d8c5..08a3725f5 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -787,11 +787,50 @@ func (c *Container) waitForStopped() error { func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) { // Start with the general config flags. args := conf.ToFlags() + + var goferEnds []*os.File + + // nextFD is the next available file descriptor for the gofer process. + // It starts at 3 because 0-2 are used by stdin/stdout/stderr. + nextFD := 3 + + if conf.LogFilename != "" { + logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err) + } + defer logFile.Close() + goferEnds = append(goferEnds, logFile) + args = append(args, "--log-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.DebugLog != "" { + debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer") + if err != nil { + return nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err) + } + defer debugLogFile.Close() + goferEnds = append(goferEnds, debugLogFile) + args = append(args, "--debug-log-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + args = append(args, "gofer", "--bundle", bundleDir) if conf.Overlay { args = append(args, "--panic-on-write=true") } + // Open the spec file to donate to the gofer. 
+ specFile, err := specutils.OpenCleanSpec(bundleDir) + if err != nil { + return nil, fmt.Errorf("opening spec file: %v", err) + } + defer specFile.Close() + goferEnds = append(goferEnds, specFile) + args = append(args, "--spec-fd="+strconv.Itoa(nextFD)) + nextFD++ + // Add root mount and then add any other additional mounts. mountCount := 1 @@ -802,12 +841,8 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund } } sandEnds := make([]*os.File, 0, mountCount) - goferEnds := make([]*os.File, 0, mountCount) - // nextFD is the next available file descriptor for the gofer process. - // It starts at 3 because 0-2 are used by stdin/stdout/stderr. - nextFD := 3 - for ; nextFD-3 < mountCount; nextFD++ { + for i := 0; i < mountCount; i++ { fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) if err != nil { return nil, err @@ -819,6 +854,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund goferEnds = append(goferEnds, goferEnd) args = append(args, fmt.Sprintf("--io-fds=%d", nextFD)) + nextFD++ } binPath := specutils.ExePath diff --git a/runsc/main.go b/runsc/main.go index e036abc44..472839bf0 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -179,8 +179,8 @@ func main() { // Quick sanity check to make sure no other commands get passed // a log fd (they should use log dir instead). 
- if subcommand != "boot" { - cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' command, but was passed to %q", subcommand) + if subcommand != "boot" && subcommand != "gofer" { + cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand) } // If we are the boot process, then we own our stdio FDs and diff --git a/runsc/test/root/chroot_test.go b/runsc/test/root/chroot_test.go index 89f90c3e0..0deca0532 100644 --- a/runsc/test/root/chroot_test.go +++ b/runsc/test/root/chroot_test.go @@ -118,10 +118,7 @@ func TestChrootGofer(t *testing.T) { // This where the root directory is mapped on the host and that's where the // gofer must have chroot'd to. - root, err := d.RootDirInHost() - if err != nil { - t.Fatalf("Docker.RootDirInHost(): %v", err) - } + root := "/root" for _, child := range children { childPID, err := strconv.Atoi(child) diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go index 9a76397be..5a92a5835 100644 --- a/runsc/test/testutil/docker.go +++ b/runsc/test/testutil/docker.go @@ -297,15 +297,6 @@ func (d *Docker) SandboxPid() (int, error) { return pid, nil } -// RootDirInHost returns where the root directory is mapped on the host. -func (d *Docker) RootDirInHost() (string, error) { - out, err := do("inspect", "-f={{.GraphDriver.Data.MergedDir}}", d.Name) - if err != nil { - return "", fmt.Errorf("error retrieving pid: %v", err) - } - return strings.TrimSuffix(string(out), "\n"), nil -} - // ID returns the container ID. func (d *Docker) ID() (string, error) { out, err := do("inspect", "-f={{.Id}}", d.Name)