Add support for mount propagation

Properly handle propagation options for root and mounts. Using the mount
options shared, rshared, or noexec now causes an error at start. shared/
rshared break sandbox=>host isolation. slave, however, can be supported
because changes propagate from host to sandbox.

Root FS setup moved inside the gofer. Apart from simplifying the code,
it keeps all mounts inside the namespace. And they are torn down when
the namespace is destroyed (DestroyFS is no longer needed).

PiperOrigin-RevId: 239037661
Change-Id: I8b5ee4d50da33c042ea34fa68e56514ebe20e6e0
This commit is contained in:
Fabricio Voznika 2019-03-18 12:29:43 -07:00 committed by Shentubot
parent eb69542807
commit e420cc3e5d
14 changed files with 681 additions and 422 deletions

View File

@ -60,6 +60,7 @@ go_test(
"capability_test.go",
"delete_test.go",
"exec_test.go",
"gofer_test.go",
],
data = [
"//runsc",

View File

@ -76,6 +76,11 @@ type Boot struct {
// startSyncFD is the file descriptor to synchronize runsc and sandbox.
startSyncFD int
// mountsFD is the file descriptor to read list of mounts after they have
// been resolved (direct paths, no symlinks). They are resolved outside the
// sandbox (e.g. gofer) and sent through this FD.
mountsFD int
// pidns is set if the sandbox is in its own pid namespace.
pidns bool
}
@ -111,6 +116,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container")
f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
}
// Execute implements subcommands.Command.Execute. It starts a sandbox in a
@ -191,6 +197,16 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
panic("setCapsAndCallSelf must never return success")
}
// Read resolved mount list and replace the original one from the spec.
mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file")
cleanMounts, err := specutils.ReadMounts(mountsFile)
if err != nil {
mountsFile.Close()
Fatalf("Error reading mounts file: %v", err)
}
mountsFile.Close()
spec.Mounts = cleanMounts
// Create the loader.
bootArgs := boot.Args{
ID: f.Arg(0),

View File

@ -16,7 +16,11 @@ package cmd
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
@ -59,6 +63,7 @@ type Gofer struct {
panicOnWrite bool
specFD int
mountsFD int
}
// Name implements subcommands.Command.
@ -84,6 +89,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
}
// Execute implements subcommands.Command.
@ -100,45 +106,13 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("reading spec: %v", err)
}
// Find what path is going to be served by this gofer.
root := spec.Root.Path
conf := args[0].(*boot.Config)
if g.setUpRoot && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Convert all shared mounts into slave to be sure that nothing will be
// propagated outside of our namespace.
if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
Fatalf("error converting mounts: %v", err)
}
// FIXME: runsc can't be re-executed without
// /proc, so we create a tmpfs mount, mount ./proc and ./root
// there, then move this mount to the root and after
// setCapsAndCallSelf, runsc will chroot into /root.
//
// We need a directory to construct a new root and we know that
// runsc can't start without /proc, so we can use it for this.
flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
Fatalf("error mounting tmpfs: %v", err)
}
os.Mkdir("/proc/proc", 0755)
os.Mkdir("/proc/root", 0755)
if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
Fatalf("error mounting proc: %v", err)
}
if err := syscall.Mount(root, "/proc/root", "", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
Fatalf("error mounting root: %v", err)
}
if err := pivotRoot("/proc"); err != nil {
Fatalf("faild to change the root file system: %v", err)
}
if err := os.Chdir("/"); err != nil {
Fatalf("failed to change working directory")
if g.setUpRoot {
if err := setupRootFS(spec, conf); err != nil {
Fatalf("Error setting up root FS: %v", err)
}
}
if g.applyCaps {
// Disable caps when calling myself again.
// Note: minimal argument handling for the default case to keep it simple.
@ -150,15 +124,34 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
panic("unreachable")
}
// Find what path is going to be served by this gofer.
root := spec.Root.Path
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
root = "/root"
}
// Resolve mount points paths, then replace mounts from our spec and send the
// mount list over to the sandbox, so they are both in sync.
//
// Note that all mount points have been mounted in the proper location in
// setupRootFS().
cleanMounts, err := resolveMounts(spec.Mounts, root)
if err != nil {
Fatalf("Failure to resolve mounts: %v", err)
}
spec.Mounts = cleanMounts
go func() {
if err := g.writeMounts(cleanMounts); err != nil {
panic(fmt.Sprintf("Failed to write mounts: %v", err))
}
}()
specutils.LogSpec(spec)
// fsgofer should run with a umask of 0, because we want to preserve file
// modes exactly as sent by the sandbox, which will have applied its own umask.
syscall.Umask(0)
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
root = "/root"
}
if err := syscall.Chroot(root); err != nil {
Fatalf("failed to chroot to %q: %v", root, err)
}
@ -232,6 +225,25 @@ func runServers(ats []p9.Attacher, ioFDs []int) {
log.Infof("All 9P servers exited.")
}
// writeMounts serializes 'mounts' as JSON, writes the result to the file
// descriptor stored in g.mountsFD (the --mounts-fd flag), and closes that
// descriptor.
//
// It returns the first error hit while marshalling, writing, or closing.
func (g *Gofer) writeMounts(mounts []specs.Mount) error {
	data, err := json.Marshal(mounts)
	if err != nil {
		return err
	}

	// os.NewFile returns nil for an invalid descriptor, e.g. the flag default
	// of -1. Report that explicitly instead of relying on nil-receiver errors.
	f := os.NewFile(uintptr(g.mountsFD), "mounts file")
	if f == nil {
		return fmt.Errorf("invalid mounts FD: %d", g.mountsFD)
	}

	// os.File.Write returns a non-nil error whenever it writes fewer bytes
	// than requested, so a manual retry loop is unnecessary.
	if _, err := f.Write(data); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}
func isReadonlyMount(opts []string) bool {
for _, o := range opts {
if o == "ro" {
@ -240,3 +252,194 @@ func isReadonlyMount(opts []string) bool {
}
return false
}
// setupRootFS builds the mount tree for the container root.
//
// It first turns every mount in the current namespace into a recursive slave.
// Unless conf.TestOnlyAllowRunAsCurrentUserWithoutChroot is set, it then
// mounts a tmpfs over /proc holding ./proc and ./root, bind mounts
// spec.Root.Path at /proc/root and finally pivots into that tmpfs. Otherwise
// spec.Root.Path is bind mounted onto itself and no pivot takes place.
//
// On the new root it applies the rootfs propagation option (recursive slave
// when the spec sets none), performs the spec's bind mounts, creates the
// process working directory, and remounts the root read-only when
// spec.Root.Readonly is set.
//
// All failures are returned to the caller as errors.
func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
	// Convert all shared mounts into slaves to be sure that nothing will be
	// propagated outside of our namespace.
	if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
		return fmt.Errorf("error converting mounts: %v", err)
	}

	root := spec.Root.Path
	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
		// FIXME: runsc can't be re-executed without
		// /proc, so we create a tmpfs mount, mount ./proc and ./root
		// there, then move this mount to the root and after
		// setCapsAndCallSelf, runsc will chroot into /root.
		//
		// We need a directory to construct a new root and we know that
		// runsc can't start without /proc, so we can use it for this.
		tmpfsFlags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
		if err := syscall.Mount("runsc-root", "/proc", "tmpfs", tmpfsFlags, ""); err != nil {
			return fmt.Errorf("error mounting tmpfs: %v", err)
		}

		// Prepare tree structure for pivot_root(2). The tmpfs was just mounted,
		// so any failure here is a real error rather than "already exists".
		for _, dir := range []string{"/proc/proc", "/proc/root"} {
			if err := os.Mkdir(dir, 0755); err != nil {
				return fmt.Errorf("creating %q: %v", dir, err)
			}
		}
		if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", tmpfsFlags|syscall.MS_RDONLY, ""); err != nil {
			return fmt.Errorf("error mounting proc: %v", err)
		}
		root = "/proc/root"
	}

	// Mount root path followed by submounts.
	if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
		return fmt.Errorf("mounting root on root (%q) err: %v", spec.Root.Path, err)
	}

	// Propagation must be changed on the bind mount just created at 'root',
	// not on the original spec.Root.Path, which may not even be a mount point.
	propFlags := uint32(syscall.MS_SLAVE | syscall.MS_REC)
	if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
		propFlags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
	}
	if err := syscall.Mount("", root, "", uintptr(propFlags), ""); err != nil {
		return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, propFlags, err)
	}

	// Bind mount everything listed in the spec at its resolved location
	// inside the new root.
	if err := setupMounts(spec.Mounts, root); err != nil {
		return fmt.Errorf("error setting up FS: %v", err)
	}

	// Create working directory if needed.
	if spec.Process.Cwd != "" {
		dst, err := resolveSymlinks(root, spec.Process.Cwd)
		if err != nil {
			return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
		}
		if err := os.MkdirAll(dst, 0755); err != nil {
			return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
		}
	}

	// Check if root needs to be remounted as readonly.
	if spec.Root.Readonly {
		// 'root' is always a mount point at this stage (bind mounted above), so
		// its mount options can be changed to make it read-only for extra safety.
		log.Infof("Remounting root as readonly: %q", root)
		roFlags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
		if err := syscall.Mount(root, root, "bind", roFlags, ""); err != nil {
			return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, roFlags, err)
		}
	}

	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
		if err := pivotRoot("/proc"); err != nil {
			return fmt.Errorf("failed to change the root file system: %v", err)
		}
		if err := os.Chdir("/"); err != nil {
			return fmt.Errorf("failed to change working directory: %v", err)
		}
	}
	return nil
}
// setupMounts bind mounts every eligible entry of 'mounts' at its location
// inside 'root'. Entries whose type is not "bind", or that
// specutils.IsSupportedDevMount rejects, are skipped. Each destination is
// resolved with resolveSymlinks so that symlinks are followed relative to
// 'root'.
//
// Propagation options are applied with a second mount(2) call because they
// cannot be combined with other options.
func setupMounts(mounts []specs.Mount, root string) error {
	for i := range mounts {
		mnt := mounts[i]
		if !(mnt.Type == "bind" && specutils.IsSupportedDevMount(mnt)) {
			continue
		}

		target, err := resolveSymlinks(root, mnt.Destination)
		if err != nil {
			return fmt.Errorf("resolving symlinks to %q: %v", mnt.Destination, err)
		}

		bindFlags := specutils.OptionsToFlags(mnt.Options) | syscall.MS_BIND
		log.Infof("Mounting src: %q, dst: %q, flags: %#x", mnt.Source, target, bindFlags)
		if err := specutils.Mount(mnt.Source, target, mnt.Type, bindFlags); err != nil {
			return fmt.Errorf("mounting %v: %v", mnt, err)
		}

		// Set propagation options that cannot be set together with other options.
		if propFlags := specutils.PropOptionsToFlags(mnt.Options); propFlags != 0 {
			if err := syscall.Mount("", target, "", uintptr(propFlags), ""); err != nil {
				return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", target, propFlags, err)
			}
		}
	}
	return nil
}
// resolveMounts resolves relative paths and symlinks in mount destinations.
// It returns a copy of 'mounts' in which each eligible bind mount has its
// Destination replaced by the resolved path, expressed relative to 'root'
// and rooted at "/". Entries whose type is not "bind", or that
// specutils.IsSupportedDevMount rejects, are copied through unchanged.
//
// Note: mount points must already be in place for resolution to work.
// Otherwise, it may follow symlinks to locations that would be overwritten
// with another mount point and return the wrong location. In short, make sure
// setupMounts() has been called before.
func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) {
	cleanMounts := make([]specs.Mount, 0, len(mounts))
	for _, m := range mounts {
		if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
			cleanMounts = append(cleanMounts, m)
			continue
		}
		dst, err := resolveSymlinks(root, m.Destination)
		if err != nil {
			return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
		}
		relDst, err := filepath.Rel(root, dst)
		if err != nil {
			// Report through the error return that callers already handle,
			// instead of panicking.
			return nil, fmt.Errorf("%q could not be made relative to %q: %v", dst, root, err)
		}
		// 'm' is the loop's own copy, so the caller's slice is left untouched.
		m.Destination = filepath.Join("/", relDst)
		cleanMounts = append(cleanMounts, m)
	}
	return cleanMounts, nil
}
// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are
// symlinks, they are evaluated relative to 'root' to ensure the end result is
// the same as if the process was running inside the container.
//
// Path components that do not exist are kept as-is. An error is returned when
// a component cannot be inspected or when more than 255 nested symlinks would
// have to be followed.
func resolveSymlinks(root, rel string) (string, error) {
	return resolveSymlinksImpl(root, root, rel, 255)
}

// resolveSymlinksImpl resolves 'rel' one component at a time starting from
// 'base', never leaving 'root'. 'followCount' is the remaining budget of
// nested symlinks; reaching zero is reported as an error so that symlink loops
// terminate.
func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
	if followCount == 0 {
		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
	}

	rel = filepath.Clean(rel)
	for _, name := range strings.Split(rel, string(filepath.Separator)) {
		if name == "" {
			continue
		}
		// Note that Join() resolves things like ".." and returns a clean path.
		path := filepath.Join(base, name)
		if !strings.HasPrefix(path, root) {
			// One cannot '..' their way out of root: clamp the walk to root.
			base = root
			continue
		}
		fi, err := os.Lstat(path)
		if err != nil {
			if !os.IsNotExist(err) {
				return "", err
			}
			// Not found means there is no symlink to check. Just keep walking dirs.
			base = path
			continue
		}
		if fi.Mode()&os.ModeSymlink != 0 {
			link, err := os.Readlink(path)
			if err != nil {
				return "", err
			}
			// Absolute targets are interpreted relative to the container root.
			if filepath.IsAbs(link) {
				base = root
			}
			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
			if err != nil {
				return "", err
			}
			continue
		}
		base = path
	}
	return base, nil
}

View File

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package container
package cmd
import (
"fmt"
@ -21,10 +21,16 @@ import (
"path"
"path/filepath"
"testing"
"gvisor.googlesource.com/gvisor/runsc/test/testutil"
)
// tmpDir returns the directory tests should use for temporary files: the
// value of the TEST_TMPDIR environment variable, or "/tmp" when that variable
// is unset or empty.
func tmpDir() string {
	if fromEnv := os.Getenv("TEST_TMPDIR"); fromEnv != "" {
		return fromEnv
	}
	return "/tmp"
}
type dir struct {
rel string
link string
@ -50,7 +56,7 @@ func construct(root string, dirs []dir) error {
}
func TestResolveSymlinks(t *testing.T) {
root, err := ioutil.TempDir(testutil.TmpDir(), "root")
root, err := ioutil.TempDir(tmpDir(), "root")
if err != nil {
t.Fatal("ioutil.TempDir() failed:", err)
}
@ -141,7 +147,7 @@ func TestResolveSymlinks(t *testing.T) {
}
func TestResolveSymlinksLoop(t *testing.T) {
root, err := ioutil.TempDir(testutil.TmpDir(), "root")
root, err := ioutil.TempDir(tmpDir(), "root")
if err != nil {
t.Fatal("ioutil.TempDir() failed:", err)
}

View File

@ -6,7 +6,6 @@ go_library(
name = "container",
srcs = [
"container.go",
"fs.go",
"hook.go",
"status.go",
],
@ -34,7 +33,6 @@ go_test(
srcs = [
"console_test.go",
"container_test.go",
"fs_test.go",
"multi_container_test.go",
"shared_volume_test.go",
],

View File

@ -281,18 +281,6 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
if specutils.ShouldCreateSandbox(spec) {
log.Debugf("Creating new sandbox for container %q", id)
// Setup rootfs and mounts. It returns a new mount list with destination
// paths resolved. Since the spec for the root container is read from disk,
// Write the new spec to a new file that will be used by the sandbox.
cleanMounts, err := setupFS(spec, conf, bundleDir)
if err != nil {
return nil, fmt.Errorf("setup mounts: %v", err)
}
spec.Mounts = cleanMounts
if err := specutils.WriteCleanSpec(bundleDir, spec); err != nil {
return nil, fmt.Errorf("writing clean spec: %v", err)
}
// Create and join cgroup before processes are created to ensure they are
// part of the cgroup from the start (and all their child processes).
cg, err := cgroup.New(spec)
@ -306,14 +294,14 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
}
}
if err := runInCgroup(cg, func() error {
ioFiles, err := c.createGoferProcess(spec, conf, bundleDir)
ioFiles, specFile, err := c.createGoferProcess(spec, conf, bundleDir)
if err != nil {
return err
}
// Start a new sandbox for this container. Any errors after this point
// must destroy the container.
c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, cg)
c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, cg)
return err
}); err != nil {
return nil, err
@ -387,26 +375,22 @@ func (c *Container) Start(conf *boot.Config) error {
return err
}
} else {
// Setup rootfs and mounts. It returns a new mount list with destination
// paths resolved. Replace the original spec with new mount list and start
// container.
cleanMounts, err := setupFS(c.Spec, conf, c.BundleDir)
if err != nil {
return fmt.Errorf("setup mounts: %v", err)
}
c.Spec.Mounts = cleanMounts
if err := specutils.WriteCleanSpec(c.BundleDir, c.Spec); err != nil {
return fmt.Errorf("writing clean spec: %v", err)
}
// Join cgroup to start gofer process to ensure it's part of the cgroup from
// the start (and all their child processes).
if err := runInCgroup(c.Sandbox.Cgroup, func() error {
// Create the gofer process.
ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir)
if err != nil {
return err
}
defer mountsFile.Close()
cleanMounts, err := specutils.ReadMounts(mountsFile)
if err != nil {
return fmt.Errorf("reading mounts file: %v", err)
}
c.Spec.Mounts = cleanMounts
return c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles)
}); err != nil {
return err
@ -665,12 +649,6 @@ func (c *Container) Destroy() error {
errs = append(errs, err.Error())
}
if err := destroyFS(c.Spec); err != nil {
err = fmt.Errorf("destroying container fs: %v", err)
log.Warningf("%v", err)
errs = append(errs, err.Error())
}
if err := os.RemoveAll(c.Root); err != nil && !os.IsNotExist(err) {
err = fmt.Errorf("deleting container root directory %q: %v", c.Root, err)
log.Warningf("%v", err)
@ -787,7 +765,7 @@ func (c *Container) waitForStopped() error {
return backoff.Retry(op, b)
}
func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, error) {
func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]*os.File, *os.File, error) {
// Start with the general config flags.
args := conf.ToFlags()
@ -800,7 +778,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
if conf.LogFilename != "" {
logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err)
return nil, nil, fmt.Errorf("opening log file %q: %v", conf.LogFilename, err)
}
defer logFile.Close()
goferEnds = append(goferEnds, logFile)
@ -811,7 +789,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
if conf.DebugLog != "" {
debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer")
if err != nil {
return nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
return nil, nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
}
defer debugLogFile.Close()
goferEnds = append(goferEnds, debugLogFile)
@ -825,30 +803,39 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
}
// Open the spec file to donate to the sandbox.
specFile, err := specutils.OpenCleanSpec(bundleDir)
specFile, err := specutils.OpenSpec(bundleDir)
if err != nil {
return nil, fmt.Errorf("opening spec file: %v", err)
return nil, nil, fmt.Errorf("opening spec file: %v", err)
}
defer specFile.Close()
goferEnds = append(goferEnds, specFile)
args = append(args, "--spec-fd="+strconv.Itoa(nextFD))
nextFD++
// Create pipe that allows gofer to send mount list to sandbox after all paths
// have been resolved.
mountsSand, mountsGofer, err := os.Pipe()
if err != nil {
return nil, nil, err
}
defer mountsGofer.Close()
goferEnds = append(goferEnds, mountsGofer)
args = append(args, fmt.Sprintf("--mounts-fd=%d", nextFD))
nextFD++
// Add root mount and then add any other additional mounts.
mountCount := 1
// Add additional mounts.
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
mountCount++
}
}
sandEnds := make([]*os.File, 0, mountCount)
sandEnds := make([]*os.File, 0, mountCount)
for i := 0; i < mountCount; i++ {
fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return nil, err
return nil, nil, err
}
sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox IO FD"))
@ -884,12 +871,12 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
// Start the gofer in the given namespace.
log.Debugf("Starting gofer: %s %v", binPath, args)
if err := specutils.StartInNS(cmd, nss); err != nil {
return nil, err
return nil, nil, err
}
log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
c.GoferPid = cmd.Process.Pid
c.goferIsChild = true
return sandEnds, nil
return sandEnds, mountsSand, nil
}
// changeStatus transitions from one status to another ensuring that the

View File

@ -1594,6 +1594,171 @@ func TestCreateWorkingDir(t *testing.T) {
}
}
// TestMountPropagation verifies that mount propagates to slave but not to
// private mounts.
func TestMountPropagation(t *testing.T) {
	// Setup dir structure:
	//   - src: is mounted as shared and is used as source for both private and
	//     slave mounts
	//   - dir: will be bind mounted inside src and should propagate to slave
	tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "mount")
	if err != nil {
		t.Fatalf("ioutil.TempDir() failed: %v", err)
	}
	src := filepath.Join(tmpDir, "src")
	srcMnt := filepath.Join(src, "mnt")
	dir := filepath.Join(tmpDir, "dir")
	for _, path := range []string{src, srcMnt, dir} {
		if err := os.MkdirAll(path, 0777); err != nil {
			t.Fatalf("MkdirAll(%q): %v", path, err)
		}
	}
	dirFile := filepath.Join(dir, "file")
	f, err := os.Create(dirFile)
	if err != nil {
		t.Fatalf("os.Create(%q): %v", dirFile, err)
	}
	f.Close()

	// Setup src as a shared mount. Failure messages report the paths that
	// were actually passed to mount(2).
	if err := syscall.Mount(src, src, "bind", syscall.MS_BIND, ""); err != nil {
		t.Fatalf("mount(%q, %q, MS_BIND): %v", src, src, err)
	}
	if err := syscall.Mount("", src, "", syscall.MS_SHARED, ""); err != nil {
		t.Fatalf("mount(%q, MS_SHARED): %v", src, err)
	}

	spec := testutil.NewSpecWithArgs("sleep", "1000")

	priv := filepath.Join(tmpDir, "priv")
	slave := filepath.Join(tmpDir, "slave")
	spec.Mounts = []specs.Mount{
		{
			Source:      src,
			Destination: priv,
			Type:        "bind",
			Options:     []string{"private"},
		},
		{
			Source:      src,
			Destination: slave,
			Type:        "bind",
			Options:     []string{"slave"},
		},
	}

	conf := testutil.TestConfig()
	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
	if err != nil {
		t.Fatalf("error setting up container: %v", err)
	}
	defer os.RemoveAll(rootDir)
	defer os.RemoveAll(bundleDir)

	cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
	if err != nil {
		t.Fatalf("creating container: %v", err)
	}
	defer cont.Destroy()

	if err := cont.Start(conf); err != nil {
		t.Fatalf("starting container: %v", err)
	}

	// After the container is started, mount dir inside source and check what
	// happens to both destinations.
	if err := syscall.Mount(dir, srcMnt, "bind", syscall.MS_BIND, ""); err != nil {
		t.Fatalf("mount(%q, %q, MS_BIND): %v", dir, srcMnt, err)
	}

	// Check that mount didn't propagate to private mount.
	privFile := filepath.Join(priv, "mnt", "file")
	args := &control.ExecArgs{
		Filename: "/usr/bin/test",
		Argv:     []string{"test", "!", "-f", privFile},
	}
	if ws, err := cont.executeSync(args); err != nil || ws != 0 {
		t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err)
	}

	// Check that mount propagated to slave mount.
	slaveFile := filepath.Join(slave, "mnt", "file")
	args = &control.ExecArgs{
		Filename: "/usr/bin/test",
		Argv:     []string{"test", "-f", slaveFile},
	}
	if ws, err := cont.executeSync(args); err != nil || ws != 0 {
		t.Fatalf("exec: test -f %q, ws: %v, err: %v", slaveFile, ws, err)
	}
}
// TestMountSymlink verifies that a bind mount whose destination is a symlink
// is mounted at the symlink's target. The check is repeated for every
// configuration returned by configs(overlay).
func TestMountSymlink(t *testing.T) {
	for _, conf := range configs(overlay) {
		t.Logf("Running test with conf: %+v", conf)

		// Run each configuration in its own function so the deferred cleanups
		// (container destruction, directory removal) fire at the end of the
		// iteration instead of piling up until the whole test returns.
		func() {
			dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
			if err != nil {
				t.Fatalf("ioutil.TempDir() failed: %v", err)
			}

			source := path.Join(dir, "source")
			target := path.Join(dir, "target")
			for _, p := range []string{source, target} {
				if err := os.MkdirAll(p, 0777); err != nil {
					t.Fatalf("os.MkdirAll(): %v", err)
				}
			}
			f, err := os.Create(path.Join(source, "file"))
			if err != nil {
				t.Fatalf("os.Create(): %v", err)
			}
			f.Close()

			link := path.Join(dir, "link")
			if err := os.Symlink(target, link); err != nil {
				t.Fatalf("os.Symlink(%q, %q): %v", target, link, err)
			}

			spec := testutil.NewSpecWithArgs("/bin/sleep", "1000")

			// Mount to a symlink to ensure the mount code will follow it and mount
			// at the symlink target.
			spec.Mounts = append(spec.Mounts, specs.Mount{
				Type:        "bind",
				Destination: link,
				Source:      source,
			})

			rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
			if err != nil {
				t.Fatalf("error setting up container: %v", err)
			}
			defer os.RemoveAll(rootDir)
			defer os.RemoveAll(bundleDir)

			cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "", "")
			if err != nil {
				t.Fatalf("creating container: %v", err)
			}
			defer cont.Destroy()

			if err := cont.Start(conf); err != nil {
				t.Fatalf("starting container: %v", err)
			}

			// Check that symlink was resolved and mount was created where the symlink
			// is pointing to.
			file := path.Join(target, "file")
			args := &control.ExecArgs{
				Filename: "/usr/bin/test",
				Argv:     []string{"test", "-f", file},
			}
			if ws, err := cont.executeSync(args); err != nil || ws != 0 {
				t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
			}
		}()
	}
}
// executeSync synchronously executes a new process.
func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
pid, err := cont.Execute(args)

View File

@ -1,287 +0,0 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package container
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/runsc/boot"
"gvisor.googlesource.com/gvisor/runsc/specutils"
)
type mapping struct {
set bool
val uint32
}
var optionsMap = map[string]mapping{
"acl": {set: true, val: syscall.MS_POSIXACL},
"async": {set: false, val: syscall.MS_SYNCHRONOUS},
"atime": {set: false, val: syscall.MS_NOATIME},
"bind": {set: true, val: syscall.MS_BIND},
"defaults": {set: true, val: 0},
"dev": {set: false, val: syscall.MS_NODEV},
"diratime": {set: false, val: syscall.MS_NODIRATIME},
"dirsync": {set: true, val: syscall.MS_DIRSYNC},
"exec": {set: false, val: syscall.MS_NOEXEC},
"iversion": {set: true, val: syscall.MS_I_VERSION},
"loud": {set: false, val: syscall.MS_SILENT},
"mand": {set: true, val: syscall.MS_MANDLOCK},
"noacl": {set: false, val: syscall.MS_POSIXACL},
"noatime": {set: true, val: syscall.MS_NOATIME},
"nodev": {set: true, val: syscall.MS_NODEV},
"nodiratime": {set: true, val: syscall.MS_NODIRATIME},
"noexec": {set: true, val: syscall.MS_NOEXEC},
"noiversion": {set: false, val: syscall.MS_I_VERSION},
"nomand": {set: false, val: syscall.MS_MANDLOCK},
"norelatime": {set: false, val: syscall.MS_RELATIME},
"nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
"nosuid": {set: true, val: syscall.MS_NOSUID},
"private": {set: true, val: syscall.MS_PRIVATE},
"rbind": {set: true, val: syscall.MS_BIND | syscall.MS_REC},
"relatime": {set: true, val: syscall.MS_RELATIME},
"remount": {set: true, val: syscall.MS_REMOUNT},
"ro": {set: true, val: syscall.MS_RDONLY},
"rprivate": {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
"rw": {set: false, val: syscall.MS_RDONLY},
"silent": {set: true, val: syscall.MS_SILENT},
"strictatime": {set: true, val: syscall.MS_STRICTATIME},
"suid": {set: false, val: syscall.MS_NOSUID},
"sync": {set: true, val: syscall.MS_SYNCHRONOUS},
}
// setupFS creates the container directory structure under 'spec.Root.Path'.
// This allows the gofer serving the containers to be chroot under this
// directory to create an extra layer to security in case the gofer gets
// compromised.
// Returns list of mounts equivalent to 'spec.Mounts' with all destination paths
// cleaned and with symlinks resolved.
func setupFS(spec *specs.Spec, conf *boot.Config, bundleDir string) ([]specs.Mount, error) {
rv := make([]specs.Mount, 0, len(spec.Mounts))
for _, m := range spec.Mounts {
if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
rv = append(rv, m)
continue
}
// It's possible that 'm.Destination' follows symlinks inside the
// container.
dst, err := resolveSymlinks(spec.Root.Path, m.Destination)
if err != nil {
return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
}
flags := optionsToFlags(m.Options)
flags |= syscall.MS_BIND
log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil {
return nil, fmt.Errorf("mounting %v: %v", m, err)
}
// Make the mount a slave, so that for recursive bind mount, umount won't
// propagate to the source.
flags = syscall.MS_SLAVE | syscall.MS_REC
if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil {
return nil, fmt.Errorf("mount rslave dst: %q, flags: %#x, err: %v", dst, flags, err)
}
cpy := m
relDst, err := filepath.Rel(spec.Root.Path, dst)
if err != nil {
panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, spec.Root.Path, err))
}
cpy.Destination = filepath.Join("/", relDst)
rv = append(rv, cpy)
}
if spec.Process.Cwd != "" {
dst, err := resolveSymlinks(spec.Root.Path, spec.Process.Cwd)
if err != nil {
return nil, fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
}
if err := os.MkdirAll(dst, 0755); err != nil {
return nil, err
}
}
// If root is read only, check if it needs to be remounted as readonly.
if spec.Root.Readonly {
isMountPoint, readonly, err := mountInfo(spec.Root.Path)
if err != nil {
return nil, err
}
if readonly {
return rv, nil
}
if !isMountPoint {
// Readonly root is not a mount point nor read-only. Can't do much other
// than just logging a warning. The gofer will prevent files to be open
// in write mode.
log.Warningf("Mount where root is located is not read-only and cannot be changed: %q", spec.Root.Path)
return rv, nil
}
// If root is a mount point but not read-only, we can change mount options
// to make it read-only for extra safety.
log.Infof("Remounting root as readonly: %q", spec.Root.Path)
flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
src := spec.Root.Path
if err := syscall.Mount(src, src, "bind", flags, ""); err != nil {
return nil, fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", spec.Root.Path, spec.Root.Path, flags, err)
}
}
return rv, nil
}
// mountInfo returns whether the path is a mount point and whether the mount
// that path belongs to is read-only.
//
// The path is first resolved with filepath.EvalSymlinks because mounts are
// listed by their real paths. /proc/mounts is then scanned for the longest
// mount point that is the path itself or one of its ancestor directories.
func mountInfo(path string) (bool, bool, error) {
	// Mounts are listed by their real paths.
	realPath, err := filepath.EvalSymlinks(path)
	if err != nil {
		return false, false, err
	}
	f, err := os.Open("/proc/mounts")
	if err != nil {
		return false, false, err
	}
	defer f.Close()
	scanner := bufio.NewScanner(f)

	var mountPoint string
	var readonly bool
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.Split(line, " ")
		if len(parts) < 4 {
			return false, false, fmt.Errorf("invalid /proc/mounts line format %q", line)
		}
		mp := parts[1]
		opts := strings.Split(parts[3], ",")

		// Find the closest submount to the path. The mount point must be the
		// path itself or an ancestor directory of it; a plain substring match
		// would also accept unrelated mounts such as "/b" for "/a/b/c".
		covers := mp == "/" || realPath == mp || strings.HasPrefix(realPath, mp+"/")
		if covers && len(mp) > len(mountPoint) {
			mountPoint = mp
			readonly = specutils.ContainsStr(opts, "ro")
		}
	}
	if err := scanner.Err(); err != nil {
		return false, false, err
	}
	return mountPoint == realPath, readonly, nil
}
// destroyFS lazily unmounts the bind mounts that runsc created under
// `spec.Root.Path`, returning the container rootfs to its original state.
//
// Only mounts of type "bind" that specutils.IsSupportedDevMount accepts are
// considered. A destination that does not exist or is not a mount point is
// not treated as an error; any other failure is returned immediately.
func destroyFS(spec *specs.Spec) error {
	for _, mnt := range spec.Mounts {
		if mnt.Type == "bind" && specutils.IsSupportedDevMount(mnt) {
			// 'mnt.Destination' may traverse symlinks that live inside the
			// container, so resolve it relative to the container root.
			target, err := resolveSymlinks(spec.Root.Path, mnt.Destination)
			if err != nil {
				return err
			}

			flags := syscall.MNT_DETACH
			log.Infof("Unmounting dst: %q, flags: %#x", target, flags)

			// Based on http://man7.org/linux/man-pages/man2/umount.2.html
			// for kernel version 2.6+ with MNT_DETACH, EINVAL means the
			// target is not a mount point. That, like a missing target, is
			// deliberately tolerated.
			switch err := syscall.Unmount(target, flags); {
			case err == nil, os.IsNotExist(err), err == syscall.EINVAL:
				// Nothing (left) to unmount here.
			default:
				return err
			}
		}
	}
	return nil
}
// resolveSymlinks resolves 'rel' as a path inside 'root'. Symlinks found
// along the way are evaluated relative to 'root', so the result matches what
// a process running inside the container would see.
func resolveSymlinks(root, rel string) (string, error) {
	// Upper bound on the number of symlinks followed before giving up.
	const maxFollows = 255
	return resolveSymlinksImpl(root, root, rel, maxFollows)
}
// resolveSymlinksImpl walks 'rel' one path component at a time, starting at
// 'base', and follows every symlink it meets as if 'root' were the filesystem
// root.
//
// Absolute symlink targets restart the walk at 'root'; relative targets are
// resolved from the directory containing the link. Components that do not
// exist are appended as-is, since there is no symlink to evaluate. Attempts
// to walk above 'root' (e.g. via "..") are clamped to 'root'.
//
// followCount is the number of symlinks that may still be followed; an error
// is returned once it reaches zero. Other errors come from os.Lstat and
// os.Readlink.
func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
	if followCount == 0 {
		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
	}

	rel = filepath.Clean(rel)
	for _, name := range strings.Split(rel, string(filepath.Separator)) {
		if name == "" {
			continue
		}
		// Note that Join() resolves things like ".." and returns a clean path.
		path := filepath.Join(base, name)
		if !strings.HasPrefix(path, root) {
			// One cannot '..' their way out of root: restart from root rather
			// than silently dropping the component and staying in 'base'.
			base = root
			continue
		}
		fi, err := os.Lstat(path)
		if err != nil {
			if !os.IsNotExist(err) {
				return "", err
			}
			// Not found means there is no symlink to check. Just keep walking dirs.
			base = path
			continue
		}
		if fi.Mode()&os.ModeSymlink == 0 {
			// Regular file or directory: descend into it.
			base = path
			continue
		}

		link, err := os.Readlink(path)
		if err != nil {
			return "", err
		}
		if filepath.IsAbs(link) {
			// Absolute targets are relative to the container root.
			base = root
		}
		base, err = resolveSymlinksImpl(root, base, link, followCount-1)
		if err != nil {
			return "", err
		}
	}
	return base, nil
}
// optionsToFlags converts mount options to the corresponding mount(2) flag
// bits using optionsMap. Options are applied in order: an entry with set ==
// true turns its bits on, an entry with set == false turns them off. Options
// missing from optionsMap are logged and ignored.
func optionsToFlags(opts []string) uint32 {
	var rv uint32
	for _, opt := range opts {
		m, ok := optionsMap[opt]
		if !ok {
			log.Warningf("Ignoring mount option %q", opt)
			continue
		}
		if m.set {
			rv |= m.val
		} else {
			// Clear the bits. XOR would toggle them and, for example, turn
			// a lone "rw" into MS_RDONLY.
			rv &^= m.val
		}
	}
	return rv
}

View File

@ -75,7 +75,7 @@ type Sandbox struct {
// New creates the sandbox process. The caller must call Destroy() on the
// sandbox.
func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) {
s := &Sandbox{ID: id, Cgroup: cg}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
@ -86,17 +86,14 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
defer c.Clean()
// Create pipe to synchronize when sandbox process has been booted.
fds := make([]int, 2)
if err := syscall.Pipe(fds); err != nil {
clientSyncFile, sandboxSyncFile, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("creating pipe for sandbox %q: %v", s.ID, err)
}
clientSyncFile := os.NewFile(uintptr(fds[0]), "client sandbox sync")
defer clientSyncFile.Close()
sandboxSyncFile := os.NewFile(uintptr(fds[1]), "sandbox sync")
// Create the sandbox process.
err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, sandboxSyncFile)
err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile)
// sandboxSyncFile has to be closed to be able to detect when the sandbox
// process exits unexpectedly.
sandboxSyncFile.Close()
@ -294,7 +291,7 @@ func (s *Sandbox) connError(err error) error {
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, startSyncFile *os.File) error {
func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error {
// nextFD is used to get unused FDs that we can pass to the sandbox. It
// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
nextFD := 3
@ -345,10 +342,14 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD))
nextFD++
// Open the spec file to donate to the sandbox.
specFile, err := specutils.OpenCleanSpec(bundleDir)
defer mountsFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile)
cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD))
nextFD++
specFile, err := specutils.OpenSpec(bundleDir)
if err != nil {
return fmt.Errorf("opening spec file: %v", err)
return err
}
defer specFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, specFile)

View File

@ -5,6 +5,7 @@ package(licenses = ["notice"])
go_library(
name = "specutils",
srcs = [
"fs.go",
"namespace.go",
"specutils.go",
],

139
runsc/specutils/fs.go Normal file
View File

@ -0,0 +1,139 @@
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package specutils
import (
"fmt"
"path"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/log"
)
// mapping describes how a single mount option affects the mount(2) flags.
type mapping struct {
	// set is true when the option turns the bits in val on, and false when
	// the option turns them off.
	set bool
	// val holds the flag bits controlled by the option.
	val uint32
}

// optionsMap maps OCI filesystem mount options to mount(2) syscall flags.
// Propagation options are kept separately in propOptionsMap.
var optionsMap = map[string]mapping{
	"acl":           {set: true, val: syscall.MS_POSIXACL},
	"async":         {set: false, val: syscall.MS_SYNCHRONOUS},
	"atime":         {set: false, val: syscall.MS_NOATIME},
	"bind":          {set: true, val: syscall.MS_BIND},
	"defaults":      {set: true, val: 0},
	"dev":           {set: false, val: syscall.MS_NODEV},
	"diratime":      {set: false, val: syscall.MS_NODIRATIME},
	"dirsync":       {set: true, val: syscall.MS_DIRSYNC},
	"exec":          {set: false, val: syscall.MS_NOEXEC},
	"iversion":      {set: true, val: syscall.MS_I_VERSION},
	"loud":          {set: false, val: syscall.MS_SILENT},
	"mand":          {set: true, val: syscall.MS_MANDLOCK},
	"noacl":         {set: false, val: syscall.MS_POSIXACL},
	"noatime":       {set: true, val: syscall.MS_NOATIME},
	"nodev":         {set: true, val: syscall.MS_NODEV},
	"nodiratime":    {set: true, val: syscall.MS_NODIRATIME},
	"noiversion":    {set: false, val: syscall.MS_I_VERSION},
	"nomand":        {set: false, val: syscall.MS_MANDLOCK},
	"norelatime":    {set: false, val: syscall.MS_RELATIME},
	"nostrictatime": {set: false, val: syscall.MS_STRICTATIME},
	"nosuid":        {set: true, val: syscall.MS_NOSUID},
	"rbind":         {set: true, val: syscall.MS_BIND | syscall.MS_REC},
	"relatime":      {set: true, val: syscall.MS_RELATIME},
	"remount":       {set: true, val: syscall.MS_REMOUNT},
	"ro":            {set: true, val: syscall.MS_RDONLY},
	"rw":            {set: false, val: syscall.MS_RDONLY},
	"silent":        {set: true, val: syscall.MS_SILENT},
	"strictatime":   {set: true, val: syscall.MS_STRICTATIME},
	"suid":          {set: false, val: syscall.MS_NOSUID},
	"sync":          {set: true, val: syscall.MS_SYNCHRONOUS},
}

// propOptionsMap is similar to optionsMap, but it lists propagation options
// that cannot be used together with other flags.
var propOptionsMap = map[string]mapping{
	"private":     {set: true, val: syscall.MS_PRIVATE},
	"rprivate":    {set: true, val: syscall.MS_PRIVATE | syscall.MS_REC},
	"slave":       {set: true, val: syscall.MS_SLAVE},
	"rslave":      {set: true, val: syscall.MS_SLAVE | syscall.MS_REC},
	"unbindable":  {set: true, val: syscall.MS_UNBINDABLE},
	"runbindable": {set: true, val: syscall.MS_UNBINDABLE | syscall.MS_REC},
}

// invalidOptions lists options that are not allowed.
//   - shared: sandbox must be isolated from the host. Propagating mount changes
//     from the sandbox to the host breaks the isolation.
//   - noexec: not yet supported. Don't ignore it since it could break
//     in-sandbox security.
var invalidOptions = []string{"shared", "rshared", "noexec"}

// OptionsToFlags converts mount options to syscall flags. Options that are
// not present in optionsMap (including propagation options) are ignored.
func OptionsToFlags(opts []string) uint32 {
	return optionsToFlags(opts, optionsMap)
}

// PropOptionsToFlags converts propagation mount options to syscall flags.
// Propagation options cannot be set together with other options and must be
// handled separately. Options that are not present in propOptionsMap are
// ignored.
func PropOptionsToFlags(opts []string) uint32 {
	return optionsToFlags(opts, propOptionsMap)
}

// optionsToFlags folds opts into a flag word using the given option table.
// Options are applied in order: an entry with set == true turns its bits on,
// an entry with set == false turns them off. Options missing from source are
// skipped silently.
func optionsToFlags(opts []string, source map[string]mapping) uint32 {
	var rv uint32
	for _, opt := range opts {
		m, ok := source[opt]
		if !ok {
			continue
		}
		if m.set {
			rv |= m.val
		} else {
			// Clear the bits. XOR would toggle them and, for example, turn
			// a lone "rw" into MS_RDONLY.
			rv &^= m.val
		}
	}
	return rv
}
// validateMount checks a single spec mount.
//
// The destination must be an absolute path. For "bind" mounts every option
// is inspected: options listed in invalidOptions are rejected with an error,
// and options found in neither optionsMap nor propOptionsMap are logged and
// otherwise ignored. Options of other mount types are not inspected.
func validateMount(mnt *specs.Mount) error {
	if !path.IsAbs(mnt.Destination) {
		return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt)
	}
	if mnt.Type != "bind" {
		return nil
	}
	for _, opt := range mnt.Options {
		if ContainsStr(invalidOptions, opt) {
			return fmt.Errorf("mount option %q is not supported: %v", opt, mnt)
		}
		if _, known := optionsMap[opt]; known {
			continue
		}
		if _, known := propOptionsMap[opt]; known {
			continue
		}
		log.Warningf("Ignoring unknown mount option %q", opt)
	}
	return nil
}
// validateRootfsPropagation checks the rootfs propagation option from the
// spec. The option is accepted only if it maps to flags that include
// MS_SLAVE or MS_PRIVATE; anything else yields an error.
func validateRootfsPropagation(opt string) error {
	// Propagation modes that keep sandbox mount changes away from the host.
	const allowed = syscall.MS_SLAVE | syscall.MS_PRIVATE

	if PropOptionsToFlags([]string{opt})&allowed != 0 {
		return nil
	}
	return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
}

View File

@ -105,9 +105,9 @@ func FilterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNam
return out
}
// SetNS sets the namespace of the given type. It must be called with
// setNS sets the namespace of the given type. It must be called with
// OSThreadLocked.
func SetNS(fd, nsType uintptr) error {
func setNS(fd, nsType uintptr) error {
if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 {
return err
}
@ -119,30 +119,30 @@ func SetNS(fd, nsType uintptr) error {
//
// Preconditions: Must be called with os thread locked.
func ApplyNS(ns specs.LinuxNamespace) (func(), error) {
log.Infof("applying namespace %v at path %q", ns.Type, ns.Path)
log.Infof("Applying namespace %v at path %q", ns.Type, ns.Path)
newNS, err := os.Open(ns.Path)
if err != nil {
return nil, fmt.Errorf("error opening %q: %v", ns.Path, err)
}
defer newNS.Close()
// Store current netns to restore back after child is started.
// Store current namespace to restore back.
curPath := nsPath(ns.Type)
oldNS, err := os.Open(curPath)
if err != nil {
return nil, fmt.Errorf("error opening %q: %v", curPath, err)
}
// Set netns to the one requested and setup function to restore it back.
// Set namespace to the one requested and setup function to restore it back.
flag := nsCloneFlag(ns.Type)
if err := SetNS(newNS.Fd(), flag); err != nil {
if err := setNS(newNS.Fd(), flag); err != nil {
oldNS.Close()
return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err)
}
return func() {
log.Infof("restoring namespace %v", ns.Type)
log.Infof("Restoring namespace %v", ns.Type)
defer oldNS.Close()
if err := SetNS(oldNS.Fd(), flag); err != nil {
if err := setNS(oldNS.Fd(), flag); err != nil {
panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err))
}
}, nil

View File

@ -92,9 +92,14 @@ func ValidateSpec(spec *specs.Spec) error {
log.Warningf("Seccomp spec is being ignored")
}
for i, m := range spec.Mounts {
if !path.IsAbs(m.Destination) {
return fmt.Errorf("Spec.Mounts[%d] Mount.Destination must be an absolute path: %v", i, m)
if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil {
return err
}
}
for _, m := range spec.Mounts {
if err := validateMount(&m); err != nil {
return err
}
}
@ -129,15 +134,19 @@ func absPath(base, rel string) string {
return filepath.Join(base, rel)
}
// OpenSpec opens an OCI runtime spec from the given bundle directory.
func OpenSpec(bundleDir string) (*os.File, error) {
// The spec file must be named "config.json" inside the bundle directory.
return os.Open(filepath.Join(bundleDir, "config.json"))
}
// ReadSpec reads an OCI runtime spec from the given bundle directory.
// ReadSpec also normalizes all potential relative paths into absolute
// path, e.g. spec.Root.Path, mount.Source.
func ReadSpec(bundleDir string) (*specs.Spec, error) {
// The spec file must be in "config.json" inside the bundle directory.
specPath := filepath.Join(bundleDir, "config.json")
specFile, err := os.Open(specPath)
specFile, err := OpenSpec(bundleDir)
if err != nil {
return nil, fmt.Errorf("error opening spec file %q: %v", specPath, err)
return nil, fmt.Errorf("error opening spec file %q: %v", specFile.Name(), err)
}
defer specFile.Close()
return ReadSpecFromFile(bundleDir, specFile)
@ -171,27 +180,17 @@ func ReadSpecFromFile(bundleDir string, specFile *os.File) (*specs.Spec, error)
return &spec, nil
}
// OpenCleanSpec opens spec file that has destination mount paths resolved to
// their absolute location.
func OpenCleanSpec(bundleDir string) (*os.File, error) {
f, err := os.Open(filepath.Join(bundleDir, "config.clean.json"))
// ReadMounts reads mount list from a file.
func ReadMounts(f *os.File) ([]specs.Mount, error) {
bytes, err := ioutil.ReadAll(f)
if err != nil {
return nil, err
return nil, fmt.Errorf("error reading mounts: %v", err)
}
if _, err := f.Seek(0, os.SEEK_SET); err != nil {
f.Close()
return nil, fmt.Errorf("error seeking to beginning of file %q: %v", f.Name(), err)
var mounts []specs.Mount
if err := json.Unmarshal(bytes, &mounts); err != nil {
return nil, fmt.Errorf("error unmarshaling mounts: %v\n %s", err, string(bytes))
}
return f, nil
}
// WriteCleanSpec writes a spec file that has destination mount paths resolved.
func WriteCleanSpec(bundleDir string, spec *specs.Spec) error {
bytes, err := json.Marshal(spec)
if err != nil {
return err
}
return ioutil.WriteFile(filepath.Join(bundleDir, "config.clean.json"), bytes, 0755)
return mounts, nil
}
// Capabilities takes in spec and returns a TaskCapabilities corresponding to
@ -407,8 +406,7 @@ func Mount(src, dst, typ string, flags uint32) error {
// source (file or directory).
var isDir bool
if typ == "proc" {
// Special case, as there is no source directory for proc
// mounts.
// Special case, as there is no source directory for proc mounts.
isDir = true
} else if fi, err := os.Stat(src); err != nil {
return fmt.Errorf("Stat(%q) failed: %v", src, err)

View File

@ -219,6 +219,37 @@ func TestSpecInvalid(t *testing.T) {
},
error: "must be an absolute path",
},
{
name: "invalid mount option",
spec: specs.Spec{
Root: &specs.Root{Path: "/"},
Process: &specs.Process{
Args: []string{"/bin/true"},
},
Mounts: []specs.Mount{
{
Source: "/src",
Destination: "/dst",
Type: "bind",
Options: []string{"shared"},
},
},
},
error: "is not supported",
},
{
name: "invalid rootfs propagation",
spec: specs.Spec{
Root: &specs.Root{Path: "/"},
Process: &specs.Process{
Args: []string{"/bin/true"},
},
Linux: &specs.Linux{
RootfsPropagation: "foo",
},
},
error: "root mount propagation option must specify private or slave",
},
} {
err := ValidateSpec(&test.spec)
if len(test.error) == 0 {