2020-04-17 17:38:04 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package boot
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2020-04-25 01:22:21 +00:00
|
|
|
"sort"
|
2020-04-17 17:38:04 +00:00
|
|
|
"strings"
|
|
|
|
|
|
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
2020-09-17 08:07:55 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/cleanup"
|
2020-05-29 23:33:50 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/context"
|
2020-04-17 17:38:04 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/fspath"
|
2020-05-29 23:33:50 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/log"
|
2020-04-17 17:38:04 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/devices/memdev"
|
2020-06-24 01:47:22 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/devices/ttydev"
|
2020-06-24 23:21:53 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/devices/tundev"
|
2020-05-29 23:33:50 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/user"
|
2020-05-13 17:30:00 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
|
2020-06-17 20:23:27 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/fuse"
|
2020-05-13 17:30:00 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
|
Add //pkg/sentry/fsimpl/overlay.
Major differences from existing overlay filesystems:
- Linux allows lower layers in an overlay to require revalidation, but not the
upper layer. VFS1 allows the upper layer in an overlay to require
revalidation, but not the lower layer. VFS2 does not allow any layers to
require revalidation. (Now that vfs.MkdirOptions.ForSyntheticMountpoint
exists, no uses of overlay in VFS1 are believed to require upper layer
revalidation; in particular, the requirement that the upper layer support the
creation of "trusted." extended attributes for whiteouts effectively required
the upper filesystem to be tmpfs in most cases.)
- Like VFS1, but unlike Linux, VFS2 overlay does not attempt to make mutations
of the upper layer atomic using a working directory and features like
RENAME_WHITEOUT. (This may change in the future, since not having a working
directory makes error recovery for some operations, e.g. rmdir, particularly
painful.)
- Like Linux, but unlike VFS1, VFS2 represents whiteouts using character
devices with rdev == 0; the equivalent of the whiteout attribute on
directories is xattr trusted.overlay.opaque = "y"; and there is no equivalent
to the whiteout attribute on non-directories since non-directories are never
merged with lower layers.
- Device and inode numbers work as follows:
- In Linux, modulo the xino feature and a special case for when all layers
are the same filesystem:
- Directories use the overlay filesystem's device number and an
ephemeral inode number assigned by the overlay.
- Non-directories that have been copied up use the device and inode
number assigned by the upper filesystem.
- Non-directories that have not been copied up use a per-(overlay,
layer)-pair device number and the inode number assigned by the lower
filesystem.
- In VFS1, device and inode numbers always come from the lower layer unless
"whited out"; this has the adverse effect of requiring interaction with
the lower filesystem even for non-directory files that exist on the upper
layer.
- In VFS2, device and inode numbers are assigned as in Linux, except that
xino and the samefs special case are not supported.
- Like Linux, but unlike VFS1, VFS2 does not attempt to maintain memory mapping
coherence across copy-up. (This may have to change in the future, as users
may be dependent on this property.)
- Like Linux, but unlike VFS1, VFS2 uses the overlayfs mounter's credentials
when interacting with the overlay's layers, rather than the caller's.
- Like Linux, but unlike VFS1, VFS2 permits multiple lower layers in an
overlay.
- Like Linux, but unlike VFS1, VFS2's overlay filesystem is
application-mountable.
Updates #1199
PiperOrigin-RevId: 316019067
2020-06-12 01:33:35 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/overlay"
|
2020-05-13 17:30:00 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
|
2020-08-06 18:01:13 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/inet"
|
2020-04-17 17:38:04 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
2020-05-29 23:33:50 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
2020-08-20 01:35:35 +00:00
|
|
|
"gvisor.dev/gvisor/runsc/config"
|
2020-04-17 17:38:04 +00:00
|
|
|
)
|
|
|
|
|
2020-07-09 00:10:35 +00:00
|
|
|
func registerFilesystems(k *kernel.Kernel) error {
|
|
|
|
ctx := k.SupervisorContext()
|
|
|
|
creds := auth.NewRootCredentials(k.RootUserNamespace())
|
|
|
|
vfsObj := k.VFS()
|
|
|
|
|
2020-05-13 17:30:00 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
2020-04-17 17:38:04 +00:00
|
|
|
AllowUserList: true,
|
2020-05-04 17:58:01 +00:00
|
|
|
// TODO(b/29356795): Users may mount this once the terminals are in a
|
|
|
|
// usable state.
|
|
|
|
AllowUserMount: false,
|
2020-04-17 17:38:04 +00:00
|
|
|
})
|
2020-05-13 17:30:00 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
2020-04-17 17:38:04 +00:00
|
|
|
AllowUserMount: true,
|
|
|
|
AllowUserList: true,
|
|
|
|
})
|
2020-05-13 17:30:00 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
2020-05-04 17:58:01 +00:00
|
|
|
AllowUserList: true,
|
2020-04-17 17:38:04 +00:00
|
|
|
})
|
Add //pkg/sentry/fsimpl/overlay.
Major differences from existing overlay filesystems:
- Linux allows lower layers in an overlay to require revalidation, but not the
upper layer. VFS1 allows the upper layer in an overlay to require
revalidation, but not the lower layer. VFS2 does not allow any layers to
require revalidation. (Now that vfs.MkdirOptions.ForSyntheticMountpoint
exists, no uses of overlay in VFS1 are believed to require upper layer
revalidation; in particular, the requirement that the upper layer support the
creation of "trusted." extended attributes for whiteouts effectively required
the upper filesystem to be tmpfs in most cases.)
- Like VFS1, but unlike Linux, VFS2 overlay does not attempt to make mutations
of the upper layer atomic using a working directory and features like
RENAME_WHITEOUT. (This may change in the future, since not having a working
directory makes error recovery for some operations, e.g. rmdir, particularly
painful.)
- Like Linux, but unlike VFS1, VFS2 represents whiteouts using character
devices with rdev == 0; the equivalent of the whiteout attribute on
directories is xattr trusted.overlay.opaque = "y"; and there is no equivalent
to the whiteout attribute on non-directories since non-directories are never
merged with lower layers.
- Device and inode numbers work as follows:
- In Linux, modulo the xino feature and a special case for when all layers
are the same filesystem:
- Directories use the overlay filesystem's device number and an
ephemeral inode number assigned by the overlay.
- Non-directories that have been copied up use the device and inode
number assigned by the upper filesystem.
- Non-directories that have not been copied up use a per-(overlay,
layer)-pair device number and the inode number assigned by the lower
filesystem.
- In VFS1, device and inode numbers always come from the lower layer unless
"whited out"; this has the adverse effect of requiring interaction with
the lower filesystem even for non-directory files that exist on the upper
layer.
- In VFS2, device and inode numbers are assigned as in Linux, except that
xino and the samefs special case are not supported.
- Like Linux, but unlike VFS1, VFS2 does not attempt to maintain memory mapping
coherence across copy-up. (This may have to change in the future, as users
may be dependent on this property.)
- Like Linux, but unlike VFS1, VFS2 uses the overlayfs mounter's credentials
when interacting with the overlay's layers, rather than the caller's.
- Like Linux, but unlike VFS1, VFS2 permits multiple lower layers in an
overlay.
- Like Linux, but unlike VFS1, VFS2's overlay filesystem is
application-mountable.
Updates #1199
PiperOrigin-RevId: 316019067
2020-06-12 01:33:35 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(overlay.Name, &overlay.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
|
|
|
AllowUserMount: true,
|
|
|
|
AllowUserList: true,
|
|
|
|
})
|
2020-05-13 17:30:00 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
2020-04-17 17:38:04 +00:00
|
|
|
AllowUserMount: true,
|
|
|
|
AllowUserList: true,
|
|
|
|
})
|
2020-05-13 17:30:00 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
2020-04-17 17:38:04 +00:00
|
|
|
AllowUserMount: true,
|
|
|
|
AllowUserList: true,
|
|
|
|
})
|
2020-05-13 17:30:00 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
2020-04-17 17:38:04 +00:00
|
|
|
AllowUserMount: true,
|
|
|
|
AllowUserList: true,
|
|
|
|
})
|
2020-06-09 16:35:39 +00:00
|
|
|
vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
|
|
|
|
AllowUserMount: true,
|
|
|
|
AllowUserList: true,
|
|
|
|
})
|
2020-04-17 17:38:04 +00:00
|
|
|
|
|
|
|
// Setup files in devtmpfs.
|
|
|
|
if err := memdev.Register(vfsObj); err != nil {
|
|
|
|
return fmt.Errorf("registering memdev: %w", err)
|
|
|
|
}
|
2020-06-24 01:47:22 +00:00
|
|
|
if err := ttydev.Register(vfsObj); err != nil {
|
|
|
|
return fmt.Errorf("registering ttydev: %w", err)
|
2020-06-23 18:25:38 +00:00
|
|
|
}
|
2020-08-06 18:01:13 +00:00
|
|
|
tunSupported := tundev.IsNetTunSupported(inet.StackFromContext(ctx))
|
|
|
|
if tunSupported {
|
|
|
|
if err := tundev.Register(vfsObj); err != nil {
|
|
|
|
return fmt.Errorf("registering tundev: %v", err)
|
|
|
|
}
|
|
|
|
}
|
2020-06-17 20:23:27 +00:00
|
|
|
|
2020-07-08 01:48:25 +00:00
|
|
|
if kernel.FUSEEnabled {
|
|
|
|
if err := fuse.Register(vfsObj); err != nil {
|
|
|
|
return fmt.Errorf("registering fusedev: %w", err)
|
|
|
|
}
|
2020-06-24 01:47:22 +00:00
|
|
|
}
|
2020-07-08 01:48:25 +00:00
|
|
|
|
2020-05-13 17:30:00 +00:00
|
|
|
a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name)
|
2020-04-17 17:38:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("creating devtmpfs accessor: %w", err)
|
|
|
|
}
|
2020-08-03 20:33:47 +00:00
|
|
|
defer a.Release(ctx)
|
2020-04-17 17:38:04 +00:00
|
|
|
|
|
|
|
if err := a.UserspaceInit(ctx); err != nil {
|
|
|
|
return fmt.Errorf("initializing userspace: %w", err)
|
|
|
|
}
|
|
|
|
if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil {
|
2020-06-24 23:21:53 +00:00
|
|
|
return fmt.Errorf("creating memdev devtmpfs files: %w", err)
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
2020-06-24 01:47:22 +00:00
|
|
|
if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil {
|
2020-06-24 23:21:53 +00:00
|
|
|
return fmt.Errorf("creating ttydev devtmpfs files: %w", err)
|
|
|
|
}
|
2020-08-06 18:01:13 +00:00
|
|
|
if tunSupported {
|
|
|
|
if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil {
|
|
|
|
return fmt.Errorf("creating tundev devtmpfs files: %v", err)
|
|
|
|
}
|
2020-06-24 01:47:22 +00:00
|
|
|
}
|
2020-07-08 01:48:25 +00:00
|
|
|
|
|
|
|
if kernel.FUSEEnabled {
|
|
|
|
if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil {
|
|
|
|
return fmt.Errorf("creating fusedev devtmpfs files: %w", err)
|
|
|
|
}
|
2020-06-17 20:23:27 +00:00
|
|
|
}
|
2020-06-09 16:35:39 +00:00
|
|
|
|
2020-04-17 17:38:04 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-08-20 01:35:35 +00:00
|
|
|
func setupContainerVFS2(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
|
2020-09-08 20:58:50 +00:00
|
|
|
mns, err := mntr.mountAll(conf, procArgs)
|
2020-04-17 17:38:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to setupFS: %w", err)
|
|
|
|
}
|
|
|
|
procArgs.MountNamespaceVFS2 = mns
|
|
|
|
|
2020-05-29 23:33:50 +00:00
|
|
|
// Resolve the executable path from working dir and environment.
|
2020-06-09 06:06:50 +00:00
|
|
|
resolved, err := user.ResolveExecutablePath(ctx, procArgs)
|
2020-05-29 23:33:50 +00:00
|
|
|
if err != nil {
|
2020-06-09 06:06:50 +00:00
|
|
|
return err
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
2020-06-09 06:06:50 +00:00
|
|
|
procArgs.Filename = resolved
|
2020-05-29 23:33:50 +00:00
|
|
|
return nil
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
|
|
|
|
2020-09-08 20:58:50 +00:00
|
|
|
func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
|
2020-04-17 17:38:04 +00:00
|
|
|
log.Infof("Configuring container's file system with VFS2")
|
|
|
|
|
|
|
|
// Create context with root credentials to mount the filesystem (the current
|
|
|
|
// user may not be privileged enough).
|
2020-05-04 18:41:38 +00:00
|
|
|
rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
|
2020-04-17 17:38:04 +00:00
|
|
|
rootProcArgs := *procArgs
|
|
|
|
rootProcArgs.WorkingDirectory = "/"
|
2020-05-04 18:41:38 +00:00
|
|
|
rootProcArgs.Credentials = rootCreds
|
2020-04-17 17:38:04 +00:00
|
|
|
rootProcArgs.Umask = 0022
|
|
|
|
rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
|
|
|
|
rootCtx := procArgs.NewContext(c.k)
|
|
|
|
|
2020-05-04 18:41:38 +00:00
|
|
|
mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds)
|
2020-04-17 17:38:04 +00:00
|
|
|
if err != nil {
|
2020-04-25 01:22:21 +00:00
|
|
|
return nil, fmt.Errorf("creating mount namespace: %w", err)
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
|
|
|
rootProcArgs.MountNamespaceVFS2 = mns
|
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
root := mns.Root()
|
2020-10-13 18:29:21 +00:00
|
|
|
root.IncRef()
|
2020-09-17 08:07:55 +00:00
|
|
|
defer root.DecRef(rootCtx)
|
|
|
|
if root.Mount().ReadOnly() {
|
|
|
|
// Switch to ReadWrite while we setup submounts.
|
|
|
|
if err := c.k.VFS().SetMountReadOnly(root.Mount(), false); err != nil {
|
|
|
|
return nil, fmt.Errorf(`failed to set mount at "/" readwrite: %w`, err)
|
|
|
|
}
|
|
|
|
// Restore back to ReadOnly at the end.
|
|
|
|
defer func() {
|
|
|
|
if err := c.k.VFS().SetMountReadOnly(root.Mount(), true); err != nil {
|
|
|
|
panic(fmt.Sprintf(`failed to restore mount at "/" back to readonly: %v`, err))
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
2020-04-17 17:38:04 +00:00
|
|
|
// Mount submounts.
|
2020-05-04 18:41:38 +00:00
|
|
|
if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil {
|
2020-04-17 17:38:04 +00:00
|
|
|
return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
|
|
|
|
}
|
2020-09-08 20:58:50 +00:00
|
|
|
|
2020-04-17 17:38:04 +00:00
|
|
|
return mns, nil
|
|
|
|
}
|
|
|
|
|
2020-09-08 20:58:50 +00:00
|
|
|
// createMountNamespaceVFS2 creates the container's root mount and namespace.
|
2020-08-20 01:35:35 +00:00
|
|
|
func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
|
2020-04-25 01:22:21 +00:00
|
|
|
fd := c.fds.remove()
|
2020-09-08 20:58:50 +00:00
|
|
|
data := p9MountData(fd, conf.FileAccess, true /* vfs2 */)
|
2020-07-30 21:13:11 +00:00
|
|
|
|
|
|
|
if conf.OverlayfsStaleRead {
|
|
|
|
// We can't check for overlayfs here because sandbox is chroot'ed and gofer
|
|
|
|
// can only send mount options for specs.Mounts (specs.Root is missing
|
|
|
|
// Options field). So assume root is always on top of overlayfs.
|
2020-09-08 20:58:50 +00:00
|
|
|
data = append(data, "overlayfs_stale_read")
|
2020-07-30 21:13:11 +00:00
|
|
|
}
|
2020-04-25 01:22:21 +00:00
|
|
|
|
|
|
|
log.Infof("Mounting root over 9P, ioFD: %d", fd)
|
2020-09-08 20:58:50 +00:00
|
|
|
opts := &vfs.MountOptions{
|
2020-09-17 08:07:55 +00:00
|
|
|
ReadOnly: c.root.Readonly,
|
2020-09-08 20:58:50 +00:00
|
|
|
GetFilesystemOptions: vfs.GetFilesystemOptions{
|
|
|
|
Data: strings.Join(data, ","),
|
2020-10-24 00:46:43 +00:00
|
|
|
InternalData: gofer.InternalFilesystemOptions{
|
|
|
|
UniqueID: "/",
|
|
|
|
},
|
2020-09-08 20:58:50 +00:00
|
|
|
},
|
|
|
|
InternalMount: true,
|
|
|
|
}
|
2020-09-17 08:07:55 +00:00
|
|
|
|
|
|
|
fsName := gofer.Name
|
|
|
|
if conf.Overlay && !c.root.Readonly {
|
|
|
|
log.Infof("Adding overlay on top of root")
|
|
|
|
var err error
|
|
|
|
var cleanup func()
|
|
|
|
opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("mounting root with overlay: %w", err)
|
|
|
|
}
|
|
|
|
defer cleanup()
|
|
|
|
fsName = overlay.Name
|
|
|
|
}
|
|
|
|
|
|
|
|
mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", fsName, opts)
|
2020-04-25 01:22:21 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("setting up mount namespace: %w", err)
|
|
|
|
}
|
|
|
|
return mns, nil
|
|
|
|
}
|
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
// configureOverlay mounts the lower layer using "lowerOpts", mounts the upper
|
|
|
|
// layer using tmpfs, and return overlay mount options. "cleanup" must be called
|
|
|
|
// after the options have been used to mount the overlay, to release refs on
|
|
|
|
// lower and upper mounts.
|
|
|
|
func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Credentials, lowerOpts *vfs.MountOptions, lowerFSName string) (*vfs.MountOptions, func(), error) {
|
|
|
|
// First copy options from lower layer to upper layer and overlay. Clear
|
|
|
|
// filesystem specific options.
|
|
|
|
upperOpts := *lowerOpts
|
|
|
|
upperOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{}
|
|
|
|
|
|
|
|
overlayOpts := *lowerOpts
|
|
|
|
overlayOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{}
|
|
|
|
|
|
|
|
// Next mount upper and lower. Upper is a tmpfs mount to keep all
|
|
|
|
// modifications inside the sandbox.
|
|
|
|
upper, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, tmpfs.Name, &upperOpts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, fmt.Errorf("failed to create upper layer for overlay, opts: %+v: %v", upperOpts, err)
|
|
|
|
}
|
|
|
|
cu := cleanup.Make(func() { upper.DecRef(ctx) })
|
|
|
|
defer cu.Clean()
|
|
|
|
|
|
|
|
// All writes go to the upper layer, be paranoid and make lower readonly.
|
|
|
|
lowerOpts.ReadOnly = true
|
|
|
|
lower, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, lowerFSName, lowerOpts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
cu.Add(func() { lower.DecRef(ctx) })
|
|
|
|
|
Fix runsc tests on VFS2 overlay.
- Check the sticky bit in overlay.filesystem.UnlinkAt(). Fixes
StickyTest.StickyBitPermDenied.
- When configuring a VFS2 overlay in runsc, copy the lower layer's root
owner/group/mode to the upper layer's root (as in the VFS1 equivalent,
boot.addOverlay()). This makes the overlay root owned by UID/GID 65534 with
mode 0755 rather than owned by UID/GID 0 with mode 01777. Fixes
CreateTest.CreateFailsOnUnpermittedDir, which assumes that the test cannot
create files in /.
- MknodTest.UnimplementedTypesReturnError assumes that the creation of device
special files is not supported. However, while the VFS2 gofer client still
doesn't support device special files, VFS2 tmpfs does, and in the overlay
test dimension mknod() targets a tmpfs upper layer. The test initially has
all capabilities, including CAP_MKNOD, so its creation of these files
succeeds. Constrain these tests to VFS1.
- Rename overlay.nonDirectoryFD to overlay.regularFileFD and only use it for
regular files, using the original FD for pipes and device special files. This
is more consistent with Linux (which gets the original inode_operations, and
therefore file_operations, for these file types from ovl_fill_inode() =>
init_special_inode()) and fixes remaining mknod and pipe tests.
- Read/write 1KB at a time in PipeTest.Streaming, rather than 4 bytes. This
isn't strictly necessary, but it makes the test less obnoxiously slow on
ptrace.
Fixes #4407
PiperOrigin-RevId: 337971042
2020-10-20 00:46:05 +00:00
|
|
|
// Propagate the lower layer's root's owner, group, and mode to the upper
|
|
|
|
// layer's root for consistency with VFS1.
|
|
|
|
upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root())
|
|
|
|
lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root())
|
|
|
|
stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{
|
|
|
|
Root: lowerRootVD,
|
|
|
|
Start: lowerRootVD,
|
|
|
|
}, &vfs.StatOptions{
|
|
|
|
Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{
|
|
|
|
Root: upperRootVD,
|
|
|
|
Start: upperRootVD,
|
|
|
|
}, &vfs.SetStatOptions{
|
|
|
|
Stat: linux.Statx{
|
|
|
|
Mask: (linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE) & stat.Mask,
|
|
|
|
UID: stat.UID,
|
|
|
|
GID: stat.GID,
|
|
|
|
Mode: stat.Mode,
|
|
|
|
},
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
// Configure overlay with both layers.
|
|
|
|
overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{
|
Fix runsc tests on VFS2 overlay.
- Check the sticky bit in overlay.filesystem.UnlinkAt(). Fixes
StickyTest.StickyBitPermDenied.
- When configuring a VFS2 overlay in runsc, copy the lower layer's root
owner/group/mode to the upper layer's root (as in the VFS1 equivalent,
boot.addOverlay()). This makes the overlay root owned by UID/GID 65534 with
mode 0755 rather than owned by UID/GID 0 with mode 01777. Fixes
CreateTest.CreateFailsOnUnpermittedDir, which assumes that the test cannot
create files in /.
- MknodTest.UnimplementedTypesReturnError assumes that the creation of device
special files is not supported. However, while the VFS2 gofer client still
doesn't support device special files, VFS2 tmpfs does, and in the overlay
test dimension mknod() targets a tmpfs upper layer. The test initially has
all capabilities, including CAP_MKNOD, so its creation of these files
succeeds. Constrain these tests to VFS1.
- Rename overlay.nonDirectoryFD to overlay.regularFileFD and only use it for
regular files, using the original FD for pipes and device special files. This
is more consistent with Linux (which gets the original inode_operations, and
therefore file_operations, for these file types from ovl_fill_inode() =>
init_special_inode()) and fixes remaining mknod and pipe tests.
- Read/write 1KB at a time in PipeTest.Streaming, rather than 4 bytes. This
isn't strictly necessary, but it makes the test less obnoxiously slow on
ptrace.
Fixes #4407
PiperOrigin-RevId: 337971042
2020-10-20 00:46:05 +00:00
|
|
|
UpperRoot: upperRootVD,
|
|
|
|
LowerRoots: []vfs.VirtualDentry{lowerRootVD},
|
2020-09-17 08:07:55 +00:00
|
|
|
}
|
|
|
|
return &overlayOpts, cu.Release(), nil
|
|
|
|
}
|
|
|
|
|
2020-08-20 01:35:35 +00:00
|
|
|
func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
|
2020-05-14 01:16:45 +00:00
|
|
|
mounts, err := c.prepareMountsVFS2()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-04-25 01:22:21 +00:00
|
|
|
|
2020-05-14 01:16:45 +00:00
|
|
|
for i := range mounts {
|
|
|
|
submount := &mounts[i]
|
2020-04-17 17:38:04 +00:00
|
|
|
log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
|
2020-08-21 21:28:27 +00:00
|
|
|
var (
|
|
|
|
mnt *vfs.Mount
|
|
|
|
err error
|
|
|
|
)
|
|
|
|
|
2020-07-09 00:10:35 +00:00
|
|
|
if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() {
|
2020-08-21 21:28:27 +00:00
|
|
|
mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint)
|
|
|
|
if err != nil {
|
2020-07-09 00:10:35 +00:00
|
|
|
return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.Destination, err)
|
|
|
|
}
|
|
|
|
} else {
|
2020-08-21 21:28:27 +00:00
|
|
|
mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount)
|
|
|
|
if err != nil {
|
2020-07-09 00:10:35 +00:00
|
|
|
return fmt.Errorf("mount submount %q: %w", submount.Destination, err)
|
|
|
|
}
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
2020-08-21 21:28:27 +00:00
|
|
|
|
|
|
|
if mnt != nil && mnt.ReadOnly() {
|
|
|
|
// Switch to ReadWrite while we setup submounts.
|
|
|
|
if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil {
|
2020-09-17 08:07:55 +00:00
|
|
|
return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.Destination, err)
|
2020-08-21 21:28:27 +00:00
|
|
|
}
|
2020-09-08 20:58:50 +00:00
|
|
|
// Restore back to ReadOnly at the end.
|
2020-08-21 21:28:27 +00:00
|
|
|
defer func() {
|
|
|
|
if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil {
|
|
|
|
panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err))
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
|
|
|
|
2020-06-02 04:30:28 +00:00
|
|
|
if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil {
|
|
|
|
return fmt.Errorf(`mount submount "\tmp": %w`, err)
|
|
|
|
}
|
2020-05-14 01:16:45 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// mountAndFD pairs a mount from the container spec with the host FD that
// backs it.
type mountAndFD struct {
	// Mount is the embedded spec mount (source, destination, type, options).
	specs.Mount
	// fd is the FD assigned to the mount. prepareMountsVFS2 assigns one to
	// bind mounts only and uses -1 for every other mount type.
	fd int
}
|
|
|
|
|
2020-05-14 01:16:45 +00:00
|
|
|
func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
|
|
|
|
// Associate bind mounts with their FDs before sorting since there is an
|
|
|
|
// undocumented assumption that FDs are dispensed in the order in which
|
|
|
|
// they are required by mounts.
|
|
|
|
var mounts []mountAndFD
|
|
|
|
for _, m := range c.mounts {
|
|
|
|
fd := -1
|
|
|
|
// Only bind mounts use host FDs; see
|
|
|
|
// containerMounter.getMountNameAndOptionsVFS2.
|
2020-05-28 19:24:37 +00:00
|
|
|
if m.Type == bind {
|
2020-05-14 01:16:45 +00:00
|
|
|
fd = c.fds.remove()
|
|
|
|
}
|
|
|
|
mounts = append(mounts, mountAndFD{
|
|
|
|
Mount: m,
|
|
|
|
fd: fd,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
if err := c.checkDispenser(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-04-25 01:22:21 +00:00
|
|
|
// Sort the mounts so that we don't place children before parents.
|
2020-05-14 01:16:45 +00:00
|
|
|
sort.Slice(mounts, func(i, j int) bool {
|
|
|
|
return len(mounts[i].Destination) < len(mounts[j].Destination)
|
|
|
|
})
|
|
|
|
|
|
|
|
return mounts, nil
|
2020-04-25 01:22:21 +00:00
|
|
|
}
|
|
|
|
|
2020-08-21 21:28:27 +00:00
|
|
|
func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) (*vfs.Mount, error) {
|
2020-09-17 08:07:55 +00:00
|
|
|
fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount)
|
2020-04-17 17:38:04 +00:00
|
|
|
if err != nil {
|
2020-08-21 21:28:27 +00:00
|
|
|
return nil, fmt.Errorf("mountOptions failed: %w", err)
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
2020-06-02 04:30:28 +00:00
|
|
|
if len(fsName) == 0 {
|
2020-04-25 01:22:21 +00:00
|
|
|
// Filesystem is not supported (e.g. cgroup), just skip it.
|
2020-08-21 21:28:27 +00:00
|
|
|
return nil, nil
|
2020-04-25 01:22:21 +00:00
|
|
|
}
|
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
if err := c.makeMountPoint(ctx, creds, mns, submount.Destination); err != nil {
|
|
|
|
return nil, fmt.Errorf("creating mount point %q: %w", submount.Destination, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if useOverlay {
|
|
|
|
log.Infof("Adding overlay on top of mount %q", submount.Destination)
|
|
|
|
var cleanup func()
|
|
|
|
opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.Destination, err)
|
|
|
|
}
|
|
|
|
defer cleanup()
|
|
|
|
fsName = overlay.Name
|
|
|
|
}
|
|
|
|
|
|
|
|
root := mns.Root()
|
2020-10-13 18:29:21 +00:00
|
|
|
root.IncRef()
|
2020-09-17 08:07:55 +00:00
|
|
|
defer root.DecRef(ctx)
|
|
|
|
target := &vfs.PathOperation{
|
|
|
|
Root: root,
|
|
|
|
Start: root,
|
|
|
|
Path: fspath.Parse(submount.Destination),
|
2020-04-25 01:22:21 +00:00
|
|
|
}
|
2020-08-21 21:28:27 +00:00
|
|
|
mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
2020-05-13 00:24:46 +00:00
|
|
|
log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data)
|
2020-08-21 21:28:27 +00:00
|
|
|
return mnt, nil
|
2020-04-17 17:38:04 +00:00
|
|
|
}
|
|
|
|
|
2020-05-14 01:16:45 +00:00
|
|
|
// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
|
|
|
|
// used for mounts.
|
2020-09-17 08:07:55 +00:00
|
|
|
func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, bool, error) {
|
2020-07-09 00:10:35 +00:00
|
|
|
fsName := m.Type
|
2020-09-17 08:07:55 +00:00
|
|
|
useOverlay := false
|
2020-07-09 00:10:35 +00:00
|
|
|
var data []string
|
2020-10-24 00:46:43 +00:00
|
|
|
var iopts interface{}
|
2020-05-14 01:16:45 +00:00
|
|
|
|
2020-06-02 04:30:28 +00:00
|
|
|
// Find filesystem name and FS specific data field.
|
2020-05-14 01:16:45 +00:00
|
|
|
switch m.Type {
|
|
|
|
case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
|
2020-07-09 00:10:35 +00:00
|
|
|
// Nothing to do.
|
|
|
|
|
2020-05-14 01:16:45 +00:00
|
|
|
case nonefs:
|
|
|
|
fsName = sys.Name
|
|
|
|
|
2020-07-09 00:10:35 +00:00
|
|
|
case tmpfs.Name:
|
2020-05-14 01:16:45 +00:00
|
|
|
var err error
|
2020-06-02 04:30:28 +00:00
|
|
|
data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
|
2020-05-14 01:16:45 +00:00
|
|
|
if err != nil {
|
2020-09-17 08:07:55 +00:00
|
|
|
return "", nil, false, err
|
2020-05-14 01:16:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
case bind:
|
|
|
|
fsName = gofer.Name
|
2020-07-09 00:10:35 +00:00
|
|
|
if m.fd == 0 {
|
|
|
|
// Check that an FD was provided to fails fast. Technically FD=0 is valid,
|
|
|
|
// but unlikely to be correct in this context.
|
2020-09-17 08:07:55 +00:00
|
|
|
return "", nil, false, fmt.Errorf("9P mount requires a connection FD")
|
2020-07-09 00:10:35 +00:00
|
|
|
}
|
2020-06-02 04:30:28 +00:00
|
|
|
data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
|
2020-10-24 00:46:43 +00:00
|
|
|
iopts = gofer.InternalFilesystemOptions{
|
|
|
|
UniqueID: m.Destination,
|
|
|
|
}
|
2020-05-14 01:16:45 +00:00
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
// If configured, add overlay to all writable mounts.
|
|
|
|
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
|
|
|
|
|
2020-05-14 01:16:45 +00:00
|
|
|
default:
|
|
|
|
log.Warningf("ignoring unknown filesystem type %q", m.Type)
|
2020-09-17 08:07:55 +00:00
|
|
|
return "", nil, false, nil
|
2020-05-14 01:16:45 +00:00
|
|
|
}
|
2020-06-02 04:30:28 +00:00
|
|
|
|
|
|
|
opts := &vfs.MountOptions{
|
|
|
|
GetFilesystemOptions: vfs.GetFilesystemOptions{
|
2020-10-24 00:46:43 +00:00
|
|
|
Data: strings.Join(data, ","),
|
|
|
|
InternalData: iopts,
|
2020-06-02 04:30:28 +00:00
|
|
|
},
|
|
|
|
InternalMount: true,
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, o := range m.Options {
|
|
|
|
switch o {
|
|
|
|
case "rw":
|
|
|
|
opts.ReadOnly = false
|
|
|
|
case "ro":
|
|
|
|
opts.ReadOnly = true
|
|
|
|
case "noatime":
|
2020-06-06 02:10:28 +00:00
|
|
|
opts.Flags.NoATime = true
|
2020-06-02 04:30:28 +00:00
|
|
|
case "noexec":
|
|
|
|
opts.Flags.NoExec = true
|
|
|
|
default:
|
|
|
|
log.Warningf("ignoring unknown mount option %q", o)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
return fsName, opts, useOverlay, nil
|
2020-05-14 01:16:45 +00:00
|
|
|
}
|
|
|
|
|
2020-06-02 04:30:28 +00:00
|
|
|
// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
|
|
|
|
// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
|
|
|
|
// the host /tmp, but this is a nice optimization, and fixes some apps that call
|
|
|
|
// mknod in /tmp. It's unsafe to mount tmpfs if:
|
|
|
|
// 1. /tmp is mounted explicitly: we should not override user's wish
|
|
|
|
// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
|
|
|
|
//
|
|
|
|
// Note that when there are submounts inside of '/tmp', directories for the
|
|
|
|
// mount points must be present, making '/tmp' not empty anymore.
|
2020-08-20 01:35:35 +00:00
|
|
|
func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
|
2020-06-02 04:30:28 +00:00
|
|
|
for _, m := range c.mounts {
|
|
|
|
// m.Destination has been cleaned, so it's to use equality here.
|
|
|
|
if m.Destination == "/tmp" {
|
|
|
|
log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
root := mns.Root()
|
2020-10-13 18:29:21 +00:00
|
|
|
root.IncRef()
|
2020-08-03 20:33:47 +00:00
|
|
|
defer root.DecRef(ctx)
|
2020-06-02 04:30:28 +00:00
|
|
|
pop := vfs.PathOperation{
|
|
|
|
Root: root,
|
|
|
|
Start: root,
|
|
|
|
Path: fspath.Parse("/tmp"),
|
|
|
|
}
|
|
|
|
// TODO(gvisor.dev/issue/2782): Use O_PATH when available.
|
2020-08-08 03:06:39 +00:00
|
|
|
fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY})
|
2020-06-02 04:30:28 +00:00
|
|
|
switch err {
|
|
|
|
case nil:
|
2020-08-08 03:06:39 +00:00
|
|
|
defer fd.DecRef(ctx)
|
|
|
|
|
|
|
|
err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
|
|
|
|
if dirent.Name != "." && dirent.Name != ".." {
|
|
|
|
return syserror.ENOTEMPTY
|
|
|
|
}
|
2020-06-02 04:30:28 +00:00
|
|
|
return nil
|
2020-08-08 03:06:39 +00:00
|
|
|
}))
|
|
|
|
switch err {
|
|
|
|
case nil:
|
|
|
|
log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`)
|
|
|
|
case syserror.ENOTEMPTY:
|
2020-06-02 04:30:28 +00:00
|
|
|
// If more than "." and ".." is found, skip internal tmpfs to prevent
|
|
|
|
// hiding existing files.
|
|
|
|
log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`)
|
|
|
|
return nil
|
2020-08-08 03:06:39 +00:00
|
|
|
default:
|
|
|
|
return err
|
2020-06-02 04:30:28 +00:00
|
|
|
}
|
|
|
|
fallthrough
|
|
|
|
|
|
|
|
case syserror.ENOENT:
|
|
|
|
// No '/tmp' found (or fallthrough from above). It's safe to mount internal
|
|
|
|
// tmpfs.
|
|
|
|
tmpMount := specs.Mount{
|
|
|
|
Type: tmpfs.Name,
|
|
|
|
Destination: "/tmp",
|
|
|
|
// Sticky bit is added to prevent accidental deletion of files from
|
|
|
|
// another user. This is normally done for /tmp.
|
|
|
|
Options: []string{"mode=01777"},
|
|
|
|
}
|
2020-08-21 21:28:27 +00:00
|
|
|
_, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
|
|
|
|
return err
|
2020-06-02 04:30:28 +00:00
|
|
|
|
2020-08-08 03:06:39 +00:00
|
|
|
case syserror.ENOTDIR:
|
|
|
|
// Not a dir?! Let it be.
|
|
|
|
return nil
|
|
|
|
|
2020-06-02 04:30:28 +00:00
|
|
|
default:
|
2020-08-08 03:06:39 +00:00
|
|
|
return fmt.Errorf(`opening "/tmp" inside container: %w`, err)
|
2020-06-02 04:30:28 +00:00
|
|
|
}
|
|
|
|
}
|
2020-07-09 00:10:35 +00:00
|
|
|
|
|
|
|
// processHintsVFS2 processes annotations that container hints about how volumes
|
|
|
|
// should be mounted (e.g. a volume shared between containers). It must be
|
|
|
|
// called for the root container only.
|
2020-08-20 01:35:35 +00:00
|
|
|
func (c *containerMounter) processHintsVFS2(conf *config.Config, creds *auth.Credentials) error {
|
2020-07-09 00:10:35 +00:00
|
|
|
ctx := c.k.SupervisorContext()
|
|
|
|
for _, hint := range c.hints.mounts {
|
|
|
|
// TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
|
|
|
|
// common gofer to mount all shared volumes.
|
|
|
|
if hint.mount.Type != tmpfs.Name {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
|
|
|
|
mnt, err := c.mountSharedMasterVFS2(ctx, conf, hint, creds)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
|
|
|
|
}
|
|
|
|
hint.vfsMount = mnt
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// mountSharedMasterVFS2 mounts the master of a volume that is shared among
|
|
|
|
// containers in a pod.
|
2020-08-20 01:35:35 +00:00
|
|
|
func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *config.Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) {
|
2020-07-09 00:10:35 +00:00
|
|
|
// Map mount type to filesystem name, and parse out the options that we are
|
|
|
|
// capable of dealing with.
|
|
|
|
mntFD := &mountAndFD{Mount: hint.mount}
|
2020-09-17 08:07:55 +00:00
|
|
|
fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, mntFD)
|
2020-07-09 00:10:35 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if len(fsName) == 0 {
|
|
|
|
return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type)
|
|
|
|
}
|
2020-09-17 08:07:55 +00:00
|
|
|
|
|
|
|
if useOverlay {
|
|
|
|
log.Infof("Adding overlay on top of shared mount %q", mntFD.Destination)
|
|
|
|
var cleanup func()
|
|
|
|
opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.Destination, err)
|
|
|
|
}
|
|
|
|
defer cleanup()
|
|
|
|
fsName = overlay.Name
|
|
|
|
}
|
|
|
|
|
2020-07-09 00:10:35 +00:00
|
|
|
return c.k.VFS().MountDisconnected(ctx, creds, "", fsName, opts)
|
|
|
|
}
|
|
|
|
|
|
|
|
// mountSharedSubmount binds mount to a previously mounted volume that is shared
|
|
|
|
// among containers in the same pod.
|
2020-08-21 21:28:27 +00:00
|
|
|
func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) (*vfs.Mount, error) {
|
2020-07-09 00:10:35 +00:00
|
|
|
if err := source.checkCompatible(mount); err != nil {
|
2020-08-21 21:28:27 +00:00
|
|
|
return nil, err
|
2020-07-09 00:10:35 +00:00
|
|
|
}
|
|
|
|
|
2020-09-17 08:07:55 +00:00
|
|
|
// Ignore data and useOverlay because these were already applied to
|
|
|
|
// the master mount.
|
|
|
|
_, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount})
|
2020-07-09 00:10:35 +00:00
|
|
|
if err != nil {
|
2020-08-21 21:28:27 +00:00
|
|
|
return nil, err
|
2020-07-09 00:10:35 +00:00
|
|
|
}
|
|
|
|
newMnt, err := c.k.VFS().NewDisconnectedMount(source.vfsMount.Filesystem(), source.vfsMount.Root(), opts)
|
|
|
|
if err != nil {
|
2020-08-21 21:28:27 +00:00
|
|
|
return nil, err
|
2020-07-09 00:10:35 +00:00
|
|
|
}
|
2020-08-03 20:33:47 +00:00
|
|
|
defer newMnt.DecRef(ctx)
|
2020-07-09 00:10:35 +00:00
|
|
|
|
|
|
|
root := mns.Root()
|
2020-10-13 18:29:21 +00:00
|
|
|
root.IncRef()
|
2020-08-03 20:33:47 +00:00
|
|
|
defer root.DecRef(ctx)
|
2020-07-09 00:10:35 +00:00
|
|
|
target := &vfs.PathOperation{
|
|
|
|
Root: root,
|
|
|
|
Start: root,
|
|
|
|
Path: fspath.Parse(mount.Destination),
|
|
|
|
}
|
2020-09-17 08:07:55 +00:00
|
|
|
|
|
|
|
if err := c.makeMountPoint(ctx, creds, mns, mount.Destination); err != nil {
|
|
|
|
return nil, fmt.Errorf("creating mount point %q: %w", mount.Destination, err)
|
|
|
|
}
|
|
|
|
|
2020-07-09 00:10:35 +00:00
|
|
|
if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil {
|
2020-08-21 21:28:27 +00:00
|
|
|
return nil, err
|
2020-07-09 00:10:35 +00:00
|
|
|
}
|
|
|
|
log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name)
|
2020-08-21 21:28:27 +00:00
|
|
|
return newMnt, nil
|
2020-07-09 00:10:35 +00:00
|
|
|
}
|
2020-09-17 08:07:55 +00:00
|
|
|
|
|
|
|
func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, dest string) error {
|
|
|
|
root := mns.Root()
|
2020-10-13 18:29:21 +00:00
|
|
|
root.IncRef()
|
2020-09-17 08:07:55 +00:00
|
|
|
defer root.DecRef(ctx)
|
|
|
|
target := &vfs.PathOperation{
|
|
|
|
Root: root,
|
|
|
|
Start: root,
|
|
|
|
Path: fspath.Parse(dest),
|
|
|
|
}
|
|
|
|
// First check if mount point exists. When overlay is enabled, gofer doesn't
|
|
|
|
// allow changes to the FS, making MakeSytheticMountpoint() ineffective
|
|
|
|
// because MkdirAt fails with EROFS even if file exists.
|
|
|
|
vd, err := c.k.VFS().GetDentryAt(ctx, creds, target, &vfs.GetDentryOptions{})
|
|
|
|
if err == nil {
|
|
|
|
// File exists, we're done.
|
|
|
|
vd.DecRef(ctx)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds)
|
|
|
|
}
|
2020-10-24 00:46:43 +00:00
|
|
|
|
|
|
|
// configureRestore returns an updated context.Context including filesystem
|
|
|
|
// state used by restore defined by conf.
|
|
|
|
func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) {
|
|
|
|
fdmap := make(map[string]int)
|
|
|
|
fdmap["/"] = c.fds.remove()
|
|
|
|
mounts, err := c.prepareMountsVFS2()
|
|
|
|
if err != nil {
|
|
|
|
return ctx, err
|
|
|
|
}
|
|
|
|
for i := range c.mounts {
|
|
|
|
submount := &mounts[i]
|
|
|
|
if submount.fd >= 0 {
|
|
|
|
fdmap[submount.Destination] = submount.fd
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil
|
|
|
|
}
|