Merge f520d0d5
(automated)
This commit is contained in:
commit
4f56f1bf22
3
go.mod
3
go.mod
|
@ -1,8 +1,9 @@
|
|||
module gvisor.googlesource.com/gvisor
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/cenkalti/backoff v2.1.1
|
||||
github.com/cenkalti/backoff v2.2.0
|
||||
github.com/gofrs/flock v0.6.1-0.20180915234121-886344bea079
|
||||
github.com/golang/mock v1.3.1
|
||||
github.com/golang/protobuf v1.3.1
|
||||
|
|
|
@ -545,12 +545,28 @@ type lockedWriter struct {
|
|||
|
||||
// Write implements io.Writer.Write.
|
||||
func (w *lockedWriter) Write(buf []byte) (int, error) {
|
||||
n, err := w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf), w.File.offset)
|
||||
return int(n), err
|
||||
return w.WriteAt(buf, w.File.offset)
|
||||
}
|
||||
|
||||
// WriteAt implements io.WriterAt.WriteAt.
|
||||
func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) {
|
||||
n, err := w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf), offset)
|
||||
return int(n), err
|
||||
var (
|
||||
written int
|
||||
err error
|
||||
)
|
||||
// The io.Writer contract requires that Write writes all available
|
||||
// bytes and does not return short writes. This causes errors with
|
||||
// io.Copy, since our own Write interface does not have this same
|
||||
// contract. Enforce that here.
|
||||
for written < len(buf) {
|
||||
var n int64
|
||||
n, err = w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf[written:]), offset+int64(written))
|
||||
if n > 0 {
|
||||
written += int(n)
|
||||
}
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
return written, err
|
||||
}
|
||||
|
|
|
@ -139,3 +139,8 @@ func (e *endpoint) UnidirectionalConnect() (transport.ConnectedEndpoint, *syserr
|
|||
func (e *endpoint) Release() {
|
||||
e.inode.DecRef()
|
||||
}
|
||||
|
||||
// Passcred implements transport.BoundEndpoint.Passcred. This endpoint always
// reports false.
|
||||
func (e *endpoint) Passcred() bool {
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -2,11 +2,10 @@ package kernel
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"gvisor.googlesource.com/gvisor/third_party/gvsync"
|
||||
"reflect"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"gvisor.googlesource.com/gvisor/third_party/gvsync"
|
||||
)
|
||||
|
||||
// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
package ring0
|
||||
|
||||
import (
|
||||
"gvisor.googlesource.com/gvisor/pkg/cpuid"
|
||||
"syscall"
|
||||
|
||||
"fmt"
|
||||
"gvisor.googlesource.com/gvisor/pkg/cpuid"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
|
||||
"io"
|
||||
|
|
|
@ -406,12 +406,20 @@ func makeCreds(t *kernel.Task, socketOrEndpoint interface{}) SCMCredentials {
|
|||
return nil
|
||||
}
|
||||
if cr, ok := socketOrEndpoint.(transport.Credentialer); ok && (cr.Passcred() || cr.ConnectedPasscred()) {
|
||||
tcred := t.Credentials()
|
||||
return &scmCredentials{t, tcred.EffectiveKUID, tcred.EffectiveKGID}
|
||||
return MakeCreds(t)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MakeCreds creates default SCMCredentials for task t, populated with t's
// effective KUID and KGID. It returns nil if t is nil.
|
||||
func MakeCreds(t *kernel.Task) SCMCredentials {
|
||||
if t == nil {
|
||||
return nil
|
||||
}
|
||||
tcred := t.Credentials()
|
||||
return &scmCredentials{t, tcred.EffectiveKUID, tcred.EffectiveKGID}
|
||||
}
|
||||
|
||||
// New creates default control messages if needed.
|
||||
func New(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRights) transport.ControlMessages {
|
||||
return transport.ControlMessages{
|
||||
|
|
|
@ -237,6 +237,10 @@ type BoundEndpoint interface {
|
|||
// endpoint.
|
||||
UnidirectionalConnect() (ConnectedEndpoint, *syserr.Error)
|
||||
|
||||
// Passcred returns whether or not the SO_PASSCRED socket option is
|
||||
// enabled on this end.
|
||||
Passcred() bool
|
||||
|
||||
// Release releases any resources held by the BoundEndpoint. It must be
|
||||
// called before dropping all references to a BoundEndpoint returned by a
|
||||
// function.
|
||||
|
|
|
@ -385,6 +385,10 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
|
|||
}
|
||||
defer ep.Release()
|
||||
w.To = ep
|
||||
|
||||
if ep.Passcred() && w.Control.Credentials == nil {
|
||||
w.Control.Credentials = control.MakeCreds(t)
|
||||
}
|
||||
}
|
||||
|
||||
n, err := src.CopyInTo(t, &w)
|
||||
|
@ -516,7 +520,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
|
|||
if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || dontWait {
|
||||
var from interface{}
|
||||
var fromLen uint32
|
||||
if r.From != nil {
|
||||
if r.From != nil && len([]byte(r.From.Addr)) != 0 {
|
||||
from, fromLen = epsocket.ConvertAddress(linux.AF_UNIX, *r.From)
|
||||
}
|
||||
|
||||
|
|
|
@ -2,11 +2,10 @@ package time
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"gvisor.googlesource.com/gvisor/third_party/gvsync"
|
||||
"reflect"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"gvisor.googlesource.com/gvisor/third_party/gvsync"
|
||||
)
|
||||
|
||||
// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race
|
||||
|
|
|
@ -19,7 +19,6 @@ import (
|
|||
"encoding/binary"
|
||||
"hash"
|
||||
"io"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
@ -307,7 +306,6 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header
|
|||
|
||||
func (e *endpoint) incSynRcvdCount() bool {
|
||||
e.mu.Lock()
|
||||
log.Printf("l: %d, c: %d, e.synRcvdCount: %d", len(e.acceptedChan), cap(e.acceptedChan), e.synRcvdCount)
|
||||
if l, c := len(e.acceptedChan), cap(e.acceptedChan); l == c && e.synRcvdCount >= c {
|
||||
e.mu.Unlock()
|
||||
return false
|
||||
|
@ -333,17 +331,14 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
|
|||
// Drop the SYN if the listen endpoint's accept queue is
|
||||
// overflowing.
|
||||
if e.incSynRcvdCount() {
|
||||
log.Printf("processing syn packet")
|
||||
s.incRef()
|
||||
go e.handleSynSegment(ctx, s, &opts) // S/R-SAFE: synRcvdCount is the barrier.
|
||||
return
|
||||
}
|
||||
log.Printf("dropping syn packet")
|
||||
e.stack.Stats().TCP.ListenOverflowSynDrop.Increment()
|
||||
e.stack.Stats().DroppedPackets.Increment()
|
||||
return
|
||||
} else {
|
||||
// TODO(bhaskerh): Increment syncookie sent stat.
|
||||
cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS))
|
||||
// Send SYN with window scaling because we currently
|
||||
// don't encode this information in the cookie.
|
||||
|
|
|
@ -237,7 +237,7 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
|
|||
return fmt.Errorf("start arguments must contain stdin, stderr, and stdout followed by at least one file for the container root gofer")
|
||||
}
|
||||
|
||||
err := cm.l.startContainer(cm.l.k, args.Spec, args.Conf, args.CID, args.FilePayload.Files)
|
||||
err := cm.l.startContainer(args.Spec, args.Conf, args.CID, args.FilePayload.Files)
|
||||
if err != nil {
|
||||
log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err)
|
||||
return err
|
||||
|
@ -340,8 +340,8 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
|
|||
cm.l.k = k
|
||||
|
||||
// Set up the restore environment.
|
||||
fds := &fdDispenser{fds: cm.l.goferFDs}
|
||||
renv, err := createRestoreEnvironment(cm.l.spec, cm.l.conf, fds)
|
||||
mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k)
|
||||
renv, err := mntr.createRestoreEnvironment(cm.l.conf)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating RestoreEnvironment: %v", err)
|
||||
}
|
||||
|
@ -369,11 +369,11 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
|
|||
k.Timekeeper().SetClocks(time.NewCalibratedClocks())
|
||||
|
||||
// Since we have a new kernel we also must make a new watchdog.
|
||||
watchdog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction)
|
||||
dog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction)
|
||||
|
||||
// Change the loader fields to reflect the changes made when restoring.
|
||||
cm.l.k = k
|
||||
cm.l.watchdog = watchdog
|
||||
cm.l.watchdog = dog
|
||||
cm.l.rootProcArgs = kernel.CreateProcessArgs{}
|
||||
cm.l.restore = true
|
||||
|
||||
|
@ -420,16 +420,12 @@ type WaitPIDArgs struct {
|
|||
|
||||
// CID is the container ID.
|
||||
CID string
|
||||
|
||||
// ClearStatus determines whether the exit status of the process should
|
||||
// be cleared when WaitPID returns.
|
||||
ClearStatus bool
|
||||
}
|
||||
|
||||
// WaitPID waits for the process with PID 'pid' in the sandbox.
|
||||
func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error {
|
||||
log.Debugf("containerManager.Wait")
|
||||
return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, args.ClearStatus, waitStatus)
|
||||
return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus)
|
||||
}
|
||||
|
||||
// SignalDeliveryMode enumerates different signal delivery modes.
|
||||
|
|
|
@ -28,11 +28,12 @@ import (
|
|||
// createFDMap creates an FD map that contains stdin, stdout, and stderr. If
|
||||
// console is true, then ioctl calls will be passed through to the host FD.
|
||||
// Upon success, createFDMap dups then closes stdioFDs.
|
||||
func createFDMap(ctx context.Context, k *kernel.Kernel, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) {
|
||||
func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) {
|
||||
if len(stdioFDs) != 3 {
|
||||
return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
|
||||
}
|
||||
|
||||
k := kernel.KernelFromContext(ctx)
|
||||
fdm := k.NewFDMap()
|
||||
defer fdm.DecRef()
|
||||
mounter := fs.FileOwnerFromContext(ctx)
|
||||
|
|
727
runsc/boot/fs.go
727
runsc/boot/fs.go
|
@ -29,9 +29,6 @@ import (
|
|||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/sys"
|
||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs"
|
||||
_ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tty"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
|
||||
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
|
||||
|
@ -40,6 +37,8 @@ import (
|
|||
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/gofer"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/ramfs"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
|
||||
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
|
||||
"gvisor.googlesource.com/gvisor/pkg/syserror"
|
||||
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
||||
)
|
||||
|
@ -65,67 +64,24 @@ const (
|
|||
nonefs = "none"
|
||||
)
|
||||
|
||||
type fdDispenser struct {
|
||||
fds []int
|
||||
}
|
||||
func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
|
||||
// Upper layer uses the same flags as lower, but it must be read-write.
|
||||
upperFlags := lowerFlags
|
||||
upperFlags.ReadOnly = false
|
||||
|
||||
func (f *fdDispenser) remove() int {
|
||||
if f.empty() {
|
||||
panic("fdDispenser out of fds")
|
||||
tmpFS := mustFindFilesystem("tmpfs")
|
||||
if !fs.IsDir(lower.StableAttr) {
|
||||
// Create overlay on top of mount file, e.g. /etc/hostname.
|
||||
msrc := fs.NewCachingMountSource(tmpFS, upperFlags)
|
||||
return fs.NewOverlayRootFile(ctx, msrc, lower, upperFlags)
|
||||
}
|
||||
rv := f.fds[0]
|
||||
f.fds = f.fds[1:]
|
||||
return rv
|
||||
}
|
||||
|
||||
func (f *fdDispenser) empty() bool {
|
||||
return len(f.fds) == 0
|
||||
}
|
||||
|
||||
func adjustDirentCache(k *kernel.Kernel) error {
|
||||
var hl syscall.Rlimit
|
||||
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &hl); err != nil {
|
||||
return fmt.Errorf("getting RLIMIT_NOFILE: %v", err)
|
||||
}
|
||||
if int64(hl.Cur) != syscall.RLIM_INFINITY {
|
||||
newSize := hl.Cur / 2
|
||||
if newSize < gofer.DefaultDirentCacheSize {
|
||||
log.Infof("Setting gofer dirent cache size to %d", newSize)
|
||||
gofer.DefaultDirentCacheSize = newSize
|
||||
k.DirentCacheLimiter = fs.NewDirentCacheLimiter(newSize)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupRootContainerFS creates a mount namespace containing the root filesystem
|
||||
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
|
||||
// 'setMountNS' is called after namespace is created. It must set the mount NS
|
||||
// to 'rootCtx'.
|
||||
func setupRootContainerFS(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, goferFDs []int, setMountNS func(*fs.MountNamespace)) error {
|
||||
mounts := compileMounts(spec)
|
||||
|
||||
// Create a tmpfs mount where we create and mount a root filesystem for
|
||||
// each child container.
|
||||
mounts = append(mounts, specs.Mount{
|
||||
Type: tmpfs,
|
||||
Destination: ChildContainersDir,
|
||||
})
|
||||
|
||||
fds := &fdDispenser{fds: goferFDs}
|
||||
rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts)
|
||||
// Create overlay on top of mount dir.
|
||||
upper, err := tmpFS.Mount(ctx, name+"-upper", upperFlags, "", nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating root mount: %v", err)
|
||||
return nil, fmt.Errorf("creating tmpfs overlay: %v", err)
|
||||
}
|
||||
mns, err := fs.NewMountNamespace(userCtx, rootInode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating root mount namespace: %v", err)
|
||||
}
|
||||
setMountNS(mns)
|
||||
|
||||
root := mns.Root()
|
||||
defer root.DecRef()
|
||||
return mountSubmounts(rootCtx, conf, mns, root, mounts, fds)
|
||||
return fs.NewOverlayRoot(ctx, upper, lower, upperFlags)
|
||||
}
|
||||
|
||||
// compileMounts returns the supported mounts from the mount spec, adding any
|
||||
|
@ -184,186 +140,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
|
|||
return mounts
|
||||
}
|
||||
|
||||
// createRootMount creates the root filesystem.
|
||||
func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser, mounts []specs.Mount) (*fs.Inode, error) {
|
||||
// First construct the filesystem from the spec.Root.
|
||||
mf := fs.MountSourceFlags{ReadOnly: spec.Root.Readonly || conf.Overlay}
|
||||
|
||||
var (
|
||||
rootInode *fs.Inode
|
||||
err error
|
||||
)
|
||||
|
||||
fd := fds.remove()
|
||||
log.Infof("Mounting root over 9P, ioFD: %d", fd)
|
||||
p9FS := mustFindFilesystem("9p")
|
||||
opts := p9MountOptions(fd, conf.FileAccess)
|
||||
rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating root mount point: %v", err)
|
||||
}
|
||||
|
||||
// We need to overlay the root on top of a ramfs with stub directories
|
||||
// for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always
|
||||
// mounted even if they are not in the spec.
|
||||
submounts := append(subtargets("/", mounts), "/dev", "/sys", "/proc", "/tmp")
|
||||
rootInode, err = addSubmountOverlay(ctx, rootInode, submounts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("adding submount overlay: %v", err)
|
||||
}
|
||||
|
||||
if conf.Overlay && !spec.Root.Readonly {
|
||||
log.Debugf("Adding overlay on top of root mount")
|
||||
// Overlay a tmpfs filesystem on top of the root.
|
||||
rootInode, err = addOverlay(ctx, conf, rootInode, "root-overlay-upper", mf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Mounted %q to %q type root", spec.Root.Path, "/")
|
||||
return rootInode, nil
|
||||
}
|
||||
|
||||
func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
|
||||
// Upper layer uses the same flags as lower, but it must be read-write.
|
||||
lowerFlags.ReadOnly = false
|
||||
|
||||
tmpFS := mustFindFilesystem("tmpfs")
|
||||
if !fs.IsDir(lower.StableAttr) {
|
||||
// Create overlay on top of mount file, e.g. /etc/hostname.
|
||||
msrc := fs.NewCachingMountSource(tmpFS, lowerFlags)
|
||||
return fs.NewOverlayRootFile(ctx, msrc, lower, lowerFlags)
|
||||
}
|
||||
|
||||
// Create overlay on top of mount dir.
|
||||
upper, err := tmpFS.Mount(ctx, name+"-upper", lowerFlags, "", nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating tmpfs overlay: %v", err)
|
||||
}
|
||||
return fs.NewOverlayRoot(ctx, upper, lower, lowerFlags)
|
||||
}
|
||||
|
||||
// getMountNameAndOptions retrieves the fsName, opts, and useOverlay values
|
||||
// used for mounts.
|
||||
func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (string, []string, bool, error) {
|
||||
var (
|
||||
fsName string
|
||||
opts []string
|
||||
useOverlay bool
|
||||
err error
|
||||
)
|
||||
|
||||
switch m.Type {
|
||||
case devpts, devtmpfs, proc, sysfs:
|
||||
fsName = m.Type
|
||||
case nonefs:
|
||||
fsName = sysfs
|
||||
case tmpfs:
|
||||
fsName = m.Type
|
||||
|
||||
// tmpfs has some extra supported options that we must pass through.
|
||||
opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
|
||||
|
||||
case bind:
|
||||
fd := fds.remove()
|
||||
fsName = "9p"
|
||||
// Non-root bind mounts are always shared.
|
||||
opts = p9MountOptions(fd, FileAccessShared)
|
||||
// If configured, add overlay to all writable mounts.
|
||||
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
|
||||
|
||||
default:
|
||||
// TODO(nlacasse): Support all the mount types and make this a
|
||||
// fatal error. Most applications will "just work" without
|
||||
// them, so this is a warning for now.
|
||||
// we do not support.
|
||||
log.Warningf("ignoring unknown filesystem type %q", m.Type)
|
||||
}
|
||||
return fsName, opts, useOverlay, err
|
||||
}
|
||||
|
||||
func mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, mounts []specs.Mount, fds *fdDispenser) error {
|
||||
for _, m := range mounts {
|
||||
if err := mountSubmount(ctx, conf, mns, root, fds, m, mounts); err != nil {
|
||||
return fmt.Errorf("mount submount %q: %v", m.Destination, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := mountTmp(ctx, conf, mns, root, mounts); err != nil {
|
||||
return fmt.Errorf("mount submount %q: %v", "tmp", err)
|
||||
}
|
||||
|
||||
if !fds.empty() {
|
||||
return fmt.Errorf("not all mount points were consumed, remaining: %v", fds)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountSubmount mounts volumes inside the container's root. Because mounts may
|
||||
// be readonly, a lower ramfs overlay is added to create the mount point dir.
|
||||
// Another overlay is added with tmpfs on top if Config.Overlay is true.
|
||||
// 'm.Destination' must be an absolute path with '..' and symlinks resolved.
|
||||
func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error {
|
||||
// Map mount type to filesystem name, and parse out the options that we are
|
||||
// capable of dealing with.
|
||||
fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds)
|
||||
|
||||
// Return the error or nil that corresponds to the default case in getMountNameAndOptions.
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if fsName == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// All filesystem names should have been mapped to something we know.
|
||||
filesystem := mustFindFilesystem(fsName)
|
||||
|
||||
mf := mountFlags(m.Options)
|
||||
if useOverlay {
|
||||
// All writes go to upper, be paranoid and make lower readonly.
|
||||
mf.ReadOnly = true
|
||||
}
|
||||
|
||||
inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating mount with source %q: %v", m.Source, err)
|
||||
}
|
||||
|
||||
// If there are submounts, we need to overlay the mount on top of a
|
||||
// ramfs with stub directories for submount paths.
|
||||
submounts := subtargets(m.Destination, mounts)
|
||||
if len(submounts) > 0 {
|
||||
log.Infof("Adding submount overlay over %q", m.Destination)
|
||||
inode, err = addSubmountOverlay(ctx, inode, submounts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding submount overlay: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if useOverlay {
|
||||
log.Debugf("Adding overlay on top of mount %q", m.Destination)
|
||||
inode, err = addOverlay(ctx, conf, inode, m.Type, mf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
maxTraversals := uint(0)
|
||||
dirent, err := mns.FindInode(ctx, root, root, m.Destination, &maxTraversals)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err)
|
||||
}
|
||||
defer dirent.DecRef()
|
||||
if err := mns.Mount(ctx, dirent, inode); err != nil {
|
||||
return fmt.Errorf("mount %q error: %v", m.Destination, err)
|
||||
}
|
||||
|
||||
log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
|
||||
return nil
|
||||
}
|
||||
|
||||
// p9MountOptions creates a slice of options for a p9 mount.
|
||||
func p9MountOptions(fd int, fa FileAccessType) []string {
|
||||
opts := []string{
|
||||
|
@ -416,82 +192,6 @@ func mountDevice(m specs.Mount) string {
|
|||
return "none"
|
||||
}
|
||||
|
||||
// addRestoreMount adds a mount to the MountSources map used for restoring a
|
||||
// checkpointed container.
|
||||
func addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount, fds *fdDispenser) error {
|
||||
fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds)
|
||||
|
||||
// Return the error or nil that corresponds to the default case in getMountNameAndOptions.
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO(nlacasse): Fix this when we support all the mount types and
|
||||
// make this a fatal error.
|
||||
if fsName == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
newMount := fs.MountArgs{
|
||||
Dev: mountDevice(m),
|
||||
Flags: mountFlags(m.Options),
|
||||
DataString: strings.Join(opts, ","),
|
||||
}
|
||||
if useOverlay {
|
||||
newMount.Flags.ReadOnly = true
|
||||
}
|
||||
renv.MountSources[fsName] = append(renv.MountSources[fsName], newMount)
|
||||
log.Infof("Added mount at %q: %+v", fsName, newMount)
|
||||
return nil
|
||||
}
|
||||
|
||||
// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts
|
||||
// to the environment.
|
||||
func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser) (*fs.RestoreEnvironment, error) {
|
||||
renv := &fs.RestoreEnvironment{
|
||||
MountSources: make(map[string][]fs.MountArgs),
|
||||
}
|
||||
|
||||
// Add root mount.
|
||||
fd := fds.remove()
|
||||
opts := p9MountOptions(fd, conf.FileAccess)
|
||||
|
||||
mf := fs.MountSourceFlags{}
|
||||
if spec.Root.Readonly || conf.Overlay {
|
||||
mf.ReadOnly = true
|
||||
}
|
||||
|
||||
rootMount := fs.MountArgs{
|
||||
Dev: rootDevice,
|
||||
Flags: mf,
|
||||
DataString: strings.Join(opts, ","),
|
||||
}
|
||||
renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
|
||||
|
||||
// Add submounts.
|
||||
var tmpMounted bool
|
||||
for _, m := range compileMounts(spec) {
|
||||
if err := addRestoreMount(conf, renv, m, fds); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if filepath.Clean(m.Destination) == "/tmp" {
|
||||
tmpMounted = true
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(b/67958150): handle '/tmp' properly (see mountTmp()).
|
||||
if !tmpMounted {
|
||||
tmpMount := specs.Mount{
|
||||
Type: tmpfs,
|
||||
Destination: "/tmp",
|
||||
}
|
||||
if err := addRestoreMount(conf, renv, tmpMount, fds); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return renv, nil
|
||||
}
|
||||
|
||||
func mountFlags(opts []string) fs.MountSourceFlags {
|
||||
mf := fs.MountSourceFlags{}
|
||||
for _, o := range opts {
|
||||
|
@ -546,22 +246,83 @@ func subtargets(root string, mnts []specs.Mount) []string {
|
|||
return targets
|
||||
}
|
||||
|
||||
// setupContainerFS is used to set up the file system and amend the procArgs accordingly.
|
||||
// procArgs are passed by reference and the FDMap field is modified. It dups stdioFDs.
|
||||
func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, stdioFDs, goferFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error {
|
||||
ctx := procArgs.NewContext(k)
|
||||
|
||||
// Create the FD map, which will set stdin, stdout, and stderr. If console
|
||||
// is true, then ioctl calls will be passed through to the host fd.
|
||||
fdm, err := createFDMap(ctx, k, ls, console, stdioFDs)
|
||||
// setExecutablePath sets the procArgs.Filename by searching the PATH for an
|
||||
// executable matching the procArgs.Argv[0].
|
||||
func setExecutablePath(ctx context.Context, mns *fs.MountNamespace, procArgs *kernel.CreateProcessArgs) error {
|
||||
paths := fs.GetPath(procArgs.Envv)
|
||||
exe := procArgs.Argv[0]
|
||||
f, err := mns.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("importing fds: %v", err)
|
||||
return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
|
||||
}
|
||||
procArgs.Filename = f
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateProcess takes a reference on FDMap if successful. We
|
||||
// won't need ours either way.
|
||||
procArgs.FDMap = fdm
|
||||
// adjustDirentCache shrinks the gofer dirent cache when the host
// RLIMIT_NOFILE soft limit is finite. If half of that limit is smaller than
// gofer.DefaultDirentCacheSize, both gofer.DefaultDirentCacheSize and
// k.DirentCacheLimiter are set to that halved value. It returns an error only
// when the rlimit cannot be read.
func adjustDirentCache(k *kernel.Kernel) error {
|
||||
var hl syscall.Rlimit
|
||||
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &hl); err != nil {
|
||||
return fmt.Errorf("getting RLIMIT_NOFILE: %v", err)
|
||||
}
|
||||
// An infinite soft limit leaves the cache size untouched.
if int64(hl.Cur) != syscall.RLIM_INFINITY {
|
||||
newSize := hl.Cur / 2
|
||||
// Only ever lower the cache size, never raise it.
if newSize < gofer.DefaultDirentCacheSize {
|
||||
log.Infof("Setting gofer dirent cache size to %d", newSize)
|
||||
gofer.DefaultDirentCacheSize = newSize
|
||||
k.DirentCacheLimiter = fs.NewDirentCacheLimiter(newSize)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fdDispenser hands out file descriptors from fds one at a time, in slice
// order.
type fdDispenser struct {
|
||||
// fds holds the FDs that have not been dispensed yet.
fds []int
|
||||
}
|
||||
|
||||
// remove returns the first remaining FD and drops it from the dispenser. It
// panics if no FDs are left.
func (f *fdDispenser) remove() int {
|
||||
if f.empty() {
|
||||
panic("fdDispenser out of fds")
|
||||
}
|
||||
rv := f.fds[0]
|
||||
f.fds = f.fds[1:]
|
||||
return rv
|
||||
}
|
||||
|
||||
// empty reports whether every FD has already been dispensed.
func (f *fdDispenser) empty() bool {
|
||||
return len(f.fds) == 0
|
||||
}
|
||||
|
||||
// containerMounter bundles the per-container state used while setting up a
// container's filesystem: its ID, root spec, submounts, gofer FDs and kernel.
type containerMounter struct {
|
||||
// cid is the container ID. May be set to empty for the root container.
|
||||
cid string
|
||||

||||
// root is the Root section taken from the container spec.
root *specs.Root
|
||||

||||
// mounts is the set of submounts for the container. It's a copy from the spec
|
||||
// that may be freely modified without affecting the original spec.
|
||||
mounts []specs.Mount
|
||||

||||
// fds is the list of FDs to be dispensed for mounts that require it.
|
||||
fds fdDispenser
|
||||

||||
// k is the kernel this mounter operates on.
k *kernel.Kernel
|
||||
}
|
||||
|
||||
// newContainerMounter builds a containerMounter for container cid. The root
// comes from spec.Root, the submounts from compileMounts(spec), and goferFDs
// seeds the FD dispenser.
func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel) *containerMounter {
|
||||
return &containerMounter{
|
||||
cid: cid,
|
||||
root: spec.Root,
|
||||
mounts: compileMounts(spec),
|
||||
fds: fdDispenser{fds: goferFDs},
|
||||
k: k,
|
||||
}
|
||||
}
|
||||
|
||||
// setupFS is used to set up the file system for containers and amend
|
||||
// the procArgs accordingly. This is the main entry point for the rest of the
|
||||
// functions in this file. procArgs are passed by reference and the FDMap field
|
||||
// is modified. It dups stdioFDs.
|
||||
func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs, creds *auth.Credentials) error {
|
||||
// Use root user to configure mounts. The current user might not have
|
||||
// permission to do so.
|
||||
rootProcArgs := kernel.CreateProcessArgs{
|
||||
|
@ -570,16 +331,19 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf
|
|||
Umask: 0022,
|
||||
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
|
||||
}
|
||||
rootCtx := rootProcArgs.NewContext(k)
|
||||
rootCtx := rootProcArgs.NewContext(c.k)
|
||||
|
||||
// If this is the root container, we also need to setup the root mount
|
||||
// namespace.
|
||||
mns := k.RootMountNamespace()
|
||||
mns := c.k.RootMountNamespace()
|
||||
if mns == nil {
|
||||
// Setup the root container.
|
||||
return setupRootContainerFS(ctx, rootCtx, spec, conf, goferFDs, func(mns *fs.MountNamespace) {
|
||||
k.SetRootMountNamespace(mns)
|
||||
})
|
||||
if err := c.setupRootContainer(ctx, rootCtx, conf, func(mns *fs.MountNamespace) {
|
||||
c.k.SetRootMountNamespace(mns)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.checkDispenser()
|
||||
}
|
||||
|
||||
// Setup a child container.
|
||||
|
@ -593,18 +357,17 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf
|
|||
if err != nil {
|
||||
return fmt.Errorf("couldn't find child container dir %q: %v", ChildContainersDir, err)
|
||||
}
|
||||
if err := contDir.CreateDirectory(ctx, globalRoot, cid, fs.FilePermsFromMode(0755)); err != nil {
|
||||
return fmt.Errorf("create directory %q: %v", cid, err)
|
||||
if err := contDir.CreateDirectory(ctx, globalRoot, c.cid, fs.FilePermsFromMode(0755)); err != nil {
|
||||
return fmt.Errorf("create directory %q: %v", c.cid, err)
|
||||
}
|
||||
containerRoot, err := contDir.Walk(ctx, globalRoot, cid)
|
||||
containerRoot, err := contDir.Walk(ctx, globalRoot, c.cid)
|
||||
if err != nil {
|
||||
return fmt.Errorf("walk to %q failed: %v", cid, err)
|
||||
return fmt.Errorf("walk to %q failed: %v", c.cid, err)
|
||||
}
|
||||
defer containerRoot.DecRef()
|
||||
|
||||
// Create the container's root filesystem mount.
|
||||
fds := &fdDispenser{fds: goferFDs}
|
||||
rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil)
|
||||
rootInode, err := c.createRootMount(rootCtx, conf)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating filesystem for container: %v", err)
|
||||
}
|
||||
|
@ -614,39 +377,32 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf
|
|||
return fmt.Errorf("mount container root: %v", err)
|
||||
}
|
||||
|
||||
// We have to re-walk to the dirent to find the mounted
|
||||
// directory. The old dirent is invalid at this point.
|
||||
containerRoot, err = contDir.Walk(ctx, globalRoot, cid)
|
||||
// We have to re-walk to the dirent to find the mounted directory. The old
|
||||
// dirent is invalid at this point.
|
||||
containerRoot, err = contDir.Walk(ctx, globalRoot, c.cid)
|
||||
if err != nil {
|
||||
return fmt.Errorf("find container mount point %q: %v", cid, err)
|
||||
return fmt.Errorf("find container mount point %q: %v", c.cid, err)
|
||||
}
|
||||
cu := specutils.MakeCleanup(func() { containerRoot.DecRef() })
|
||||
defer cu.Clean()
|
||||
|
||||
log.Infof("Mounted child's root fs to %q", filepath.Join(ChildContainersDir, cid))
|
||||
log.Infof("Mounted child's root fs to %q", filepath.Join(ChildContainersDir, c.cid))
|
||||
|
||||
// Set process root here, so 'rootCtx.Value(CtxRoot)' will return it.
|
||||
procArgs.Root = containerRoot
|
||||
|
||||
// Mount all submounts.
|
||||
mounts := compileMounts(spec)
|
||||
if err := mountSubmounts(rootCtx, conf, mns, containerRoot, mounts, fds); err != nil {
|
||||
if err := c.mountSubmounts(rootCtx, conf, mns, containerRoot); err != nil {
|
||||
return err
|
||||
}
|
||||
cu.Release()
|
||||
return nil
|
||||
return c.checkDispenser()
|
||||
}
|
||||
|
||||
// setExecutablePath sets the procArgs.Filename by searching the PATH for an
|
||||
// executable matching the procArgs.Argv[0].
|
||||
func setExecutablePath(ctx context.Context, mns *fs.MountNamespace, procArgs *kernel.CreateProcessArgs) error {
|
||||
paths := fs.GetPath(procArgs.Envv)
|
||||
exe := procArgs.Argv[0]
|
||||
f, err := mns.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
|
||||
if err != nil {
|
||||
return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
|
||||
func (c *containerMounter) checkDispenser() error {
|
||||
if !c.fds.empty() {
|
||||
return fmt.Errorf("not all gofer FDs were consumed, remaining: %v", c.fds)
|
||||
}
|
||||
procArgs.Filename = f
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -715,6 +471,261 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
|
|||
return nil
|
||||
}
|
||||
|
||||
// setupRootContainer creates a mount namespace containing the root filesystem
|
||||
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
|
||||
// 'setMountNS' is called after namespace is created. It must set the mount NS
|
||||
// to 'rootCtx'.
|
||||
func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error {
|
||||
// Create a tmpfs mount where we create and mount a root filesystem for
|
||||
// each child container.
|
||||
c.mounts = append(c.mounts, specs.Mount{
|
||||
Type: tmpfs,
|
||||
Destination: ChildContainersDir,
|
||||
})
|
||||
|
||||
rootInode, err := c.createRootMount(rootCtx, conf)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating root mount: %v", err)
|
||||
}
|
||||
mns, err := fs.NewMountNamespace(userCtx, rootInode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating root mount namespace: %v", err)
|
||||
}
|
||||
setMountNS(mns)
|
||||
|
||||
root := mns.Root()
|
||||
defer root.DecRef()
|
||||
return c.mountSubmounts(rootCtx, conf, mns, root)
|
||||
}
|
||||
|
||||
// createRootMount creates the root filesystem.
|
||||
func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*fs.Inode, error) {
|
||||
// First construct the filesystem from the spec.Root.
|
||||
mf := fs.MountSourceFlags{ReadOnly: c.root.Readonly || conf.Overlay}
|
||||
|
||||
var (
|
||||
rootInode *fs.Inode
|
||||
err error
|
||||
)
|
||||
|
||||
fd := c.fds.remove()
|
||||
log.Infof("Mounting root over 9P, ioFD: %d", fd)
|
||||
p9FS := mustFindFilesystem("9p")
|
||||
opts := p9MountOptions(fd, conf.FileAccess)
|
||||
rootInode, err = p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating root mount point: %v", err)
|
||||
}
|
||||
|
||||
// We need to overlay the root on top of a ramfs with stub directories
|
||||
// for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always
|
||||
// mounted even if they are not in the spec.
|
||||
submounts := append(subtargets("/", c.mounts), "/dev", "/sys", "/proc", "/tmp")
|
||||
rootInode, err = addSubmountOverlay(ctx, rootInode, submounts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("adding submount overlay: %v", err)
|
||||
}
|
||||
|
||||
if conf.Overlay && !c.root.Readonly {
|
||||
log.Debugf("Adding overlay on top of root mount")
|
||||
// Overlay a tmpfs filesystem on top of the root.
|
||||
rootInode, err = addOverlay(ctx, conf, rootInode, "root-overlay-upper", mf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Mounted %q to %q type root", c.root.Path, "/")
|
||||
return rootInode, nil
|
||||
}
|
||||
|
||||
// getMountNameAndOptions retrieves the fsName, opts, and useOverlay values
|
||||
// used for mounts.
|
||||
func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (string, []string, bool, error) {
|
||||
var (
|
||||
fsName string
|
||||
opts []string
|
||||
useOverlay bool
|
||||
err error
|
||||
)
|
||||
|
||||
switch m.Type {
|
||||
case devpts, devtmpfs, proc, sysfs:
|
||||
fsName = m.Type
|
||||
case nonefs:
|
||||
fsName = sysfs
|
||||
case tmpfs:
|
||||
fsName = m.Type
|
||||
|
||||
// tmpfs has some extra supported options that we must pass through.
|
||||
opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
|
||||
|
||||
case bind:
|
||||
fd := c.fds.remove()
|
||||
fsName = "9p"
|
||||
// Non-root bind mounts are always shared.
|
||||
opts = p9MountOptions(fd, FileAccessShared)
|
||||
// If configured, add overlay to all writable mounts.
|
||||
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
|
||||
|
||||
default:
|
||||
// TODO(nlacasse): Support all the mount types and make this a fatal error.
|
||||
// Most applications will "just work" without them, so this is a warning
|
||||
// for now.
|
||||
log.Warningf("ignoring unknown filesystem type %q", m.Type)
|
||||
}
|
||||
return fsName, opts, useOverlay, err
|
||||
}
|
||||
|
||||
func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
|
||||
for _, m := range c.mounts {
|
||||
if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
|
||||
return fmt.Errorf("mount submount %q: %v", m.Destination, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := c.mountTmp(ctx, conf, mns, root); err != nil {
|
||||
return fmt.Errorf("mount submount %q: %v", "tmp", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mountSubmount mounts volumes inside the container's root. Because mounts may
|
||||
// be readonly, a lower ramfs overlay is added to create the mount point dir.
|
||||
// Another overlay is added with tmpfs on top if Config.Overlay is true.
|
||||
// 'm.Destination' must be an absolute path with '..' and symlinks resolved.
|
||||
func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error {
|
||||
// Map mount type to filesystem name, and parse out the options that we are
|
||||
// capable of dealing with.
|
||||
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if fsName == "" {
|
||||
// Filesystem is not supported (e.g. cgroup), just skip it.
|
||||
return nil
|
||||
}
|
||||
|
||||
// All filesystem names should have been mapped to something we know.
|
||||
filesystem := mustFindFilesystem(fsName)
|
||||
|
||||
mf := mountFlags(m.Options)
|
||||
if useOverlay {
|
||||
// All writes go to upper, be paranoid and make lower readonly.
|
||||
mf.ReadOnly = true
|
||||
}
|
||||
|
||||
inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating mount with source %q: %v", m.Source, err)
|
||||
}
|
||||
|
||||
// If there are submounts, we need to overlay the mount on top of a ramfs
|
||||
// with stub directories for submount paths.
|
||||
submounts := subtargets(m.Destination, c.mounts)
|
||||
if len(submounts) > 0 {
|
||||
log.Infof("Adding submount overlay over %q", m.Destination)
|
||||
inode, err = addSubmountOverlay(ctx, inode, submounts)
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding submount overlay: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if useOverlay {
|
||||
log.Debugf("Adding overlay on top of mount %q", m.Destination)
|
||||
inode, err = addOverlay(ctx, conf, inode, m.Type, mf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
maxTraversals := uint(0)
|
||||
dirent, err := mns.FindInode(ctx, root, root, m.Destination, &maxTraversals)
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err)
|
||||
}
|
||||
defer dirent.DecRef()
|
||||
if err := mns.Mount(ctx, dirent, inode); err != nil {
|
||||
return fmt.Errorf("mount %q error: %v", m.Destination, err)
|
||||
}
|
||||
|
||||
log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
|
||||
return nil
|
||||
}
|
||||
|
||||
// addRestoreMount adds a mount to the MountSources map used for restoring a
|
||||
// checkpointed container.
|
||||
func (c *containerMounter) addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
|
||||
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if fsName == "" {
|
||||
// Filesystem is not supported (e.g. cgroup), just skip it.
|
||||
return nil
|
||||
}
|
||||
|
||||
newMount := fs.MountArgs{
|
||||
Dev: mountDevice(m),
|
||||
Flags: mountFlags(m.Options),
|
||||
DataString: strings.Join(opts, ","),
|
||||
}
|
||||
if useOverlay {
|
||||
newMount.Flags.ReadOnly = true
|
||||
}
|
||||
renv.MountSources[fsName] = append(renv.MountSources[fsName], newMount)
|
||||
log.Infof("Added mount at %q: %+v", fsName, newMount)
|
||||
return nil
|
||||
}
|
||||
|
||||
// createRestoreEnvironment builds a fs.RestoreEnvironment called renv by adding the mounts
|
||||
// to the environment.
|
||||
func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEnvironment, error) {
|
||||
renv := &fs.RestoreEnvironment{
|
||||
MountSources: make(map[string][]fs.MountArgs),
|
||||
}
|
||||
|
||||
// Add root mount.
|
||||
fd := c.fds.remove()
|
||||
opts := p9MountOptions(fd, conf.FileAccess)
|
||||
|
||||
mf := fs.MountSourceFlags{}
|
||||
if c.root.Readonly || conf.Overlay {
|
||||
mf.ReadOnly = true
|
||||
}
|
||||
|
||||
rootMount := fs.MountArgs{
|
||||
Dev: rootDevice,
|
||||
Flags: mf,
|
||||
DataString: strings.Join(opts, ","),
|
||||
}
|
||||
renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
|
||||
|
||||
// Add submounts.
|
||||
var tmpMounted bool
|
||||
for _, m := range c.mounts {
|
||||
if err := c.addRestoreMount(conf, renv, m); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if filepath.Clean(m.Destination) == "/tmp" {
|
||||
tmpMounted = true
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(b/67958150): handle '/tmp' properly (see mountTmp()).
|
||||
if !tmpMounted {
|
||||
tmpMount := specs.Mount{
|
||||
Type: tmpfs,
|
||||
Destination: "/tmp",
|
||||
}
|
||||
if err := c.addRestoreMount(conf, renv, tmpMount); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return renv, nil
|
||||
}
|
||||
|
||||
// mountTmp mounts an internal tmpfs at '/tmp' if it's safe to do so.
|
||||
// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
|
||||
// the host /tmp, but this is a nice optimization, and fixes some apps that call
|
||||
|
@ -724,8 +735,8 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
|
|||
//
|
||||
// Note that when there are submounts inside of '/tmp', directories for the
|
||||
// mount points must be present, making '/tmp' not empty anymore.
|
||||
func mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, mounts []specs.Mount) error {
|
||||
for _, m := range mounts {
|
||||
func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
|
||||
for _, m := range c.mounts {
|
||||
if filepath.Clean(m.Destination) == "/tmp" {
|
||||
log.Debugf("Explict %q mount found, skipping internal tmpfs, mount: %+v", "/tmp", m)
|
||||
return nil
|
||||
|
@ -766,7 +777,7 @@ func mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *f
|
|||
// another user. This is normally done for /tmp.
|
||||
Options: []string{"mode=1777"},
|
||||
}
|
||||
return mountSubmount(ctx, conf, mns, root, nil, tmpMount, mounts)
|
||||
return c.mountSubmount(ctx, conf, mns, root, tmpMount)
|
||||
|
||||
default:
|
||||
return err
|
||||
|
|
|
@ -288,7 +288,7 @@ func New(args Args) (*Loader, error) {
|
|||
}
|
||||
|
||||
// Create a watchdog.
|
||||
watchdog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
|
||||
dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
|
||||
|
||||
procArgs, err := newProcess(args.ID, args.Spec, creds, k)
|
||||
if err != nil {
|
||||
|
@ -304,7 +304,7 @@ func New(args Args) (*Loader, error) {
|
|||
k: k,
|
||||
conf: args.Conf,
|
||||
console: args.Console,
|
||||
watchdog: watchdog,
|
||||
watchdog: dog,
|
||||
spec: args.Spec,
|
||||
goferFDs: args.GoferFDs,
|
||||
stdioFDs: args.StdioFDs,
|
||||
|
@ -432,7 +432,7 @@ func createMemoryFile() (*pgalloc.MemoryFile, error) {
|
|||
return mf, nil
|
||||
}
|
||||
|
||||
// Run runs the root container..
|
||||
// Run runs the root container.
|
||||
func (l *Loader) Run() error {
|
||||
err := l.run()
|
||||
l.ctrl.manager.startResultChan <- err
|
||||
|
@ -486,17 +486,21 @@ func (l *Loader) run() error {
|
|||
// If we are restoring, we do not want to create a process.
|
||||
// l.restore is set by the container manager when a restore call is made.
|
||||
if !l.restore {
|
||||
if err := setupContainerFS(
|
||||
&l.rootProcArgs,
|
||||
l.spec,
|
||||
l.conf,
|
||||
l.stdioFDs,
|
||||
l.goferFDs,
|
||||
l.console,
|
||||
l.rootProcArgs.Credentials,
|
||||
l.rootProcArgs.Limits,
|
||||
l.k,
|
||||
"" /* CID, which isn't needed for the root container */); err != nil {
|
||||
// Create the FD map, which will set stdin, stdout, and stderr. If console
|
||||
// is true, then ioctl calls will be passed through to the host fd.
|
||||
ctx := l.rootProcArgs.NewContext(l.k)
|
||||
fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("importing fds: %v", err)
|
||||
}
|
||||
// CreateProcess takes a reference on FDMap if successful. We won't need
|
||||
// ours either way.
|
||||
l.rootProcArgs.FDMap = fdm
|
||||
|
||||
// cid for root container can be empty. Only subcontainers need it to set
|
||||
// the mount location.
|
||||
mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k)
|
||||
if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -552,7 +556,7 @@ func (l *Loader) createContainer(cid string) error {
|
|||
// startContainer starts a child container. It returns the thread group ID of
|
||||
// the newly created process. Caller owns 'files' and may close them after
|
||||
// this method returns.
|
||||
func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config, cid string, files []*os.File) error {
|
||||
func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, files []*os.File) error {
|
||||
// Create capabilities.
|
||||
caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities)
|
||||
if err != nil {
|
||||
|
@ -596,6 +600,16 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
|
|||
stdioFDs = append(stdioFDs, int(f.Fd()))
|
||||
}
|
||||
|
||||
// Create the FD map, which will set stdin, stdout, and stderr.
|
||||
ctx := procArgs.NewContext(l.k)
|
||||
fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("importing fds: %v", err)
|
||||
}
|
||||
// CreateProcess takes a reference on FDMap if successful. We won't need ours
|
||||
// either way.
|
||||
procArgs.FDMap = fdm
|
||||
|
||||
// Can't take ownership away from os.File. dup them to get a new FDs.
|
||||
var goferFDs []int
|
||||
for _, f := range files[3:] {
|
||||
|
@ -606,22 +620,12 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
|
|||
goferFDs = append(goferFDs, fd)
|
||||
}
|
||||
|
||||
if err := setupContainerFS(
|
||||
&procArgs,
|
||||
spec,
|
||||
conf,
|
||||
stdioFDs,
|
||||
goferFDs,
|
||||
false,
|
||||
creds,
|
||||
procArgs.Limits,
|
||||
k,
|
||||
cid); err != nil {
|
||||
mntr := newContainerMounter(spec, cid, goferFDs, l.k)
|
||||
if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
|
||||
return fmt.Errorf("configuring container FS: %v", err)
|
||||
}
|
||||
|
||||
ctx := procArgs.NewContext(l.k)
|
||||
mns := k.RootMountNamespace()
|
||||
mns := l.k.RootMountNamespace()
|
||||
if err := setExecutablePath(ctx, mns, &procArgs); err != nil {
|
||||
return fmt.Errorf("setting executable path for %+v: %v", procArgs, err)
|
||||
}
|
||||
|
@ -724,7 +728,7 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, waitStatus *uint32) error {
|
||||
func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) error {
|
||||
if tgid <= 0 {
|
||||
return fmt.Errorf("PID (%d) must be positive", tgid)
|
||||
}
|
||||
|
@ -736,13 +740,10 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, wai
|
|||
ws := l.wait(execTG)
|
||||
*waitStatus = ws
|
||||
|
||||
// Remove tg from the cache if caller requested it.
|
||||
if clearStatus {
|
||||
l.mu.Lock()
|
||||
delete(l.processes, eid)
|
||||
log.Debugf("updated processes (removal): %v", l.processes)
|
||||
l.mu.Unlock()
|
||||
}
|
||||
l.mu.Lock()
|
||||
delete(l.processes, eid)
|
||||
log.Debugf("updated processes (removal): %v", l.processes)
|
||||
l.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -40,8 +40,6 @@ import (
|
|||
"gvisor.googlesource.com/gvisor/runsc/specutils"
|
||||
)
|
||||
|
||||
const privateClearStatusFlag = "private-clear-status"
|
||||
|
||||
// Exec implements subcommands.Command for the "exec" command.
|
||||
type Exec struct {
|
||||
cwd string
|
||||
|
@ -51,7 +49,6 @@ type Exec struct {
|
|||
extraKGIDs stringSlice
|
||||
caps stringSlice
|
||||
detach bool
|
||||
clearStatus bool
|
||||
processPath string
|
||||
pidFile string
|
||||
internalPidFile string
|
||||
|
@ -103,10 +100,6 @@ func (ex *Exec) SetFlags(f *flag.FlagSet) {
|
|||
f.StringVar(&ex.pidFile, "pid-file", "", "filename that the container pid will be written to")
|
||||
f.StringVar(&ex.internalPidFile, "internal-pid-file", "", "filename that the container-internal pid will be written to")
|
||||
f.StringVar(&ex.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
|
||||
|
||||
// This flag clears the status of the exec'd process upon completion. It is
|
||||
// only used when we fork due to --detach being set on the parent.
|
||||
f.BoolVar(&ex.clearStatus, privateClearStatusFlag, true, "private flag, do not use")
|
||||
}
|
||||
|
||||
// Execute implements subcommands.Command.Execute. It starts a process in an
|
||||
|
@ -156,7 +149,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
|||
// Start the new process and get its pid.
|
||||
pid, err := c.Execute(e)
|
||||
if err != nil {
|
||||
Fatalf("getting processes for container: %v", err)
|
||||
Fatalf("executing processes for container: %v", err)
|
||||
}
|
||||
|
||||
if e.StdioIsPty {
|
||||
|
@ -184,7 +177,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
|||
}
|
||||
|
||||
// Wait for the process to exit.
|
||||
ws, err := c.WaitPID(pid, ex.clearStatus)
|
||||
ws, err := c.WaitPID(pid)
|
||||
if err != nil {
|
||||
Fatalf("waiting on pid %d: %v", pid, err)
|
||||
}
|
||||
|
@ -193,8 +186,12 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
|||
}
|
||||
|
||||
func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
|
||||
binPath := specutils.ExePath
|
||||
var args []string
|
||||
for _, a := range os.Args[1:] {
|
||||
if !strings.Contains(a, "detach") {
|
||||
args = append(args, a)
|
||||
}
|
||||
}
|
||||
|
||||
// The command needs to write a pid file so that execAndWait can tell
|
||||
// when it has started. If no pid-file was provided, we should use a
|
||||
|
@ -210,19 +207,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
|
|||
args = append(args, "--pid-file="+pidFile)
|
||||
}
|
||||
|
||||
// Add the rest of the args, excluding the "detach" flag.
|
||||
for _, a := range os.Args[1:] {
|
||||
if strings.Contains(a, "detach") {
|
||||
// Replace with the "private-clear-status" flag, which tells
|
||||
// the new process it's a detached child and shouldn't
|
||||
// clear the exit status of the sentry process.
|
||||
args = append(args, fmt.Sprintf("--%s=false", privateClearStatusFlag))
|
||||
} else {
|
||||
args = append(args, a)
|
||||
}
|
||||
}
|
||||
|
||||
cmd := exec.Command(binPath, args...)
|
||||
cmd := exec.Command(specutils.ExePath, args...)
|
||||
cmd.Args[0] = "runsc-exec"
|
||||
|
||||
// Exec stdio defaults to current process stdio.
|
||||
|
@ -233,8 +218,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
|
|||
// If the console control socket file is provided, then create a new
|
||||
// pty master/slave pair and set the TTY on the sandbox process.
|
||||
if ex.consoleSocket != "" {
|
||||
// Create a new TTY pair and send the master on the provided
|
||||
// socket.
|
||||
// Create a new TTY pair and send the master on the provided socket.
|
||||
tty, err := console.NewWithSocket(ex.consoleSocket)
|
||||
if err != nil {
|
||||
Fatalf("setting up console with socket %q: %v", ex.consoleSocket, err)
|
||||
|
@ -256,7 +240,7 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat
|
|||
Fatalf("failure to start child exec process, err: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Started child (PID: %d) to exec and wait: %s %s", cmd.Process.Pid, binPath, args)
|
||||
log.Infof("Started child (PID: %d) to exec and wait: %s %s", cmd.Process.Pid, specutils.ExePath, args)
|
||||
|
||||
// Wait for PID file to ensure that child process has started. Otherwise,
|
||||
// '--process' file is deleted as soon as this process returns and the child
|
||||
|
|
|
@ -88,14 +88,14 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
|
|||
waitStatus = ws
|
||||
// Wait on a PID in the root PID namespace.
|
||||
case wt.rootPID != unsetPID:
|
||||
ws, err := c.WaitRootPID(int32(wt.rootPID), true /* clearStatus */)
|
||||
ws, err := c.WaitRootPID(int32(wt.rootPID))
|
||||
if err != nil {
|
||||
Fatalf("waiting on PID in root PID namespace %d in container %q: %v", wt.rootPID, c.ID, err)
|
||||
}
|
||||
waitStatus = ws
|
||||
// Wait on a PID in the container's PID namespace.
|
||||
case wt.pid != unsetPID:
|
||||
ws, err := c.WaitPID(int32(wt.pid), true /* clearStatus */)
|
||||
ws, err := c.WaitPID(int32(wt.pid))
|
||||
if err != nil {
|
||||
Fatalf("waiting on PID %d in container %q: %v", wt.pid, c.ID, err)
|
||||
}
|
||||
|
|
|
@ -530,22 +530,22 @@ func (c *Container) Wait() (syscall.WaitStatus, error) {
|
|||
|
||||
// WaitRootPID waits for process 'pid' in the sandbox's PID namespace and
|
||||
// returns its WaitStatus.
|
||||
func (c *Container) WaitRootPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) {
|
||||
func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) {
|
||||
log.Debugf("Wait on PID %d in sandbox %q", pid, c.Sandbox.ID)
|
||||
if !c.isSandboxRunning() {
|
||||
return 0, fmt.Errorf("sandbox is not running")
|
||||
}
|
||||
return c.Sandbox.WaitPID(c.Sandbox.ID, pid, clearStatus)
|
||||
return c.Sandbox.WaitPID(c.Sandbox.ID, pid)
|
||||
}
|
||||
|
||||
// WaitPID waits for process 'pid' in the container's PID namespace and returns
|
||||
// its WaitStatus.
|
||||
func (c *Container) WaitPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) {
|
||||
func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) {
|
||||
log.Debugf("Wait on PID %d in container %q", pid, c.ID)
|
||||
if !c.isSandboxRunning() {
|
||||
return 0, fmt.Errorf("sandbox is not running")
|
||||
}
|
||||
return c.Sandbox.WaitPID(c.ID, pid, clearStatus)
|
||||
return c.Sandbox.WaitPID(c.ID, pid)
|
||||
}
|
||||
|
||||
// SignalContainer sends the signal to the container. If all is true and signal
|
||||
|
|
|
@ -649,7 +649,7 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
|
|||
|
||||
// WaitPID waits for process 'pid' in the container's sandbox and returns its
|
||||
// WaitStatus.
|
||||
func (s *Sandbox) WaitPID(cid string, pid int32, clearStatus bool) (syscall.WaitStatus, error) {
|
||||
func (s *Sandbox) WaitPID(cid string, pid int32) (syscall.WaitStatus, error) {
|
||||
log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID)
|
||||
var ws syscall.WaitStatus
|
||||
conn, err := s.sandboxConnect()
|
||||
|
@ -659,9 +659,8 @@ func (s *Sandbox) WaitPID(cid string, pid int32, clearStatus bool) (syscall.Wait
|
|||
defer conn.Close()
|
||||
|
||||
args := &boot.WaitPIDArgs{
|
||||
PID: pid,
|
||||
CID: cid,
|
||||
ClearStatus: clearStatus,
|
||||
PID: pid,
|
||||
CID: cid,
|
||||
}
|
||||
if err := conn.Call(boot.ContainerWaitPID, args, &ws); err != nil {
|
||||
return ws, fmt.Errorf("waiting on PID %d in sandbox %q: %v", pid, s.ID, err)
|
||||
|
|
Loading…
Reference in New Issue