Fix stdios ownership

Set stdio ownership based on the container's user to ensure the
user can open, read, and write the container's stdio files.

1. stdios in the host are changed to have the owner be the same
uid/gid of the process running the sandbox. This ensures that the
sandbox has full control over them.
2. The stdios' owner inside the sandbox is changed to match the
container's user, giving access inside the container and making it
behave the same as runc. (A minimal sketch of step 1 follows below.)

Fixes #6180

PiperOrigin-RevId: 384347009
Fabricio Voznika, 2021-07-12 16:52:53 -07:00 (committed by gVisor bot)
commit f51e0486d4 (parent 7132b9a07b)
13 changed files with 414 additions and 167 deletions

----------------------------------------

@ -223,7 +223,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
_ = fd.Close()
}
}()
ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds)
ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, args.KUID, args.KGID, fds)
if err != nil {
return nil, 0, nil, nil, err
}

----------------------------------------

@ -15,6 +15,7 @@ go_library(
"//pkg/sentry/fs/host",
"//pkg/sentry/fsimpl/host",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
],
)

----------------------------------------

@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/host"
hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
)
@ -31,9 +32,9 @@ import (
// sets up TTY for the first 3 FDs in the slice representing stdin, stdout,
// stderr. Used FDs are either closed or released. It's safe for the caller to
// close any remaining files upon return.
func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
func Import(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, fds []*fd.FD) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
if kernel.VFS2Enabled {
ttyFile, err := importVFS2(ctx, fdTable, console, fds)
ttyFile, err := importVFS2(ctx, fdTable, console, uid, gid, fds)
return nil, ttyFile, err
}
ttyFile, err := importFS(ctx, fdTable, console, fds)
@ -89,7 +90,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds []
return ttyFile.FileOperations.(*host.TTYFileOperations), nil
}
func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) {
func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, uid auth.KUID, gid auth.KGID, stdioFDs []*fd.FD) (*hostvfs2.TTYFileDescription, error) {
k := kernel.KernelFromContext(ctx)
if k == nil {
return nil, fmt.Errorf("cannot find kernel from context")
@ -103,7 +104,13 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi
// Import the file as a host TTY file.
if ttyFile == nil {
var err error
appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), true /* isTTY */)
appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{
Savable: true,
IsTTY: true,
VirtualOwner: true,
UID: uid,
GID: gid,
})
if err != nil {
return nil, err
}
@ -121,7 +128,12 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi
}
} else {
var err error
appFile, err = hostvfs2.ImportFD(ctx, k.HostMount(), hostFD.FD(), false /* isTTY */)
appFile, err = hostvfs2.NewFD(ctx, k.HostMount(), hostFD.FD(), &hostvfs2.NewFDOptions{
Savable: true,
VirtualOwner: true,
UID: uid,
GID: gid,
})
if err != nil {
return nil, err
}

----------------------------------------

@ -42,6 +42,36 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
// These are the modes that are stored with virtualOwner.
const virtualOwnerModes = linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID
// +stateify savable
type virtualOwner struct {
// This field is initialized at creation time and is immutable.
enabled bool
// mu protects writes to the fields below; readers may load them with
// atomic memory operations without taking the lock.
mu sync.Mutex `state:"nosave"`
uid uint32
gid uint32
// mode is also stored; otherwise, setting the host file to `0000` could
// remove access to the file.
mode uint32
}
func (v *virtualOwner) atomicUID() uint32 {
return atomic.LoadUint32(&v.uid)
}
func (v *virtualOwner) atomicGID() uint32 {
return atomic.LoadUint32(&v.gid)
}
func (v *virtualOwner) atomicMode() uint32 {
return atomic.LoadUint32(&v.mode)
}
// inode implements kernfs.Inode.
//
// +stateify savable
@ -98,6 +128,11 @@ type inode struct {
// Event queue for blocking operations.
queue waiter.Queue
// virtualOwner caches ownership and permission information to override the
// underlying file owner and permissions. This is used to allow the untrusted
// application to change these fields without affecting the host.
virtualOwner virtualOwner
// If haveBuf is non-zero, hostFD represents a pipe, and buf contains data
// read from the pipe from previous calls to inode.beforeSave(). haveBuf
// and buf are protected by bufMu. haveBuf is accessed using atomic memory
@ -147,7 +182,7 @@ func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fil
type NewFDOptions struct {
// If Savable is true, the host file descriptor may be saved/restored by
// numeric value; the sandbox API requires a corresponding host FD with the
// same numeric value to be provieded at time of restore.
// same numeric value to be provided at time of restore.
Savable bool
// If IsTTY is true, the file descriptor is a TTY.
@ -157,6 +192,12 @@ type NewFDOptions struct {
// the new file description will inherit flags from hostFD.
HaveFlags bool
Flags uint32
// VirtualOwner allows the imported file to present an owner and permissions
// that differ from those of the underlying host file.
VirtualOwner bool
UID auth.KUID
GID auth.KGID
}
// NewFD returns a vfs.FileDescription representing the given host file
@ -168,8 +209,8 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
}
// Retrieve metadata.
var s unix.Stat_t
if err := unix.Fstat(hostFD, &s); err != nil {
var stat unix.Stat_t
if err := unix.Fstat(hostFD, &stat); err != nil {
return nil, err
}
@ -183,11 +224,19 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
flags = uint32(flagsInt)
}
d := &kernfs.Dentry{}
i, err := newInode(ctx, fs, hostFD, opts.Savable, linux.FileMode(s.Mode).FileType(), opts.IsTTY)
fileType := linux.FileMode(stat.Mode).FileType()
i, err := newInode(ctx, fs, hostFD, opts.Savable, fileType, opts.IsTTY)
if err != nil {
return nil, err
}
if opts.VirtualOwner {
i.virtualOwner.enabled = true
i.virtualOwner.uid = uint32(opts.UID)
i.virtualOwner.gid = uint32(opts.GID)
i.virtualOwner.mode = stat.Mode
}
d := &kernfs.Dentry{}
d.Init(&fs.Filesystem, i)
// i.open will take a reference on d.
@ -196,15 +245,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
// For simplicity, fileDescription.offset is set to 0. Technically, we
// should only set to 0 on files that are not seekable (sockets, pipes,
// etc.), and use the offset from the host fd otherwise when importing.
return i.open(ctx, d, mnt, flags)
}
// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
return NewFD(ctx, mnt, hostFD, &NewFDOptions{
Savable: true,
IsTTY: isTTY,
})
return i.open(ctx, d, mnt, fileType, flags)
}
// filesystemType implements vfs.FilesystemType.
@ -270,7 +311,7 @@ func (fs *filesystem) MountOptions() string {
// CheckPermissions implements kernfs.Inode.CheckPermissions.
func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
var s unix.Stat_t
if err := unix.Fstat(i.hostFD, &s); err != nil {
if err := i.stat(&s); err != nil {
return err
}
return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid))
@ -279,7 +320,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a
// Mode implements kernfs.Inode.Mode.
func (i *inode) Mode() linux.FileMode {
var s unix.Stat_t
if err := unix.Fstat(i.hostFD, &s); err != nil {
if err := i.stat(&s); err != nil {
// Retrieving the mode from the host fd using fstat(2) should not fail.
// If the syscall does not succeed, something is fundamentally wrong.
panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err))
@ -306,7 +347,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// Fallback to fstat(2), if statx(2) is not supported on the host.
//
// TODO(b/151263641): Remove fallback.
return i.fstat(fs)
return i.statxFromStat(fs)
}
if err != nil {
return linux.Statx{}, err
@ -330,19 +371,35 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// device numbers.
ls.Mask |= s.Mask & linux.STATX_ALL
if s.Mask&linux.STATX_TYPE != 0 {
ls.Mode |= s.Mode & linux.S_IFMT
if i.virtualOwner.enabled {
ls.Mode |= uint16(i.virtualOwner.atomicMode()) & linux.S_IFMT
} else {
ls.Mode |= s.Mode & linux.S_IFMT
}
}
if s.Mask&linux.STATX_MODE != 0 {
ls.Mode |= s.Mode &^ linux.S_IFMT
if i.virtualOwner.enabled {
ls.Mode |= uint16(i.virtualOwner.atomicMode()) &^ linux.S_IFMT
} else {
ls.Mode |= s.Mode &^ linux.S_IFMT
}
}
if s.Mask&linux.STATX_NLINK != 0 {
ls.Nlink = s.Nlink
}
if s.Mask&linux.STATX_UID != 0 {
ls.UID = s.Uid
if i.virtualOwner.enabled {
ls.UID = i.virtualOwner.atomicUID()
} else {
ls.UID = s.Uid
}
}
if s.Mask&linux.STATX_GID != 0 {
ls.GID = s.Gid
if i.virtualOwner.enabled {
ls.GID = i.virtualOwner.atomicGID()
} else {
ls.GID = s.Gid
}
}
if s.Mask&linux.STATX_ATIME != 0 {
ls.Atime = unixToLinuxStatxTimestamp(s.Atime)
@ -366,7 +423,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
return ls, nil
}
// fstat is a best-effort fallback for inode.Stat() if the host does not
// statxFromStat is a best-effort fallback for inode.Stat() if the host does not
// support statx(2).
//
// We ignore the mask and sync flags in opts and simply supply
@ -374,9 +431,9 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// of a mask or sync flags. fstat(2) does not provide any metadata
// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so
// those fields remain empty.
func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
func (i *inode) statxFromStat(fs *filesystem) (linux.Statx, error) {
var s unix.Stat_t
if err := unix.Fstat(i.hostFD, &s); err != nil {
if err := i.stat(&s); err != nil {
return linux.Statx{}, err
}
@ -400,7 +457,21 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
}, nil
}
func (i *inode) stat(stat *unix.Stat_t) error {
if err := unix.Fstat(i.hostFD, stat); err != nil {
return err
}
if i.virtualOwner.enabled {
stat.Uid = i.virtualOwner.atomicUID()
stat.Gid = i.virtualOwner.atomicGID()
stat.Mode = i.virtualOwner.atomicMode()
}
return nil
}
// SetStat implements kernfs.Inode.SetStat.
//
// +checklocksignore
func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
s := &opts.Stat
@ -408,11 +479,22 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
if m == 0 {
return nil
}
if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 {
supportedModes := uint32(linux.STATX_MODE | linux.STATX_SIZE | linux.STATX_ATIME | linux.STATX_MTIME)
if i.virtualOwner.enabled {
if m&virtualOwnerModes != 0 {
// Take lock if any of the virtual owner fields will be updated.
i.virtualOwner.mu.Lock()
defer i.virtualOwner.mu.Unlock()
}
supportedModes |= virtualOwnerModes
}
if m&^supportedModes != 0 {
return linuxerr.EPERM
}
var hostStat unix.Stat_t
if err := unix.Fstat(i.hostFD, &hostStat); err != nil {
if err := i.stat(&hostStat); err != nil {
return err
}
if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
@ -420,8 +502,12 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
}
if m&linux.STATX_MODE != 0 {
if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
return err
if i.virtualOwner.enabled {
i.virtualOwner.mode = uint32(opts.Stat.Mode)
} else {
if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
return err
}
}
}
if m&linux.STATX_SIZE != 0 {
@ -449,6 +535,14 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
return err
}
}
if i.virtualOwner.enabled {
if m&linux.STATX_UID != 0 {
i.virtualOwner.uid = opts.Stat.UID
}
if m&linux.STATX_GID != 0 {
i.virtualOwner.gid = opts.Stat.GID
}
}
return nil
}
@ -473,16 +567,15 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr
if i.Mode().FileType() == linux.S_IFSOCK {
return nil, linuxerr.ENXIO
}
return i.open(ctx, d, rp.Mount(), opts.Flags)
}
func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, error) {
var s unix.Stat_t
if err := unix.Fstat(i.hostFD, &s); err != nil {
var stat unix.Stat_t
if err := i.stat(&stat); err != nil {
return nil, err
}
fileType := s.Mode & linux.FileTypeMask
fileType := linux.FileMode(stat.Mode).FileType()
return i.open(ctx, d, rp.Mount(), fileType, opts.Flags)
}
func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, fileType linux.FileMode, flags uint32) (*vfs.FileDescription, error) {
// Constrain flags to a subset we can handle.
//
// TODO(gvisor.dev/issue/2601): Support O_NONBLOCK by adding RWF_NOWAIT to pread/pwrite calls.

----------------------------------------

@ -715,7 +715,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
return fmt.Errorf("using TTY, stdios not expected: %d", l)
}
if ep.hostTTY == nil {
return fmt.Errorf("terminal enabled but no TTY provided (--console-socket possibly passed)")
return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?")
}
info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY}
ep.hostTTY = nil
@ -734,7 +734,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin
func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo) (*kernel.ThreadGroup, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
// Create the FD map, which will set stdin, stdout, and stderr.
ctx := info.procArgs.NewContext(l.k)
fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs)
fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs, info.spec.Process.User)
if err != nil {
return nil, nil, nil, fmt.Errorf("importing fds: %w", err)
}
@ -980,7 +980,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
tty: ttyFile,
ttyVFS2: ttyFileVFS2,
}
log.Debugf("updated processes: %s", l.processes)
log.Debugf("updated processes: %v", l.processes)
return tgid, nil
}
@ -1024,7 +1024,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
l.mu.Lock()
delete(l.processes, eid)
log.Debugf("updated processes (removal): %s", l.processes)
log.Debugf("updated processes (removal): %v", l.processes)
l.mu.Unlock()
return nil
}
@ -1092,7 +1092,7 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st
return inet.NewRootNamespace(s, creator), nil
default:
panic(fmt.Sprintf("invalid network configuration: %d", conf.Network))
panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
}
}
@ -1212,7 +1212,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
return nil
default:
panic(fmt.Sprintf("unknown signal delivery mode %s", mode))
panic(fmt.Sprintf("unknown signal delivery mode %v", mode))
}
}
@ -1337,14 +1337,14 @@ func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2
return ep.tty, ep.ttyVFS2, nil
}
func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
func createFDTable(ctx context.Context, console bool, stdioFDs []*fd.FD, user specs.User) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
if len(stdioFDs) != 3 {
return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
}
k := kernel.KernelFromContext(ctx)
fdTable := k.NewFDTable()
ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs)
ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, auth.KUID(user.UID), auth.KGID(user.GID), stdioFDs)
if err != nil {
fdTable.DecRef(ctx)
return nil, nil, nil, err

----------------------------------------

@ -146,12 +146,12 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
if ex.detach {
return ex.execChildAndWait(waitStatus)
}
return ex.exec(c, e, waitStatus)
return ex.exec(conf, c, e, waitStatus)
}
func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
func (ex *Exec) exec(conf *config.Config, c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
// Start the new process and get its pid.
pid, err := c.Execute(e)
pid, err := c.Execute(conf, e)
if err != nil {
return Errorf("executing processes for container: %v", err)
}

----------------------------------------

@ -288,7 +288,7 @@ func TestJobControlSignalExec(t *testing.T) {
StdioIsPty: true,
}
pid, err := c.Execute(execArgs)
pid, err := c.Execute(conf, execArgs)
if err != nil {
t.Fatalf("error executing: %v", err)
}

----------------------------------------

@ -310,7 +310,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
defer tty.Close()
}
if err := c.Sandbox.CreateContainer(c.ID, tty); err != nil {
if err := c.Sandbox.CreateContainer(conf, c.ID, tty); err != nil {
return nil, err
}
}
@ -480,13 +480,13 @@ func Run(conf *config.Config, args Args) (unix.WaitStatus, error) {
// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (c *Container) Execute(args *control.ExecArgs) (int32, error) {
func (c *Container) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) {
log.Debugf("Execute in container, cid: %s, args: %+v", c.ID, args)
if err := c.requireStatus("execute in", Created, Running); err != nil {
return 0, err
}
args.ContainerID = c.ID
return c.Sandbox.Execute(args)
return c.Sandbox.Execute(conf, args)
}
// Event returns events for the container.
@ -910,6 +910,9 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
binPath := specutils.ExePath
cmd := exec.Command(binPath, args...)
cmd.ExtraFiles = goferEnds
// Set Args[0] to make it easier to spot the gofer process. Otherwise it's
// shown as `exe`.
cmd.Args[0] = "runsc-gofer"
if attached {

----------------------------------------

@ -60,15 +60,15 @@ func TestMain(m *testing.M) {
os.Exit(m.Run())
}
func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
func execute(conf *config.Config, cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
args := &control.ExecArgs{
Filename: name,
Argv: append([]string{name}, arg...),
}
return cont.executeSync(args)
return cont.executeSync(conf, args)
}
func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte, error) {
func executeCombinedOutput(conf *config.Config, cont *Container, name string, arg ...string) ([]byte, error) {
r, w, err := os.Pipe()
if err != nil {
return nil, err
@ -80,7 +80,7 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte,
Argv: append([]string{name}, arg...),
FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, w, w}},
}
ws, err := cont.executeSync(args)
ws, err := cont.executeSync(conf, args)
w.Close()
if err != nil {
return nil, err
@ -94,8 +94,8 @@ func executeCombinedOutput(cont *Container, name string, arg ...string) ([]byte,
}
// executeSync synchronously executes a new process.
func (c *Container) executeSync(args *control.ExecArgs) (unix.WaitStatus, error) {
pid, err := c.Execute(args)
func (c *Container) executeSync(conf *config.Config, args *control.ExecArgs) (unix.WaitStatus, error) {
pid, err := c.Execute(conf, args)
if err != nil {
return 0, fmt.Errorf("error executing: %v", err)
}
@ -172,8 +172,8 @@ func blockUntilWaitable(pid int) error {
}
// execPS executes `ps` inside the container and returns the processes.
func execPS(c *Container) ([]*control.Process, error) {
out, err := executeCombinedOutput(c, "/bin/ps", "-e")
func execPS(conf *config.Config, c *Container) ([]*control.Process, error) {
out, err := executeCombinedOutput(conf, c, "/bin/ps", "-e")
if err != nil {
return nil, err
}
@ -864,7 +864,7 @@ func TestExec(t *testing.T) {
} {
t.Run(tc.name, func(t *testing.T) {
// t.Parallel()
if ws, err := cont.executeSync(&tc.args); err != nil {
if ws, err := cont.executeSync(conf, &tc.args); err != nil {
t.Fatalf("executeAsync(%+v): %v", tc.args, err)
} else if ws != 0 {
t.Fatalf("executeAsync(%+v) failed with exit: %v", tc.args, ws)
@ -882,7 +882,7 @@ func TestExec(t *testing.T) {
}
defer unix.Close(fds[0])
_, err = cont.executeSync(&control.ExecArgs{
_, err = cont.executeSync(conf, &control.ExecArgs{
Argv: []string{"/nonexist"},
FilePayload: urpc.FilePayload{
Files: []*os.File{os.NewFile(uintptr(fds[1]), "sock")},
@ -937,7 +937,7 @@ func TestExecProcList(t *testing.T) {
// start running exec (which blocks).
ch := make(chan error)
go func() {
exitStatus, err := cont.executeSync(execArgs)
exitStatus, err := cont.executeSync(conf, execArgs)
if err != nil {
ch <- err
} else if exitStatus != 0 {
@ -1544,7 +1544,7 @@ func TestCapabilities(t *testing.T) {
}
// "exe" should fail because we don't have the necessary permissions.
if _, err := cont.executeSync(execArgs); err == nil {
if _, err := cont.executeSync(conf, execArgs); err == nil {
t.Fatalf("container executed without error, but an error was expected")
}
@ -1553,7 +1553,7 @@ func TestCapabilities(t *testing.T) {
EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
}
// "exe" should not fail this time.
if _, err := cont.executeSync(execArgs); err != nil {
if _, err := cont.executeSync(conf, execArgs); err != nil {
t.Fatalf("container failed to exec %v: %v", args, err)
}
})
@ -1664,7 +1664,7 @@ func TestReadonlyRoot(t *testing.T) {
}
// Read mounts to check that root is readonly.
out, err := executeCombinedOutput(c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'")
out, err := executeCombinedOutput(conf, c, "/bin/sh", "-c", "mount | grep ' / ' | grep -o -e '(.*)'")
if err != nil {
t.Fatalf("exec failed: %v", err)
}
@ -1674,7 +1674,7 @@ func TestReadonlyRoot(t *testing.T) {
}
// Check that file cannot be created.
ws, err := execute(c, "/bin/touch", "/foo")
ws, err := execute(conf, c, "/bin/touch", "/foo")
if err != nil {
t.Fatalf("touch file in ro mount: %v", err)
}
@ -1723,7 +1723,7 @@ func TestReadonlyMount(t *testing.T) {
// Read mounts to check that volume is readonly.
cmd := fmt.Sprintf("mount | grep ' %s ' | grep -o -e '(.*)'", dir)
out, err := executeCombinedOutput(c, "/bin/sh", "-c", cmd)
out, err := executeCombinedOutput(conf, c, "/bin/sh", "-c", cmd)
if err != nil {
t.Fatalf("exec failed, err: %v", err)
}
@ -1733,7 +1733,7 @@ func TestReadonlyMount(t *testing.T) {
}
// Check that file cannot be created.
ws, err := execute(c, "/bin/touch", path.Join(dir, "file"))
ws, err := execute(conf, c, "/bin/touch", path.Join(dir, "file"))
if err != nil {
t.Fatalf("touch file in ro mount: %v", err)
}
@ -2278,13 +2278,13 @@ func TestMountPropagation(t *testing.T) {
// Check that mount didn't propagate to private mount.
privFile := filepath.Join(priv, "mnt", "file")
if ws, err := execute(cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 {
if ws, err := execute(conf, cont, "/usr/bin/test", "!", "-f", privFile); err != nil || ws != 0 {
t.Fatalf("exec: test ! -f %q, ws: %v, err: %v", privFile, ws, err)
}
// Check that mount propagated to slave mount.
slaveFile := filepath.Join(slave, "mnt", "file")
if ws, err := execute(cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 {
if ws, err := execute(conf, cont, "/usr/bin/test", "-f", slaveFile); err != nil || ws != 0 {
t.Fatalf("exec: test -f %q, ws: %v, err: %v", privFile, ws, err)
}
}
@ -2350,7 +2350,7 @@ func TestMountSymlink(t *testing.T) {
// Check that symlink was resolved and mount was created where the symlink
// is pointing to.
file := path.Join(target, "file")
if ws, err := execute(cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 {
if ws, err := execute(conf, cont, "/usr/bin/test", "-f", file); err != nil || ws != 0 {
t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
}
})
@ -2589,7 +2589,7 @@ func TestRlimitsExec(t *testing.T) {
t.Fatalf("error starting container: %v", err)
}
got, err := executeCombinedOutput(cont, "/bin/sh", "-c", "ulimit -n")
got, err := executeCombinedOutput(conf, cont, "/bin/sh", "-c", "ulimit -n")
if err != nil {
t.Fatal(err)
}

----------------------------------------

@ -105,11 +105,11 @@ type execDesc struct {
name string
}
func execMany(t *testing.T, execs []execDesc) {
func execMany(t *testing.T, conf *config.Config, execs []execDesc) {
for _, exec := range execs {
t.Run(exec.name, func(t *testing.T) {
args := &control.ExecArgs{Argv: exec.cmd}
if ws, err := exec.c.executeSync(args); err != nil {
if ws, err := exec.c.executeSync(conf, args); err != nil {
t.Errorf("error executing %+v: %v", args, err)
} else if ws.ExitStatus() != exec.want {
t.Errorf("%q: exec %q got exit status: %d, want: %d", exec.name, exec.cmd, ws.ExitStatus(), exec.want)
@ -217,7 +217,7 @@ func TestMultiPIDNS(t *testing.T) {
newProcessBuilder().PID(2).Cmd("sleep").Process(),
newProcessBuilder().Cmd("ps").Process(),
}
got, err := execPS(containers[0])
got, err := execPS(conf, containers[0])
if err != nil {
t.Fatal(err)
}
@ -229,7 +229,7 @@ func TestMultiPIDNS(t *testing.T) {
newProcessBuilder().PID(1).Cmd("sleep").Process(),
newProcessBuilder().Cmd("ps").Process(),
}
got, err = execPS(containers[1])
got, err = execPS(conf, containers[1])
if err != nil {
t.Fatal(err)
}
@ -313,7 +313,7 @@ func TestMultiPIDNSPath(t *testing.T) {
newProcessBuilder().PID(3).Cmd("sleep").Process(),
newProcessBuilder().Cmd("ps").Process(),
}
got, err := execPS(containers[0])
got, err := execPS(conf, containers[0])
if err != nil {
t.Fatal(err)
}
@ -328,7 +328,7 @@ func TestMultiPIDNSPath(t *testing.T) {
newProcessBuilder().PID(3).Cmd("sleep").Process(),
newProcessBuilder().Cmd("ps").Process(),
}
got, err = execPS(containers[1])
got, err = execPS(conf, containers[1])
if err != nil {
t.Fatal(err)
}
@ -341,7 +341,7 @@ func TestMultiPIDNSPath(t *testing.T) {
newProcessBuilder().PID(1).Cmd("sleep").Process(),
newProcessBuilder().Cmd("ps").Process(),
}
got, err = execPS(containers[2])
got, err = execPS(conf, containers[2])
if err != nil {
t.Fatal(err)
}
@ -541,7 +541,7 @@ func TestExecWait(t *testing.T) {
WorkingDirectory: "/",
KUID: 0,
}
pid, err := containers[0].Execute(args)
pid, err := containers[0].Execute(conf, args)
if err != nil {
t.Fatalf("error executing: %v", err)
}
@ -744,7 +744,7 @@ func TestMultiContainerDestroy(t *testing.T) {
Filename: app,
Argv: []string{app, "fork-bomb"},
}
if _, err := containers[1].Execute(args); err != nil {
if _, err := containers[1].Execute(conf, args); err != nil {
t.Fatalf("error exec'ing: %v", err)
}
@ -821,7 +821,7 @@ func TestMultiContainerProcesses(t *testing.T) {
Filename: "/bin/sleep",
Argv: []string{"/bin/sleep", "100"},
}
if _, err := containers[1].Execute(args); err != nil {
if _, err := containers[1].Execute(conf, args); err != nil {
t.Fatalf("error exec'ing: %v", err)
}
expectedPL1 = append(expectedPL1, newProcessBuilder().PID(4).Cmd("sleep").Process())
@ -882,7 +882,7 @@ func TestMultiContainerKillAll(t *testing.T) {
Filename: app,
Argv: []string{app, "task-tree", "--depth=2", "--width=2"},
}
if _, err := containers[1].Execute(args); err != nil {
if _, err := containers[1].Execute(conf, args); err != nil {
t.Fatalf("error exec'ing: %v", err)
}
// Wait for these new processes to start.
@ -1317,7 +1317,7 @@ func TestMultiContainerSharedMount(t *testing.T) {
name: "dir removed from container1",
},
}
execMany(t, execs)
execMany(t, conf, execs)
})
}
}
@ -1382,7 +1382,7 @@ func TestMultiContainerSharedMountReadonly(t *testing.T) {
name: "fails to write to container1",
},
}
execMany(t, execs)
execMany(t, conf, execs)
})
}
}
@ -1440,7 +1440,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) {
name: "file appears in container1",
},
}
execMany(t, execs)
execMany(t, conf, execs)
containers[1].Destroy()
@ -1490,7 +1490,7 @@ func TestMultiContainerSharedMountRestart(t *testing.T) {
name: "file removed from container1",
},
}
execMany(t, execs)
execMany(t, conf, execs)
})
}
}
@ -1543,7 +1543,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
name: "directory is mounted in container1",
},
}
execMany(t, execs)
execMany(t, conf, execs)
})
}
}
@ -1654,7 +1654,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
}
// Check that container isn't running anymore.
if _, err := execute(c, "/bin/true"); err == nil {
if _, err := execute(conf, c, "/bin/true"); err == nil {
t.Fatalf("Container %q was not stopped after gofer death", c.ID)
}
@ -1669,7 +1669,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
if err := waitForProcessList(c, pl); err != nil {
t.Errorf("Container %q was affected by another container: %v", c.ID, err)
}
if _, err := execute(c, "/bin/true"); err != nil {
if _, err := execute(conf, c, "/bin/true"); err != nil {
t.Fatalf("Container %q was affected by another container: %v", c.ID, err)
}
}
@ -1691,7 +1691,7 @@ func TestMultiContainerGoferKilled(t *testing.T) {
// Check that entire sandbox isn't running anymore.
for _, c := range containers {
if _, err := execute(c, "/bin/true"); err == nil {
if _, err := execute(conf, c, "/bin/true"); err == nil {
t.Fatalf("Container %q was not stopped after gofer death", c.ID)
}
}
@ -1867,7 +1867,7 @@ func TestMultiContainerHomeEnvDir(t *testing.T) {
defer cleanup()
// Exec into the root container synchronously.
if _, err := execute(containers[0], "/bin/sh", "-c", execCmd); err != nil {
if _, err := execute(conf, containers[0], "/bin/sh", "-c", execCmd); err != nil {
t.Errorf("error executing %+v: %v", execCmd, err)
}
@ -2056,7 +2056,7 @@ func TestDuplicateEnvVariable(t *testing.T) {
Argv: []string{"/bin/sh", "-c", cmdExec},
Envv: []string{"VAR=foo", "VAR=bar"},
}
if ws, err := containers[0].executeSync(execArgs); err != nil || ws.ExitStatus() != 0 {
if ws, err := containers[0].executeSync(conf, execArgs); err != nil || ws.ExitStatus() != 0 {
t.Fatalf("exec failed, ws: %v, err: %v", ws, err)
}

----------------------------------------

@ -72,7 +72,7 @@ func TestSharedVolume(t *testing.T) {
Filename: "/usr/bin/test",
Argv: []string{"test", "-f", filename},
}
if ws, err := c.executeSync(argsTestFile); err != nil {
if ws, err := c.executeSync(conf, argsTestFile); err != nil {
t.Fatalf("unexpected error testing file %q: %v", filename, err)
} else if ws.ExitStatus() == 0 {
t.Errorf("test %q exited with code %v, wanted not zero", ws.ExitStatus(), err)
@ -84,7 +84,7 @@ func TestSharedVolume(t *testing.T) {
}
// Now we should be able to test the file from within the sandbox.
if ws, err := c.executeSync(argsTestFile); err != nil {
if ws, err := c.executeSync(conf, argsTestFile); err != nil {
t.Fatalf("unexpected error testing file %q: %v", filename, err)
} else if ws.ExitStatus() != 0 {
t.Errorf("test %q exited with code %v, wanted zero", filename, ws.ExitStatus())
@ -97,7 +97,7 @@ func TestSharedVolume(t *testing.T) {
}
// File should no longer exist at the old path within the sandbox.
if ws, err := c.executeSync(argsTestFile); err != nil {
if ws, err := c.executeSync(conf, argsTestFile); err != nil {
t.Fatalf("unexpected error testing file %q: %v", filename, err)
} else if ws.ExitStatus() == 0 {
t.Errorf("test %q exited with code %v, wanted not zero", filename, ws.ExitStatus())
@ -108,7 +108,7 @@ func TestSharedVolume(t *testing.T) {
Filename: "/usr/bin/test",
Argv: []string{"test", "-f", newFilename},
}
if ws, err := c.executeSync(argsTestNewFile); err != nil {
if ws, err := c.executeSync(conf, argsTestNewFile); err != nil {
t.Fatalf("unexpected error testing file %q: %v", newFilename, err)
} else if ws.ExitStatus() != 0 {
t.Errorf("test %q exited with code %v, wanted zero", newFilename, ws.ExitStatus())
@ -120,7 +120,7 @@ func TestSharedVolume(t *testing.T) {
}
// Renamed file should no longer exist at the old path within the sandbox.
if ws, err := c.executeSync(argsTestNewFile); err != nil {
if ws, err := c.executeSync(conf, argsTestNewFile); err != nil {
t.Fatalf("unexpected error testing file %q: %v", newFilename, err)
} else if ws.ExitStatus() == 0 {
t.Errorf("test %q exited with code %v, wanted not zero", newFilename, ws.ExitStatus())
@ -133,7 +133,7 @@ func TestSharedVolume(t *testing.T) {
KUID: auth.KUID(os.Getuid()),
KGID: auth.KGID(os.Getgid()),
}
if ws, err := c.executeSync(argsTouch); err != nil {
if ws, err := c.executeSync(conf, argsTouch); err != nil {
t.Fatalf("unexpected error touching file %q: %v", filename, err)
} else if ws.ExitStatus() != 0 {
t.Errorf("touch %q exited with code %v, wanted zero", filename, ws.ExitStatus())
@ -154,7 +154,7 @@ func TestSharedVolume(t *testing.T) {
Filename: "/bin/rm",
Argv: []string{"rm", filename},
}
if ws, err := c.executeSync(argsRemove); err != nil {
if ws, err := c.executeSync(conf, argsRemove); err != nil {
t.Fatalf("unexpected error removing file %q: %v", filename, err)
} else if ws.ExitStatus() != 0 {
t.Errorf("remove %q exited with code %v, wanted zero", filename, ws.ExitStatus())
@ -166,9 +166,9 @@ func TestSharedVolume(t *testing.T) {
}
}
func checkFile(c *Container, filename string, want []byte) error {
func checkFile(conf *config.Config, c *Container, filename string, want []byte) error {
cpy := filename + ".copy"
if _, err := execute(c, "/bin/cp", "-f", filename, cpy); err != nil {
if _, err := execute(conf, c, "/bin/cp", "-f", filename, cpy); err != nil {
return fmt.Errorf("unexpected error copying file %q to %q: %v", filename, cpy, err)
}
got, err := ioutil.ReadFile(cpy)
@ -226,16 +226,16 @@ func TestSharedVolumeFile(t *testing.T) {
if err := ioutil.WriteFile(filename, []byte(want), 0666); err != nil {
t.Fatalf("Error writing to %q: %v", filename, err)
}
if err := checkFile(c, filename, want); err != nil {
if err := checkFile(conf, c, filename, want); err != nil {
t.Fatal(err.Error())
}
// Append to file inside the container and check that content is not lost.
if _, err := execute(c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil {
if _, err := execute(conf, c, "/bin/bash", "-c", "echo -n sandbox- >> "+filename); err != nil {
t.Fatalf("unexpected error appending file %q: %v", filename, err)
}
want = []byte("host-sandbox-")
if err := checkFile(c, filename, want); err != nil {
if err := checkFile(conf, c, filename, want); err != nil {
t.Fatal(err.Error())
}
@ -250,7 +250,7 @@ func TestSharedVolumeFile(t *testing.T) {
t.Fatalf("Error writing to file %q: %v", filename, err)
}
want = []byte("host-sandbox-host")
if err := checkFile(c, filename, want); err != nil {
if err := checkFile(conf, c, filename, want); err != nil {
t.Fatal(err.Error())
}
@ -259,7 +259,7 @@ func TestSharedVolumeFile(t *testing.T) {
t.Fatalf("Error truncating file %q: %v", filename, err)
}
want = want[:5]
if err := checkFile(c, filename, want); err != nil {
if err := checkFile(conf, c, filename, want); err != nil {
t.Fatal(err.Error())
}
}

----------------------------------------

@ -65,6 +65,11 @@ type Sandbox struct {
// is not running.
Pid int `json:"pid"`
// UID is the user ID in the parent namespace that the sandbox is running as.
UID int `json:"uid"`
// GID is the group ID in the parent namespace that the sandbox is running as.
GID int `json:"gid"`
// Cgroup has the cgroup configuration for the sandbox.
Cgroup *cgroup.Cgroup `json:"cgroup"`
@ -176,18 +181,22 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) {
}
// CreateContainer creates a non-root container inside the sandbox.
func (s *Sandbox) CreateContainer(cid string, tty *os.File) error {
func (s *Sandbox) CreateContainer(conf *config.Config, cid string, tty *os.File) error {
log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid)
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
}
defer sandboxConn.Close()
var files []*os.File
if tty != nil {
files = []*os.File{tty}
}
if err := s.configureStdios(conf, files); err != nil {
return err
}
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
}
defer sandboxConn.Close()
args := boot.CreateArgs{
CID: cid,
@ -225,6 +234,11 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *config.Config) error {
// StartContainer starts running a non-root container inside the sandbox.
func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles []*os.File) error {
log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid)
if err := s.configureStdios(conf, stdios); err != nil {
return err
}
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
@ -318,8 +332,13 @@ func (s *Sandbox) NewCGroup() (*cgroup.Cgroup, error) {
// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) {
func (s *Sandbox) Execute(conf *config.Config, args *control.ExecArgs) (int32, error) {
log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID)
if err := s.configureStdios(conf, args.Files); err != nil {
return 0, err
}
conn, err := s.sandboxConnect()
if err != nil {
return 0, s.connError(err)
@ -505,6 +524,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
cmd.Stdin = nil
cmd.Stdout = nil
cmd.Stderr = nil
var stdios [3]*os.File
// If the console control socket file is provided, then create a new
// pty master/replica pair and set the TTY on the sandbox process.
@ -525,11 +545,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
cmd.SysProcAttr.Ctty = nextFD
// Pass the tty as all stdio fds to sandbox.
for i := 0; i < 3; i++ {
cmd.ExtraFiles = append(cmd.ExtraFiles, tty)
cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
nextFD++
}
stdios[0] = tty
stdios[1] = tty
stdios[2] = tty
if conf.Debug {
// If debugging, send the boot process stdio to the
@ -541,11 +559,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
} else {
// If not using a console, pass our current stdio as the
// container stdio via flags.
for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
nextFD++
}
stdios[0] = os.Stdin
stdios[1] = os.Stdout
stdios[2] = os.Stderr
if conf.Debug {
// If debugging, send the boot process stdio to the
@ -595,6 +611,10 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
}
// These are set to the uid/gid that the sandbox process will use.
s.UID = os.Getuid()
s.GID = os.Getgid()
// User namespace depends on the network type. Host network requires running
// inside the user namespace specified in the spec or the current namespace
// if none is configured.
@ -636,51 +656,49 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
const nobody = 65534
if conf.Rootless {
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: os.Getgid(),
Size: 1,
},
}
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
// root for itself, so it has to have
// CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
s.UID = nobody
s.GID = nobody
}
// Set credentials to run as user and group nobody.
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: s.UID,
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: s.GID,
Size: 1,
},
}
// A sandbox process will construct an empty root for itself, so it has
// to have CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
}
if err := s.configureStdios(conf, stdios[:]); err != nil {
return fmt.Errorf("configuring stdios: %w", err)
}
for _, file := range stdios {
cmd.ExtraFiles = append(cmd.ExtraFiles, file)
cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
nextFD++
}
// Set Args[0] to make it easier to spot the sandbox process. Otherwise it's
// shown as `exe`.
cmd.Args[0] = "runsc-sandbox"
if s.Cgroup != nil {
@ -1167,6 +1185,23 @@ func (s *Sandbox) waitForStopped() error {
return backoff.Retry(op, b)
}
// configureStdios changes stdios ownership to give the sandbox process
// access. This may be skipped depending on the configuration.
func (s *Sandbox) configureStdios(conf *config.Config, stdios []*os.File) error {
if conf.Rootless || conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Cannot change ownership without CAP_CHOWN.
return nil
}
for _, file := range stdios {
log.Debugf("Changing %q ownership to %d/%d", file.Name(), s.UID, s.GID)
if err := file.Chown(s.UID, s.GID); err != nil {
return err
}
}
return nil
}
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
func deviceFileForPlatform(name string) (*os.File, error) {

----------------------------------------

@ -30,6 +30,7 @@ import (
"net/http"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"testing"
@ -426,10 +427,10 @@ func TestTmpMount(t *testing.T) {
// Test that it is allowed to mount a file on top of /dev files, e.g.
// /dev/random.
func TestMountOverDev(t *testing.T) {
if usingVFS2, err := dockerutil.UsingVFS2(); !usingVFS2 {
t.Skip("VFS1 doesn't allow /dev/random to be mounted.")
} else if err != nil {
if vfs2, err := dockerutil.UsingVFS2(); err != nil {
t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
} else if !vfs2 {
t.Skip("VFS1 doesn't allow /dev/random to be mounted.")
}
random, err := ioutil.TempFile(testutil.TmpDir(), "random")
@ -574,11 +575,12 @@ func runIntegrationTest(t *testing.T, capAdd []string, args ...string) {
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
if got, err := d.Run(ctx, dockerutil.RunOpts{
opts := dockerutil.RunOpts{
Image: "basic/integrationtest",
WorkDir: "/root",
CapAdd: capAdd,
}, args...); err != nil {
}
if got, err := d.Run(ctx, opts, args...); err != nil {
t.Fatalf("docker run failed: %v", err)
} else if got != "" {
t.Errorf("test failed:\n%s", got)
@ -609,6 +611,107 @@ func TestBindOverlay(t *testing.T) {
}
}
func TestStdios(t *testing.T) {
if vfs2, err := dockerutil.UsingVFS2(); err != nil {
t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
} else if !vfs2 {
t.Skip("VFS1 doesn't adjust stdios user")
}
ctx := context.Background()
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
testStdios(t, func(user string, args ...string) (string, error) {
defer d.CleanUp(ctx)
opts := dockerutil.RunOpts{
Image: "basic/alpine",
User: user,
}
return d.Run(ctx, opts, args...)
})
}
func TestStdiosExec(t *testing.T) {
if vfs2, err := dockerutil.UsingVFS2(); err != nil {
t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
} else if !vfs2 {
t.Skip("VFS1 doesn't adjust stdios user")
}
ctx := context.Background()
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
runOpts := dockerutil.RunOpts{Image: "basic/alpine"}
if err := d.Spawn(ctx, runOpts, "sleep", "100"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
testStdios(t, func(user string, args ...string) (string, error) {
opts := dockerutil.ExecOpts{User: user}
return d.Exec(ctx, opts, args...)
})
}
func testStdios(t *testing.T, run func(string, ...string) (string, error)) {
const cmd = "stat -L /proc/self/fd/0 /proc/self/fd/1 /proc/self/fd/2 | grep 'Uid:'"
got, err := run("123", "/bin/sh", "-c", cmd)
if err != nil {
t.Fatalf("docker exec failed: %v", err)
}
if len(got) == 0 {
t.Errorf("Unexpected empty output from %q", cmd)
}
re := regexp.MustCompile(`Uid: \(\s*(\w+)\/.*\)`)
for _, line := range strings.SplitN(got, "\n", 3) {
t.Logf("stat -L: %s", line)
matches := re.FindSubmatch([]byte(line))
if len(matches) != 2 {
t.Fatalf("wrong output format: %q: matches: %v", line, matches)
}
if want, got := "123", string(matches[1]); want != got {
t.Errorf("wrong user, want: %q, got: %q", want, got)
}
}
// Check that stdout and stderr can be opened and written to. This checks
// that ownership and permissions are correct inside gVisor.
got, err = run("456", "/bin/sh", "-c", "echo foobar | tee /proc/self/fd/1 > /proc/self/fd/2")
if err != nil {
t.Fatalf("docker run failed: %v", err)
}
t.Logf("echo foobar: %q", got)
// Check it repeats twice, once for stdout and once for stderr.
if want := "foobar\nfoobar\n"; want != got {
t.Errorf("Wrong echo output, want: %q, got: %q", want, got)
}
// Check that timestamps can be changed. Setting timestamps requires an extra
// write check _after_ the file was opened, and may fail if the underlying
// host file is not set up correctly.
if _, err := run("789", "touch", "/proc/self/fd/0", "/proc/self/fd/1", "/proc/self/fd/2"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
}
func TestStdiosChown(t *testing.T) {
if vfs2, err := dockerutil.UsingVFS2(); err != nil {
t.Fatalf("Failed to read config for runtime %s: %v", dockerutil.Runtime(), err)
} else if !vfs2 {
t.Skip("VFS1 doesn't adjust stdios user")
}
ctx := context.Background()
d := dockerutil.MakeContainer(ctx, t)
defer d.CleanUp(ctx)
opts := dockerutil.RunOpts{Image: "basic/alpine"}
if _, err := d.Run(ctx, opts, "chown", "123", "/proc/self/fd/0", "/proc/self/fd/1", "/proc/self/fd/2"); err != nil {
t.Fatalf("docker run failed: %v", err)
}
}
func TestMain(m *testing.M) {
dockerutil.EnsureSupportedDockerVersion()
flag.Parse()