gvisor/runsc/sandbox/sandbox.go


// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package sandbox creates and manipulates sandboxes.
package sandbox
import (
"context"
"fmt"
"os"
"os/exec"
"strconv"
"sync"
"syscall"
"time"
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
"gvisor.dev/gvisor/pkg/control/client"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/boot/platforms"
"gvisor.dev/gvisor/runsc/cgroup"
"gvisor.dev/gvisor/runsc/console"
"gvisor.dev/gvisor/runsc/specutils"
)
// Sandbox wraps a sandbox process.
//
// It is used to start/stop the sandbox process (and associated processes like
// gofers), as well as for running and manipulating containers inside a running
// sandbox.
//
// Note: Sandbox must be immutable because a copy of it is saved for each
// container and changes would not be synchronized to all of them.
type Sandbox struct {
// ID is the id of the sandbox (immutable). By convention, this is the same
// ID as the first container run in the sandbox.
ID string `json:"id"`
// Pid is the pid of the running sandbox (immutable). May be 0 if the sandbox
// is not running.
Pid int `json:"pid"`
// Cgroup has the cgroup configuration for the sandbox.
Cgroup *cgroup.Cgroup `json:"cgroup"`
// child is set if a sandbox process is a child of the current process.
//
// This field isn't saved to json, because only a creator of sandbox
// will have it as a child process.
child bool
// status is the exit status of the sandbox process.
status syscall.WaitStatus
// statusMu protects status.
statusMu sync.Mutex
}
// Args is used to configure a new sandbox.
type Args struct {
// ID is the sandbox unique identifier.
ID string
// Spec is the OCI spec that describes the container.
Spec *specs.Spec
// BundleDir is the directory containing the container bundle.
BundleDir string
// ConsoleSocket is the path to a unix domain socket that will receive
// the console FD. It may be empty.
ConsoleSocket string
// UserLog is the filename to send user-visible logs to. It may be empty.
UserLog string
// IOFiles is the list of files that connect to a 9P endpoint for the mount
// points using Gofers. They must be in the same order as the mounts appear in
// the spec.
IOFiles []*os.File
// MountsFile is a file containing mount information from the spec. It's
// equivalent to the mounts from the spec, except that all paths have been
// resolved to their final absolute location.
MountsFile *os.File
// Cgroup is the cgroup that the sandbox is part of.
Cgroup *cgroup.Cgroup
// Attached indicates that the sandbox lifecycle is tied to the caller's.
// If the caller exits, the sandbox should exit too.
Attached bool
}
// New creates the sandbox process. The caller must call Destroy() on the
// sandbox.
func New(conf *boot.Config, args *Args) (*Sandbox, error) {
s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
c := specutils.MakeCleanup(func() {
if err := s.destroy(); err != nil {
log.Warningf("error destroying sandbox: %v", err)
}
})
defer c.Clean()
// Create pipe to synchronize when sandbox process has been booted.
clientSyncFile, sandboxSyncFile, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("creating pipe for sandbox %q: %v", s.ID, err)
}
defer clientSyncFile.Close()
// Create the sandbox process.
err = s.createSandboxProcess(conf, args, sandboxSyncFile)
// sandboxSyncFile has to be closed to be able to detect when the sandbox
// process exits unexpectedly.
sandboxSyncFile.Close()
if err != nil {
return nil, err
}
// Wait until the sandbox has booted.
b := make([]byte, 1)
if l, err := clientSyncFile.Read(b); err != nil || l != 1 {
return nil, fmt.Errorf("waiting for sandbox to start: %v", err)
}
c.Release()
return s, nil
}
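// The boot handshake used by New, shown in isolation: the parent keeps the
// read end of a pipe and donates the write end to the child, which writes a
// single byte once it has booted. Closing the parent's copy of the write end
// is what turns an unexpected child exit into a short read instead of a hang.
// This is a minimal sketch with a hypothetical start callback; it is not used
// by runsc itself.
func exampleBootHandshake(start func(syncW *os.File) error) error {
	syncR, syncW, err := os.Pipe()
	if err != nil {
		return err
	}
	defer syncR.Close()
	// start stands in for exec'ing the child with syncW donated to it.
	startErr := start(syncW)
	// Close our copy of the write end; otherwise the Read below could block
	// forever even after the child exits unexpectedly.
	syncW.Close()
	if startErr != nil {
		return startErr
	}
	b := make([]byte, 1)
	if n, err := syncR.Read(b); err != nil || n != 1 {
		return fmt.Errorf("child did not signal readiness: %v", err)
	}
	return nil
}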
// CreateContainer creates a non-root container inside the sandbox.
func (s *Sandbox) CreateContainer(cid string) error {
log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid)
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
}
defer sandboxConn.Close()
if err := sandboxConn.Call(boot.ContainerCreate, &cid, nil); err != nil {
return fmt.Errorf("creating non-root container %q: %v", cid, err)
}
return nil
}
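// Typical multi-container sequence, as a sketch (cid, spec, conf, and
// goferFiles are the caller's):
//
//	if err := s.CreateContainer(cid); err != nil {
//		return err
//	}
//	if err := s.StartContainer(spec, conf, cid, goferFiles); err != nil {
//		return err
//	}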
// StartRoot starts running the root container process inside the sandbox.
func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
log.Debugf("Start root sandbox %q, PID: %d", s.ID, s.Pid)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
// Configure the network.
if err := setupNetwork(conn, s.Pid, spec, conf); err != nil {
return fmt.Errorf("setting up network: %v", err)
}
// Send a message to the sandbox control server to start the root
// container.
if err := conn.Call(boot.RootContainerStart, &s.ID, nil); err != nil {
return fmt.Errorf("starting root container: %v", err)
}
return nil
}
// StartContainer starts running a non-root container inside the sandbox.
func (s *Sandbox) StartContainer(spec *specs.Spec, conf *boot.Config, cid string, goferFiles []*os.File) error {
for _, f := range goferFiles {
defer f.Close()
}
log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid)
sandboxConn, err := s.sandboxConnect()
if err != nil {
return fmt.Errorf("couldn't connect to sandbox: %v", err)
}
defer sandboxConn.Close()
// The payload must contain stdin/stdout/stderr followed by gofer
// files.
files := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, goferFiles...)
// Start running the container.
args := boot.StartArgs{
Spec: spec,
Conf: conf,
CID: cid,
FilePayload: urpc.FilePayload{Files: files},
}
if err := sandboxConn.Call(boot.ContainerStart, &args, nil); err != nil {
return fmt.Errorf("starting non-root container %v: %v", spec.Process.Args, err)
}
return nil
}
// Restore sends the restore call for a container in the sandbox.
func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, filename string) error {
log.Debugf("Restore sandbox %q", s.ID)
rf, err := os.Open(filename)
if err != nil {
return fmt.Errorf("opening restore file %q failed: %v", filename, err)
}
defer rf.Close()
opt := boot.RestoreOpts{
FilePayload: urpc.FilePayload{
Files: []*os.File{rf},
},
SandboxID: s.ID,
}
// If the platform needs a device FD we must pass it in.
if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil {
return err
} else if deviceFile != nil {
defer deviceFile.Close()
opt.FilePayload.Files = append(opt.FilePayload.Files, deviceFile)
}
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
// Configure the network.
if err := setupNetwork(conn, s.Pid, spec, conf); err != nil {
return fmt.Errorf("setting up network: %v", err)
}
// Restore the container and start the root container.
if err := conn.Call(boot.ContainerRestore, &opt, nil); err != nil {
return fmt.Errorf("restoring container %q: %v", cid, err)
}
return nil
}
// Processes retrieves the list of processes and associated metadata for a
// given container in this sandbox.
func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
log.Debugf("Getting processes for container %q in sandbox %q", cid, s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return nil, err
}
defer conn.Close()
var pl []*control.Process
if err := conn.Call(boot.ContainerProcesses, &cid, &pl); err != nil {
return nil, fmt.Errorf("retrieving process data from sandbox: %v", err)
}
return pl, nil
}
// Execute runs the specified command in the container. It returns the PID of
// the newly created process.
func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) {
log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return 0, s.connError(err)
}
defer conn.Close()
// Send a message to the sandbox control server to start the container.
var pid int32
if err := conn.Call(boot.ContainerExecuteAsync, args, &pid); err != nil {
return 0, fmt.Errorf("executing command %q in sandbox: %v", args, err)
}
return pid, nil
}
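// Example call, as a sketch (assumes control.ExecArgs carries at least the
// container ID and argv at this revision):
//
//	pid, err := s.Execute(&control.ExecArgs{
//		ContainerID: cid,
//		Argv:        []string{"/bin/true"},
//	})
//	if err != nil {
//		return err
//	}
//	log.Debugf("started exec'd process with PID %d", pid)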
// Event retrieves stats about the sandbox such as memory and CPU utilization.
func (s *Sandbox) Event(cid string) (*boot.Event, error) {
log.Debugf("Getting events for container %q in sandbox %q", cid, s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return nil, err
}
defer conn.Close()
var e boot.Event
// TODO(b/129292330): Pass in the container id (cid) here. The sandbox
// should return events only for that container.
if err := conn.Call(boot.ContainerEvent, nil, &e); err != nil {
return nil, fmt.Errorf("retrieving event data from sandbox: %v", err)
}
e.ID = cid
return &e, nil
}
func (s *Sandbox) sandboxConnect() (*urpc.Client, error) {
log.Debugf("Connecting to sandbox %q", s.ID)
conn, err := client.ConnectTo(boot.ControlSocketAddr(s.ID))
if err != nil {
return nil, s.connError(err)
}
return conn, nil
}
func (s *Sandbox) connError(err error) error {
return fmt.Errorf("connecting to control server at PID %d: %v", s.Pid, err)
}
// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
// command, passing in the bundle dir.
func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncFile *os.File) error {
// nextFD is used to get unused FDs that we can pass to the sandbox. It
// starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
nextFD := 3
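// Every donation below follows the same three-step pattern (--some-fd is a
// placeholder; the real flags are --log-fd, --controller-fd, etc.): append
// the file to cmd.ExtraFiles so it becomes FD nextFD in the child, tell the
// child which FD it landed on via a flag, then bump nextFD:
//
//	cmd.ExtraFiles = append(cmd.ExtraFiles, f) // child sees f as FD nextFD
//	cmd.Args = append(cmd.Args, "--some-fd="+strconv.Itoa(nextFD))
//	nextFD++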
binPath := specutils.ExePath
cmd := exec.Command(binPath, conf.ToFlags()...)
cmd.SysProcAttr = &syscall.SysProcAttr{}
// Open the log files to pass to the sandbox as FDs.
//
// These flags must come BEFORE the "boot" command in cmd.Args.
if conf.LogFilename != "" {
logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening log file %q: %v", conf.LogFilename, err)
}
defer logFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, logFile)
cmd.Args = append(cmd.Args, "--log-fd="+strconv.Itoa(nextFD))
nextFD++
}
if conf.DebugLog != "" {
test := ""
if len(conf.TestOnlyTestNameEnv) != 0 {
// Fetch test name if one is provided and the test only flag was set.
if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
test = t
}
}
debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "boot", test)
if err != nil {
return fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err)
}
defer debugLogFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, debugLogFile)
cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD))
nextFD++
}
cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM)))
// Add the "boot" command to the args.
//
// All flags after this must be for the boot command
cmd.Args = append(cmd.Args, "boot", "--bundle="+args.BundleDir)
// Create a socket for the control server and donate it to the sandbox.
addr := boot.ControlSocketAddr(s.ID)
sockFD, err := server.CreateSocket(addr)
log.Infof("Creating sandbox process with addr: %s", addr[1:]) // skip "\00".
if err != nil {
return fmt.Errorf("creating control server socket for sandbox %q: %v", s.ID, err)
}
controllerFile := os.NewFile(uintptr(sockFD), "control_server_socket")
defer controllerFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, controllerFile)
cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD))
nextFD++
defer args.MountsFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, args.MountsFile)
cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD))
nextFD++
specFile, err := specutils.OpenSpec(args.BundleDir)
if err != nil {
return err
}
defer specFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, specFile)
cmd.Args = append(cmd.Args, "--spec-fd="+strconv.Itoa(nextFD))
nextFD++
cmd.ExtraFiles = append(cmd.ExtraFiles, startSyncFile)
cmd.Args = append(cmd.Args, "--start-sync-fd="+strconv.Itoa(nextFD))
nextFD++
// If there is a gofer, send all socket ends to the sandbox.
for _, f := range args.IOFiles {
defer f.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD))
nextFD++
}
// If the platform needs a device FD we must pass it in.
if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil {
return err
} else if deviceFile != nil {
defer deviceFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile)
cmd.Args = append(cmd.Args, "--device-fd="+strconv.Itoa(nextFD))
nextFD++
}
// The current process' stdio must be passed to the application via the
// --stdio-fds flag. The stdio of the sandbox process itself must not
// be connected to the same FDs, otherwise we risk leaking sandbox
// errors to the application, so we set the sandbox stdio to nil,
// causing them to read/write from the null device.
cmd.Stdin = nil
cmd.Stdout = nil
cmd.Stderr = nil
// If the console control socket file is provided, then create a new
// pty master/slave pair and set the TTY on the sandbox process.
if args.ConsoleSocket != "" {
cmd.Args = append(cmd.Args, "--console=true")
// console.NewWithSocket will send the master on the given
// socket, and return the slave.
tty, err := console.NewWithSocket(args.ConsoleSocket)
if err != nil {
return fmt.Errorf("setting up console with socket %q: %v", args.ConsoleSocket, err)
}
defer tty.Close()
// Set the TTY as a controlling TTY on the sandbox process.
cmd.SysProcAttr.Setctty = true
// The Ctty FD must be the FD in the child process's FD table,
// which will be nextFD in this case.
// See https://github.com/golang/go/issues/29458.
cmd.SysProcAttr.Ctty = nextFD
// Pass the tty as all stdio fds to sandbox.
for i := 0; i < 3; i++ {
cmd.ExtraFiles = append(cmd.ExtraFiles, tty)
cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
nextFD++
}
if conf.Debug {
// If debugging, send the boot process stdio to the
// TTY, so that it is easier to find.
cmd.Stdin = tty
cmd.Stdout = tty
cmd.Stderr = tty
}
} else {
// If not using a console, pass our current stdio as the
// container stdio via flags.
for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD))
nextFD++
}
if conf.Debug {
// If debugging, send the boot process stdio to this
// process' stdio, so that it is easier to find.
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
}
// Detach from this session, otherwise cmd will get SIGHUP and SIGCONT
// when re-parented.
cmd.SysProcAttr.Setsid = true
// nss is the set of namespaces to join or create before starting the sandbox
// process. Mount, IPC and UTS namespaces from the host are not used as they
// are virtualized inside the sandbox. Be paranoid and run inside an empty
// namespace for these. Don't unshare the cgroup namespace because the sandbox
// is added to a cgroup in the caller's namespace.
log.Infof("Sandbox will be started in new mount, IPC and UTS namespaces")
nss := []specs.LinuxNamespace{
{Type: specs.IPCNamespace},
{Type: specs.MountNamespace},
{Type: specs.UTSNamespace},
}
if conf.Platform == platforms.Ptrace {
// TODO(b/75837838): Also set a new PID namespace so that we limit
// access to other host processes.
log.Infof("Sandbox will be started in the current PID namespace")
} else {
log.Infof("Sandbox will be started in a new PID namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace})
cmd.Args = append(cmd.Args, "--pidns=true")
}
// Join the network namespace if network is enabled. The sandbox talks
// directly to the host network, which may have been configured in the
// namespace.
if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != boot.NetworkNone {
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
nss = append(nss, ns)
} else if conf.Network == boot.NetworkHost {
log.Infof("Sandbox will be started in the host network namespace")
} else {
log.Infof("Sandbox will be started in new network namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
}
// The user namespace depends on the network type. Host network requires the
// sandbox to run inside the user namespace specified in the spec, or the
// current namespace if none is configured.
if conf.Network == boot.NetworkHost {
if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
nss = append(nss, userns)
specutils.SetUIDGIDMappings(cmd, args.Spec)
} else {
log.Infof("Sandbox will be started in the current user namespace")
}
// When running in the caller's defined user namespace, apply the same
// capabilities to the sandbox process to ensure it abides by the same
// rules.
cmd.Args = append(cmd.Args, "--apply-caps=true")
// If we have CAP_SYS_ADMIN, we can create an empty chroot and
// bind-mount the executable inside it.
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
} else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) {
log.Infof("Sandbox will be started in minimal chroot")
cmd.Args = append(cmd.Args, "--setup-root")
} else {
return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN")
}
} else {
// If we have CAP_SETUID and CAP_SETGID, then we can also run
// as user nobody.
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid())
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
log.Infof("Sandbox will be started in new user namespace")
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
cmd.Args = append(cmd.Args, "--setup-root")
if conf.Rootless {
log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getuid(),
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: os.Getgid(),
Size: 1,
},
}
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
} else {
// Map nobody in the new namespace to nobody in the parent namespace.
//
// A sandbox process will construct an empty
// root for itself, so it has to have the CAP_SYS_ADMIN
// capability.
//
// FIXME(b/122554829): The current implementation of
// os/exec doesn't allow setting ambient capabilities if
// a process is started in a new user namespace. As a
// workaround, we start the sandbox process with UID 0
// and it then constructs a chroot and sets its UID to
// nobody. https://github.com/golang/go/issues/2315
const nobody = 65534
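// With the mappings below, in-namespace UID 0 (which the sandbox starts
// as, per the FIXME above) corresponds to the unprivileged host UID
// nobody-1, and in-namespace nobody corresponds to host nobody. Either
// way, the sandbox never runs as a privileged user on the host.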
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
{
ContainerID: 0,
HostID: nobody - 1,
Size: 1,
},
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
{
ContainerID: nobody,
HostID: nobody,
Size: 1,
},
}
// Set credentials to run as group nobody. The UID stays 0 for now;
// the sandbox drops to nobody itself after building the chroot (see
// the FIXME above).
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
}
} else {
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
}
}
cmd.Args[0] = "runsc-sandbox"
if s.Cgroup != nil {
cpuNum, err := s.Cgroup.NumCPU()
if err != nil {
return fmt.Errorf("getting cpu count from cgroups: %v", err)
}
cmd.Args = append(cmd.Args, "--cpu-num", strconv.Itoa(cpuNum))
mem, err := s.Cgroup.MemoryLimit()
if err != nil {
return fmt.Errorf("getting memory limit from cgroups: %v", err)
}
// When memory limit is unset, a "large" number is returned. In that case,
// just stick with the default.
if mem < 0x7ffffffffffff000 {
cmd.Args = append(cmd.Args, "--total-memory", strconv.FormatUint(mem, 10))
}
}
if args.UserLog != "" {
f, err := os.OpenFile(args.UserLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
if err != nil {
return fmt.Errorf("opening compat log file: %v", err)
}
defer f.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
cmd.Args = append(cmd.Args, "--user-log-fd", strconv.Itoa(nextFD))
nextFD++
}
// Add container as the last argument.
cmd.Args = append(cmd.Args, s.ID)
// Log the FDs we are donating to the sandbox process.
for i, f := range cmd.ExtraFiles {
log.Debugf("Donating FD %d: %q", i+3, f.Name())
}
if args.Attached {
// Kill sandbox if parent process exits in attached mode.
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
}
log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
if err := specutils.StartInNS(cmd, nss); err != nil {
return fmt.Errorf("Sandbox: %v", err)
}
s.child = true
s.Pid = cmd.Process.Pid
log.Infof("Sandbox started, PID: %d", s.Pid)
return nil
}
// Wait waits for the containerized process to exit, and returns its WaitStatus.
func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID)
var ws syscall.WaitStatus
if conn, err := s.sandboxConnect(); err != nil {
// The sandbox may have exited before we had a chance to wait on
// it.
log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err)
} else {
defer conn.Close()
// Try the Wait RPC to the sandbox.
err = conn.Call(boot.ContainerWait, &cid, &ws)
if err == nil {
// It worked!
return ws, nil
}
// The sandbox may have exited after we connected, but before
// or during the Wait RPC.
log.Warningf("Wait RPC to container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err)
}
// The sandbox may have already exited, or exited while handling the
// Wait RPC. The best we can do is ask Linux what the sandbox exit
// status was, since in most cases that will be the same as the
// container exit status.
if err := s.waitForStopped(); err != nil {
return ws, err
}
if !s.child {
return ws, fmt.Errorf("sandbox no longer running and its exit status is unavailable")
}
return s.status, nil
}
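// Interpreting the returned status, as a sketch of what a caller might do
// with it (the 128+signal convention is the shell's, not runsc's):
//
//	ws, err := s.Wait(cid)
//	if err != nil {
//		return err
//	}
//	if ws.Exited() {
//		os.Exit(ws.ExitStatus())
//	}
//	if ws.Signaled() {
//		os.Exit(128 + int(ws.Signal()))
//	}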
// WaitPID waits for process 'pid' in the container's sandbox and returns its
// WaitStatus.
func (s *Sandbox) WaitPID(cid string, pid int32) (syscall.WaitStatus, error) {
log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID)
var ws syscall.WaitStatus
conn, err := s.sandboxConnect()
if err != nil {
return ws, err
}
defer conn.Close()
args := &boot.WaitPIDArgs{
PID: pid,
CID: cid,
}
if err := conn.Call(boot.ContainerWaitPID, args, &ws); err != nil {
return ws, fmt.Errorf("waiting on PID %d in sandbox %q: %v", pid, s.ID, err)
}
return ws, nil
}
// IsRootContainer returns true if the specified container ID belongs to the
// root container.
func (s *Sandbox) IsRootContainer(cid string) bool {
return s.ID == cid
}
// destroy frees all resources associated with the sandbox. It fails fast and
// is idempotent.
func (s *Sandbox) destroy() error {
log.Debugf("Destroy sandbox %q", s.ID)
if s.Pid != 0 {
log.Debugf("Killing sandbox %q", s.ID)
if err := syscall.Kill(s.Pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
return fmt.Errorf("killing sandbox %q PID %q: %v", s.ID, s.Pid, err)
}
if err := s.waitForStopped(); err != nil {
return fmt.Errorf("waiting sandbox %q stop: %v", s.ID, err)
}
}
return nil
}
// SignalContainer sends the signal to a container in the sandbox. If all is
// true and signal is SIGKILL, then waits for all processes to exit before
// returning.
func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) error {
log.Debugf("Signal sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
mode := boot.DeliverToProcess
if all {
mode = boot.DeliverToAllProcesses
}
args := boot.SignalArgs{
CID: cid,
Signo: int32(sig),
Mode: mode,
}
if err := conn.Call(boot.ContainerSignal, &args, nil); err != nil {
return fmt.Errorf("signaling container %q: %v", cid, err)
}
return nil
}
// SignalProcess sends the signal to a particular process in the container. If
// fgProcess is true, then the signal is sent to the foreground process group
// in the same session that PID belongs to. This is only valid if the process
// is attached to a host TTY.
func (s *Sandbox) SignalProcess(cid string, pid int32, sig syscall.Signal, fgProcess bool) error {
log.Debugf("Signal sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
mode := boot.DeliverToProcess
if fgProcess {
mode = boot.DeliverToForegroundProcessGroup
}
args := boot.SignalArgs{
CID: cid,
Signo: int32(sig),
PID: pid,
Mode: mode,
}
if err := conn.Call(boot.ContainerSignal, &args, nil); err != nil {
return fmt.Errorf("signaling container %q PID %d: %v", cid, pid, err)
}
return nil
}
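// Sketch of the job-control use: forwarding a ^C typed on the host terminal
// to whatever is running in the foreground of the exec'd process's TTY:
//
//	if err := s.SignalProcess(cid, pid, syscall.SIGINT, true); err != nil {
//		log.Warningf("forwarding SIGINT: %v", err)
//	}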
// Checkpoint sends the checkpoint call for a container in the sandbox.
// The statefile will be written to f.
func (s *Sandbox) Checkpoint(cid string, f *os.File) error {
log.Debugf("Checkpoint sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
opt := control.SaveOpts{
FilePayload: urpc.FilePayload{
Files: []*os.File{f},
},
}
if err := conn.Call(boot.ContainerCheckpoint, &opt, nil); err != nil {
return fmt.Errorf("checkpointing container %q: %v", cid, err)
}
return nil
}
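// Checkpoint/restore round trip, as a sketch (statePath is the caller's
// choice; Restore is typically invoked on a freshly created sandbox):
//
//	f, err := os.Create(statePath)
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	if err := s.Checkpoint(cid, f); err != nil {
//		return err
//	}
//	// ...later, on a new sandbox s2 for the same container:
//	if err := s2.Restore(cid, spec, conf, statePath); err != nil {
//		return err
//	}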
// Pause sends the pause call for a container in the sandbox.
func (s *Sandbox) Pause(cid string) error {
log.Debugf("Pause sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
if err := conn.Call(boot.ContainerPause, nil, nil); err != nil {
return fmt.Errorf("pausing container %q: %v", cid, err)
}
return nil
}
// Resume sends the resume call for a container in the sandbox.
func (s *Sandbox) Resume(cid string) error {
log.Debugf("Resume sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
if err := conn.Call(boot.ContainerResume, nil, nil); err != nil {
return fmt.Errorf("resuming container %q: %v", cid, err)
}
return nil
}
// IsRunning returns true if the sandbox or gofer process is running.
func (s *Sandbox) IsRunning() bool {
if s.Pid != 0 {
// Send a signal 0 to the sandbox process.
if err := syscall.Kill(s.Pid, 0); err == nil {
// Succeeded, process is running.
return true
}
}
return false
}
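// A note on the probe above: Kill with signal 0 performs the existence and
// permission checks but delivers nothing, so a nil error means a process
// with that PID still exists. A recycled PID could in principle produce a
// false positive.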
// Stacks collects and returns all stacks for the sandbox.
func (s *Sandbox) Stacks() (string, error) {
log.Debugf("Stacks sandbox %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return "", err
}
defer conn.Close()
var stacks string
if err := conn.Call(boot.SandboxStacks, nil, &stacks); err != nil {
return "", fmt.Errorf("getting sandbox %q stacks: %v", s.ID, err)
}
return stacks, nil
}
// HeapProfile writes a heap profile to the given file.
func (s *Sandbox) HeapProfile(f *os.File) error {
log.Debugf("Heap profile %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
opts := control.ProfileOpts{
FilePayload: urpc.FilePayload{
Files: []*os.File{f},
},
}
if err := conn.Call(boot.HeapProfile, &opts, nil); err != nil {
return fmt.Errorf("getting sandbox %q heap profile: %v", s.ID, err)
}
return nil
}
// StartCPUProfile starts CPU profiling, writing the profile to the given file.
func (s *Sandbox) StartCPUProfile(f *os.File) error {
log.Debugf("CPU profile start %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
opts := control.ProfileOpts{
FilePayload: urpc.FilePayload{
Files: []*os.File{f},
},
}
if err := conn.Call(boot.StartCPUProfile, &opts, nil); err != nil {
return fmt.Errorf("starting sandbox %q CPU profile: %v", s.ID, err)
}
return nil
}
// StopCPUProfile stops a previously started CPU profile.
func (s *Sandbox) StopCPUProfile() error {
log.Debugf("CPU profile stop %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
if err := conn.Call(boot.StopCPUProfile, nil, nil); err != nil {
return fmt.Errorf("stopping sandbox %q CPU profile: %v", s.ID, err)
}
return nil
}
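// Start/stop profiling flow, as a sketch ("cpu.prof" is an illustrative
// path):
//
//	f, err := os.Create("cpu.prof")
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	if err := s.StartCPUProfile(f); err != nil {
//		return err
//	}
//	// ...run the workload of interest...
//	if err := s.StopCPUProfile(); err != nil {
//		return err
//	}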
// StartTrace starts an execution trace, writing the trace to the given file.
func (s *Sandbox) StartTrace(f *os.File) error {
log.Debugf("Trace start %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
opts := control.ProfileOpts{
FilePayload: urpc.FilePayload{
Files: []*os.File{f},
},
}
if err := conn.Call(boot.StartTrace, &opts, nil); err != nil {
return fmt.Errorf("starting sandbox %q trace: %v", s.ID, err)
}
return nil
}
// StopTrace stops a previously started trace.
func (s *Sandbox) StopTrace() error {
log.Debugf("Trace stop %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
if err := conn.Call(boot.StopTrace, nil, nil); err != nil {
return fmt.Errorf("stopping sandbox %q trace: %v", s.ID, err)
}
return nil
}
// ChangeLogging changes logging options.
func (s *Sandbox) ChangeLogging(args control.LoggingArgs) error {
log.Debugf("Change logging start %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
if err := conn.Call(boot.ChangeLogging, &args, nil); err != nil {
return fmt.Errorf("changing sandbox %q logging: %v", s.ID, err)
}
return nil
}
// DestroyContainer destroys the given container. If it is the root container,
// then the entire sandbox is destroyed.
func (s *Sandbox) DestroyContainer(cid string) error {
if s.IsRootContainer(cid) {
log.Debugf("Destroying root container %q by destroying sandbox", cid)
return s.destroy()
}
if !s.IsRunning() {
// Sandbox isn't running anymore, container is already destroyed.
return nil
}
log.Debugf("Destroying container %q in sandbox %q", cid, s.ID)
conn, err := s.sandboxConnect()
if err != nil {
return err
}
defer conn.Close()
if err := conn.Call(boot.ContainerDestroy, &cid, nil); err != nil {
return fmt.Errorf("destroying container %q: %v", cid, err)
}
return nil
}
func (s *Sandbox) waitForStopped() error {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
op := func() error {
if s.child {
s.statusMu.Lock()
defer s.statusMu.Unlock()
if s.Pid == 0 {
return nil
}
// The sandbox process is a child of the current process,
// so we can wait on it and collect its zombie.
wpid, err := syscall.Wait4(int(s.Pid), &s.status, syscall.WNOHANG, nil)
if err != nil {
return fmt.Errorf("error waiting the sandbox process: %v", err)
}
if wpid == 0 {
return fmt.Errorf("sandbox is still running")
}
s.Pid = 0
} else if s.IsRunning() {
return fmt.Errorf("sandbox is still running")
}
return nil
}
return backoff.Retry(op, b)
}
// deviceFileForPlatform opens the device file for the given platform. If the
// platform does not need a device file, then nil is returned.
func deviceFileForPlatform(name string) (*os.File, error) {
p, err := platform.Lookup(name)
if err != nil {
return nil, err
}
f, err := p.OpenDevice()
if err != nil {
return nil, fmt.Errorf("opening device file for platform %q: %v", p, err)
}
return f, nil
}