From 8f0b6e7fc02919df034dea9e9c9dbab1b80de2be Mon Sep 17 00:00:00 2001
From: Kevin Krakauer
Date: Wed, 5 Sep 2018 21:13:46 -0700
Subject: [PATCH] runsc: Support runsc kill multi-container.

Now, we can kill individual containers rather than the entire sandbox.

PiperOrigin-RevId: 211748106
Change-Id: Ic97e91db33d53782f838338c4a6d0aab7a313ead
---
 runsc/boot/controller.go          |  11 +--
 runsc/boot/loader.go              |  17 ++++
 runsc/cmd/kill.go                 |   2 +
 runsc/container/container.go      |   1 +
 runsc/container/container_test.go | 142 ++++++++++++++++++++++++++++--
 5 files changed, 156 insertions(+), 17 deletions(-)

diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index ec1110059..45aa255c4 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -22,7 +22,6 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.googlesource.com/gvisor/pkg/control/server"
 	"gvisor.googlesource.com/gvisor/pkg/log"
-	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/control"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
@@ -387,13 +386,5 @@ type SignalArgs struct {
 // Signal sends a signal to the init process of the container.
 func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error {
 	log.Debugf("containerManager.Signal")
-	// TODO: Use the cid and send the signal to the init
-	// process in theat container. Currently we just signal PID 1 in the
-	// sandbox.
-	si := arch.SignalInfo{Signo: args.Signo}
-	t := cm.l.k.TaskSet().Root.TaskWithID(1)
-	if t == nil {
-		return fmt.Errorf("cannot signal: no task with id 1")
-	}
-	return t.SendSignal(&si)
+	return cm.l.signal(args.CID, args.Signo)
 }
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 2733c4d69..ae2226e12 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -31,6 +31,7 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
 	"gvisor.googlesource.com/gvisor/pkg/cpuid"
 	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/inet"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
@@ -576,3 +577,19 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
 		panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
 	}
 }
+
+func (l *Loader) signal(cid string, signo int32) error {
+	l.mu.Lock()
+	tgid, ok := l.containerRootTGIDs[cid]
+	l.mu.Unlock()
+	if !ok {
+		return fmt.Errorf("failed to signal container %q: no such container", cid)
+	}
+
+	// The thread group ID of a process is the leading task's thread ID.
+	t := l.k.TaskSet().Root.TaskWithID(tgid)
+	if t == nil {
+		return fmt.Errorf("cannot signal: no task with ID %d", tgid)
+	}
+	return t.SendSignal(&arch.SignalInfo{Signo: signo})
+}
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
index 0979b002b..6fa5674f1 100644
--- a/runsc/cmd/kill.go
+++ b/runsc/cmd/kill.go
@@ -81,6 +81,8 @@ func (*Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 	if err != nil {
 		Fatalf("%v", err)
 	}
+	// TODO: Distinguish between already-exited containers and
+	// genuine errors.
 	if err := c.Signal(sig); err != nil {
 		Fatalf("%v", err)
 	}
diff --git a/runsc/container/container.go b/runsc/container/container.go
index a4a3ed56d..5977fbd21 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -427,6 +427,7 @@ func (c *Container) Signal(sig syscall.Signal) error {
 		log.Warningf("container %q not running, not sending signal %v", c.ID, sig)
 		return nil
 	}
+	// TODO: Query the container for its state, then save it.
 	return c.Sandbox.Signal(c.ID, sig)
 }
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index c00db3e91..00e38e12c 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -38,6 +38,7 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.googlesource.com/gvisor/pkg/unet"
 	"gvisor.googlesource.com/gvisor/runsc/boot"
+	"gvisor.googlesource.com/gvisor/runsc/specutils"
 	"gvisor.googlesource.com/gvisor/runsc/test/testutil"
 )
 
@@ -336,8 +337,8 @@ func TestLifecycle(t *testing.T) {
 		wg.Done()
 	}()
 
-	// Wait a bit to ensure that we've started waiting on the container
-	// before we signal.
+	// Wait a bit to ensure that we've started waiting on the
+	// container before we signal.
 	<-ch
 	time.Sleep(100 * time.Millisecond)
 	// Send the container a SIGTERM which will cause it to stop.
@@ -347,11 +348,11 @@ func TestLifecycle(t *testing.T) {
 	// Wait for it to die.
 	wg.Wait()
 
-	// The sandbox process should have exited by now, but it is a zombie.
-	// In normal runsc usage, it will be parented to init, and init will
-	// reap the sandbox. However, in this case the test runner is the
-	// parent and will not reap the sandbox process, so we must do it
-	// ourselves.
+	// The sandbox process should have exited by now, but it is a
+	// zombie. In normal runsc usage, it will be parented to init,
+	// and init will reap the sandbox. However, in this case the
+	// test runner is the parent and will not reap the sandbox
+	// process, so we must do it ourselves.
 	p, _ := os.FindProcess(s.Sandbox.Pid)
 	p.Wait()
 	g, _ := os.FindProcess(s.GoferPid)
@@ -1547,6 +1548,133 @@ func TestGoferExits(t *testing.T) {
 	}
 }
 
+// TestMultiContainerSignal checks that it is possible to signal individual
+// containers without killing the entire sandbox.
+func TestMultiContainerSignal(t *testing.T) {
+	for _, conf := range configs(all...) {
+		t.Logf("Running test with conf: %+v", conf)
+
+		containerIDs := []string{
+			testutil.UniqueContainerID(),
+			testutil.UniqueContainerID(),
+		}
+		containerAnnotations := []map[string]string{
+			// The first container creates a sandbox.
+			map[string]string{
+				specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeSandbox,
+			},
+			// The second container creates a container within the first
+			// container's sandbox.
+			map[string]string{
+				specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer,
+				specutils.ContainerdSandboxIDAnnotation:     containerIDs[0],
+			},
+		}
+
+		rootDir, err := testutil.SetupRootDir()
+		if err != nil {
+			t.Fatalf("error creating root dir: %v", err)
+		}
+		defer os.RemoveAll(rootDir)
+
+		// Setup the containers.
+		containers := make([]*Container, 0, len(containerIDs))
+		for i, annotations := range containerAnnotations {
+			spec := testutil.NewSpecWithArgs("sleep", "100")
+			spec.Annotations = annotations
+			bundleDir, err := testutil.SetupContainerInRoot(rootDir, spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer os.RemoveAll(bundleDir)
+			cont, err := Create(containerIDs[i], spec, conf, bundleDir, "", "")
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
+			containers = append(containers, cont)
+		}
+
+		expectedPL := []*control.Process{
+			{
+				UID:  0,
+				PID:  1,
+				PPID: 0,
+				C:    0,
+				Cmd:  "sleep",
+			},
+			{
+				UID:  0,
+				PID:  2,
+				PPID: 0,
+				C:    0,
+				Cmd:  "sleep",
+			},
+		}
+
+		// Check via ps that multiple processes are running.
+		if err := waitForProcessList(containers[0], expectedPL); err != nil {
+			t.Errorf("failed to wait for sleep to start: %v", err)
+		}
+
+		// Kill process 2.
+		if err := containers[1].Signal(syscall.SIGKILL); err != nil {
+			t.Errorf("failed to kill process 2: %v", err)
+		}
+
+		// Make sure process 1 is still running.
+		if err := waitForProcessList(containers[0], expectedPL[:1]); err != nil {
+			t.Errorf("failed to wait for sleep to start: %v", err)
+		}
+
+		// Now that process 2 is gone, ensure we get an error trying to
+		// signal it again.
+		if err := containers[1].Signal(syscall.SIGKILL); err == nil {
+			t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID)
+		}
+
+		// Kill process 1.
+		if err := containers[0].Signal(syscall.SIGKILL); err != nil {
+			t.Errorf("failed to kill process 1: %v", err)
+		}
+
+		if err := waitForSandboxExit(containers[0]); err != nil {
+			t.Errorf("failed to exit sandbox: %v", err)
+		}
+
+		// The sentry should be gone, so signaling should yield an
+		// error.
+		if err := containers[0].Signal(syscall.SIGKILL); err == nil {
+			t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
+		}
+	}
+}
+
+// waitForSandboxExit waits until both the sandbox and gofer processes of the
+// container have exited.
+func waitForSandboxExit(container *Container) error {
+	goferProc, _ := os.FindProcess(container.GoferPid)
+	state, err := goferProc.Wait()
+	if err != nil {
+		return err
+	}
+	if !state.Exited() {
+		return fmt.Errorf("gofer with PID %d failed to exit", container.GoferPid)
+	}
+	sandboxProc, _ := os.FindProcess(container.Sandbox.Pid)
+	state, err = sandboxProc.Wait()
+	if err != nil {
+		return err
+	}
+	if !state.Exited() {
+		return fmt.Errorf("sandbox with PID %d failed to exit", container.Sandbox.Pid)
+	}
+	return nil
+}
+
 func TestMain(m *testing.M) {
 	testutil.RunAsRoot(m)
 }
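--
Reviewer note: the heart of the change is Loader.signal above — resolve the
container ID to its root process's thread-group ID via containerRootTGIDs,
then signal that thread group's leading task — instead of unconditionally
signaling PID 1 of the sandbox as the deleted controller code did. The sketch
below is a minimal, self-contained illustration of that dispatch pattern; the
loader and task types are simplified stand-ins for the sentry's real
Kernel/TaskSet plumbing, not gVisor's API, and sendSignal only prints.

package main

import (
	"fmt"
	"sync"
)

// task stands in for a sentry task; the "signal" is just recorded.
type task struct{ tid int }

func (t *task) sendSignal(signo int32) error {
	fmt.Printf("signal %d delivered to task %d\n", signo, t.tid)
	return nil
}

// loader mirrors the shape the patch relies on: containerRootTGIDs maps a
// container ID to the thread-group ID of that container's root process, and
// tasks stands in for k.TaskSet().Root.TaskWithID lookups.
type loader struct {
	mu                 sync.Mutex
	containerRootTGIDs map[string]int
	tasks              map[int]*task
}

// signal follows the same steps as Loader.signal: look up the container's
// root TGID under the lock, then signal that thread group's leading task.
func (l *loader) signal(cid string, signo int32) error {
	l.mu.Lock()
	tgid, ok := l.containerRootTGIDs[cid]
	l.mu.Unlock()
	if !ok {
		return fmt.Errorf("failed to signal container %q: no such container", cid)
	}
	// The thread group ID of a process is the leading task's thread ID.
	t, ok := l.tasks[tgid]
	if !ok {
		return fmt.Errorf("cannot signal: no task with ID %d", tgid)
	}
	return t.sendSignal(signo)
}

func main() {
	l := &loader{
		containerRootTGIDs: map[string]int{"sandbox": 1, "child": 2},
		tasks:              map[int]*task{1: {tid: 1}, 2: {tid: 2}},
	}
	// Killing "child" reaches only that container's root process; the
	// sandbox container keeps running.
	if err := l.signal("child", 9); err != nil {
		fmt.Println(err)
	}
	// Unknown container IDs now fail loudly instead of silently signaling
	// PID 1, which is what the pre-patch code path effectively did.
	if err := l.signal("gone", 9); err != nil {
		fmt.Println(err)
	}
}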