diff --git a/runsc/container/container.go b/runsc/container/container.go index 11c440f09..80a27df4a 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -476,9 +476,6 @@ func (c *Container) SandboxPid() int { // and wait returns immediately. func (c *Container) Wait() (syscall.WaitStatus, error) { log.Debugf("Wait on container %q", c.ID) - if !c.isSandboxRunning() { - return 0, fmt.Errorf("sandbox is not running") - } return c.Sandbox.Wait(c.ID) } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 598b96a08..45a36e583 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -39,12 +39,8 @@ import ( "gvisor.googlesource.com/gvisor/runsc/test/testutil" ) -func init() { - log.SetLevel(log.Debug) - if err := testutil.ConfigureExePath(); err != nil { - panic(err.Error()) - } -} +// childReaper reaps child processes. +var childReaper *testutil.Reaper // waitForProcessList waits for the given process list to show up in the container. func waitForProcessList(cont *Container, want []*control.Process) error { @@ -1580,12 +1576,17 @@ func TestUserLog(t *testing.T) { } func TestWaitOnExitedSandbox(t *testing.T) { + // Disable the childReaper for this test. + childReaper.Stop() + defer childReaper.Start() + for _, conf := range configs(all...) { t.Logf("Running test with conf: %+v", conf) - // Run a shell that exits immediately with a non-zero code. + // Run a shell that sleeps for 1 second and then exits with a + // non-zero code. const wantExit = 17 - cmd := fmt.Sprintf("exit %d", wantExit) + cmd := fmt.Sprintf("sleep 1; exit %d", wantExit) spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd) rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -1604,22 +1605,23 @@ func TestWaitOnExitedSandbox(t *testing.T) { t.Fatalf("error starting container: %v", err) } - // Wait for the sandbox to stop running. - if err := testutil.Poll(func() error { - if c.Sandbox.IsRunning() { - return nil - } - return fmt.Errorf("sandbox still running") - }, 10*time.Second); err != nil { - t.Fatalf("error waiting for sandbox to exit: %v", err) - } - - // Now call Wait. + // Wait on the sandbox. This will make an RPC to the sandbox + // and get the actual exit status of the application. ws, err := c.Wait() if err != nil { t.Fatalf("error waiting on container: %v", err) } + if got := ws.ExitStatus(); got != wantExit { + t.Errorf("got exit status %d, want %d", got, wantExit) + } + // Now the sandbox has exited, but the zombie sandbox process + // still exists. Calling Wait() now will return the sandbox + // exit status. + ws, err = c.Wait() + if err != nil { + t.Fatalf("error waiting on container: %v", err) + } if got := ws.ExitStatus(); got != wantExit { t.Errorf("got exit status %d, want %d", got, wantExit) } @@ -1704,8 +1706,16 @@ func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, } func TestMain(m *testing.M) { + log.SetLevel(log.Debug) + if err := testutil.ConfigureExePath(); err != nil { + panic(err.Error()) + } testutil.RunAsRoot() - stop := testutil.StartReaper() - defer stop() + + // Start the child reaper. + childReaper = &testutil.Reaper{} + childReaper.Start() + defer childReaper.Stop() + os.Exit(m.Run()) } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 084d79d06..3f00eba94 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -612,17 +612,23 @@ func (s *Sandbox) waitForCreated(timeout time.Duration) error { func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID) var ws syscall.WaitStatus - conn, err := s.sandboxConnect() - if err != nil { - return ws, err - } - defer conn.Close() - // First try the Wait RPC to the sandbox. - if err := conn.Call(boot.ContainerWait, &cid, &ws); err == nil { - return ws, nil + if conn, err := s.sandboxConnect(); err != nil { + // The sandbox may have exited while before we had a chance to + // wait on it. + log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) + } else { + defer conn.Close() + // Try the Wait RPC to the sandbox. + err = conn.Call(boot.ContainerWait, &cid, &ws) + if err == nil { + // It worked! + return ws, nil + } + // The sandbox may have exited after we connected, but before + // or during the Wait RPC. + log.Warningf("Wait RPC to container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) } - log.Warningf("Wait RPC to container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) // The sandbox may have already exited, or exited while handling the // Wait RPC. The best we can do is ask Linux what the sandbox exit diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index c816de3f0..7a17d0552 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -29,6 +29,7 @@ import ( "path/filepath" "runtime" "strings" + "sync" "sync/atomic" "syscall" "time" @@ -296,18 +297,37 @@ func RunAsRoot() { os.Exit(0) } -// StartReaper starts a goroutine that will reap all children processes created -// by the tests. Caller must call the returned function to stop it. -func StartReaper() func() { - ch := make(chan os.Signal, 1) - signal.Notify(ch, syscall.SIGCHLD) - stop := make(chan struct{}) +// Reaper reaps child processes. +type Reaper struct { + // mu protects ch, which will be nil if the reaper is not running. + mu sync.Mutex + ch chan os.Signal +} + +// Start starts reaping child processes. +func (r *Reaper) Start() { + r.mu.Lock() + defer r.mu.Unlock() + + if r.ch != nil { + panic("reaper.Start called on a running reaper") + } + + r.ch = make(chan os.Signal, 1) + signal.Notify(r.ch, syscall.SIGCHLD) go func() { for { - select { - case <-ch: - case <-stop: + r.mu.Lock() + ch := r.ch + r.mu.Unlock() + if ch == nil { + return + } + + _, ok := <-ch + if !ok { + // Channel closed. return } for { @@ -318,7 +338,28 @@ func StartReaper() func() { } } }() - return func() { stop <- struct{}{} } +} + +// Stop stops reaping child processes. +func (r *Reaper) Stop() { + r.mu.Lock() + defer r.mu.Unlock() + + if r.ch == nil { + panic("reaper.Stop called on a stopped reaper") + } + + signal.Stop(r.ch) + close(r.ch) + r.ch = nil +} + +// StartReaper is a helper that starts a new Reaper and returns a function to +// stop it. +func StartReaper() func() { + r := &Reaper{} + r.Start() + return r.Stop } // RetryEintr retries the function until an error different than EINTR is