From 1bdec86baed82fe8d79100693949f1259636e9db Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Wed, 9 May 2018 14:12:44 -0700
Subject: [PATCH] Return better errors from Docker when runsc fails to start.

Two changes in this CL:

First, make the "boot" process sleep when it encounters an error to give the
controller time to send the error back to the "start" process. Otherwise the
"boot" process exits immediately and the control connection errors with EOF.

Secondly, open the log file with O_APPEND, not O_TRUNC. Docker uses the same
log file for all runtime commands, and setting O_TRUNC causes them to get
destroyed. Furthermore, containerd parses these log files in the event of an
error, and it does not like the file being truncated out from underneath it.

Now, when trying to run a binary that does not exist in the image, the error
message is more reasonable:

$ docker run alpine /not/found
docker: Error response from daemon: OCI runtime start failed: /usr/local/google/docker/runtimes/runscd did not terminate sucessfully: error starting sandbox: error starting application [/not/found]: failed to create init process: no such file or directory

Fixes #32

PiperOrigin-RevId: 196027084
Change-Id: Iabc24c0bdd8fc327237acc051a1655515f445e68
---
 runsc/boot/controller.go  |  2 +-
 runsc/boot/loader.go      | 10 +++++++++-
 runsc/boot/loader_test.go | 19 ++++++++++++++++++-
 runsc/main.go             |  5 ++++-
 runsc/sandbox/sandbox.go  |  2 +-
 5 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 4d4ef7256..60c42fc19 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -68,7 +68,7 @@ func newController(fd int, k *kernel.Kernel) (*controller, error) {
 
 	app := &application{
 		startChan:       make(chan struct{}),
-		startResultChan: make(chan error, 1),
+		startResultChan: make(chan error),
 		k:               k,
 	}
 	srv.Register(app)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index af577f571..34a25241f 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -287,7 +287,15 @@ func createPlatform(conf *Config) (platform.Platform, error) {
 func (l *Loader) Run() error {
 	err := l.run()
 	l.ctrl.app.startResultChan <- err
-	return err
+	if err != nil {
+		// Give the controller some time to send the error to the
+		// runtime. If we return too quickly here the process will exit
+		// and the control connection will be closed before the error
+		// is returned.
+		gtime.Sleep(2 * gtime.Second)
+		return err
+	}
+	return nil
 }
 
 func (l *Loader) run() error {
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 2fc16b241..c3d9887fa 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -16,6 +16,7 @@ package boot
 
 import (
 	"os"
+	"sync"
 	"testing"
 	"time"
 
@@ -65,11 +66,27 @@ func TestRun(t *testing.T) {
 	}
 	defer s.Destroy()
 
+	// Start a goroutine to read the start chan result, otherwise Run will
+	// block forever.
+	var resultChanErr error
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		resultChanErr = <-s.ctrl.app.startResultChan
+		wg.Done()
+	}()
+
 	// Run the application.
 	if err := s.Run(); err != nil {
 		t.Errorf("error running application: %v", err)
 	}
 
+	// We should have not gotten an error on the startResultChan.
+	wg.Wait()
+	if resultChanErr != nil {
+		t.Errorf("error on startResultChan: %v", resultChanErr)
+	}
+
 	// Wait for the application to exit.  It should succeed.
 	if status := s.WaitExit(); status.Code != 0 || status.Signo != 0 {
 		t.Errorf("application exited with status %+v, want 0", status)
@@ -94,7 +111,7 @@ func TestStartSignal(t *testing.T) {
 	waitFinished := make(chan struct{})
 	go func() {
 		s.WaitForStartSignal()
-		// Pretent that Run() executed and returned no error.
+		// Pretend that Run() executed and returned no error.
 		s.ctrl.app.startResultChan <- nil
 		waitFinished <- struct{}{}
 	}()
diff --git a/runsc/main.go b/runsc/main.go
index cf4b99d3f..883b8b1f4 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -126,7 +126,10 @@ func main() {
 
 	var logFile io.Writer = os.Stderr
 	if *logFilename != "" {
-		f, err := os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		// We must set O_APPEND and not O_TRUNC because Docker passes
+		// the same log file for all commands (and also parses these
+		// log files), so we can't destroy them on each command.
+		f, err := os.OpenFile(*logFilename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
 		if err != nil {
 			cmd.Fatalf("error opening log file %q: %v", *logFilename, err)
 		}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 2a5eda6ae..34bd6ea67 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -289,7 +289,7 @@ func (s *Sandbox) Start(conf *boot.Config) error {
 	// application.
 	if err := c.Call(boot.ApplicationStart, nil, nil); err != nil {
 		s.Destroy()
-		return fmt.Errorf("error starting sandbox: %v", err)
+		return fmt.Errorf("error starting application %v: %v", s.Spec.Process.Args, err)
 	}
 
 	// "If any poststart hook fails, the runtime MUST log a warning, but