From 2eff1fdd061be9cfabc36532dda8cbefeb02e534 Mon Sep 17 00:00:00 2001
From: Kevin Krakauer <krakauer@google.com>
Date: Wed, 12 Sep 2018 15:22:24 -0700
Subject: [PATCH] runsc: Add exec flag that specifies where to save the
 sandbox-internal pid.

This is different from the existing -pid-file flag, which saves a host pid.

PiperOrigin-RevId: 212713968
Change-Id: I2c486de8dd5cfd9b923fb0970165ef7c5fc597f0
---
 pkg/sentry/control/proc.go        | 35 ++++++++-----
 runsc/boot/controller.go          | 33 +++++++-----
 runsc/cmd/exec.go                 | 31 +++++++++---
 runsc/container/container.go      |  9 ++--
 runsc/container/container_test.go | 84 +++++++++++++++++++------------
 runsc/sandbox/sandbox.go          | 19 +++----
 6 files changed, 129 insertions(+), 82 deletions(-)

diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index 289b8ba0e..1623ed19a 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -87,6 +87,24 @@ type ExecArgs struct {
 
 // Exec runs a new task.
 func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
+	newTG, err := proc.execAsync(args)
+	if err != nil {
+		return err
+	}
+
+	// Wait for completion.
+	newTG.WaitExited()
+	*waitStatus = newTG.ExitStatus().Status()
+	return nil
+}
+
+// ExecAsync runs a new task, but doesn't wait for it to finish. It is defined
+// as a function rather than a method to avoid exposing execAsync as an RPC.
+func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, error) {
+	return proc.execAsync(args)
+}
+
+func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, error) {
 	// Import file descriptors.
 	l := limits.NewLimitSet()
 	fdm := proc.Kernel.NewFDMap()
@@ -121,7 +139,7 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
 		paths := fs.GetPath(initArgs.Envv)
 		f, err := proc.Kernel.RootMountNamespace().ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths)
 		if err != nil {
-			return fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err)
+			return nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err)
 		}
 		initArgs.Filename = f
 	}
@@ -133,7 +151,7 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
 		// Import the given file FD. This dups the FD as well.
 		file, err := host.ImportFile(ctx, int(f.Fd()), mounter, enableIoctl)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		defer file.DecRef()
 
@@ -141,20 +159,11 @@ func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
 		f.Close()
 
 		if err := fdm.NewFDAt(kdefs.FD(appFD), file, kernel.FDFlags{}, l); err != nil {
-			return err
+			return nil, err
 		}
 	}
 
-	// Start the new task.
-	newTG, err := proc.Kernel.CreateProcess(initArgs)
-	if err != nil {
-		return err
-	}
-
-	// Wait for completion.
-	newTG.WaitExited()
-	*waitStatus = newTG.ExitStatus().Status()
-	return nil
+	return proc.Kernel.CreateProcess(initArgs)
 }
 
 // PsArgs is the set of arguments to ps.
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 257f275f9..aaac852e0 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -41,9 +41,9 @@ const (
 	// container used by "runsc events".
 	ContainerEvent = "containerManager.Event"
 
-	// ContainerExecute is the URPC endpoint for executing a command in a
+	// ContainerExecuteAsync is the URPC endpoint for executing a command in a
 	// container..
-	ContainerExecute = "containerManager.Execute"
+	ContainerExecuteAsync = "containerManager.ExecuteAsync"
 
 	// ContainerPause pauses the container.
 	ContainerPause = "containerManager.Pause"
@@ -233,33 +233,40 @@ type ExecArgs struct {
 	CID string
 }
 
-// Execute runs a command on a created or running sandbox.
-func (cm *containerManager) Execute(e *ExecArgs, waitStatus *uint32) error {
-	log.Debugf("containerManager.Execute: %+v", *e)
+// ExecuteAsync starts running a command on a created or running sandbox. It
+// returns the pid of the new process.
+func (cm *containerManager) ExecuteAsync(args *ExecArgs, pid *int32) error {
+	log.Debugf("containerManager.ExecuteAsync: %+v", args)
 
 	// Get the container Root Dirent from the Task, since we must run this
 	// process with the same Root.
 	cm.l.mu.Lock()
-	tgid, ok := cm.l.containerRootTGIDs[e.CID]
+	tgid, ok := cm.l.containerRootTGIDs[args.CID]
 	cm.l.mu.Unlock()
 	if !ok {
-		return fmt.Errorf("cannot exec in container %q: no such container", e.CID)
+		return fmt.Errorf("cannot exec in container %q: no such container", args.CID)
 	}
 	t := cm.l.k.TaskSet().Root.TaskWithID(kernel.ThreadID(tgid))
 	if t == nil {
-		return fmt.Errorf("cannot exec in container %q: no thread group with ID %d", e.CID, tgid)
+		return fmt.Errorf("cannot exec in container %q: no thread group with ID %d", args.CID, tgid)
 	}
 	t.WithMuLocked(func(t *kernel.Task) {
-		e.Root = t.FSContext().RootDirectory()
+		args.Root = t.FSContext().RootDirectory()
 	})
-	if e.Root != nil {
-		defer e.Root.DecRef()
+	if args.Root != nil {
+		defer args.Root.DecRef()
 	}
 
+	// Start the process.
 	proc := control.Proc{Kernel: cm.l.k}
-	if err := proc.Exec(&e.ExecArgs, waitStatus); err != nil {
-		return fmt.Errorf("error executing: %+v: %v", e, err)
+	newTG, err := control.ExecAsync(&proc, &args.ExecArgs)
+	if err != nil {
+		return fmt.Errorf("error executing: %+v: %v", args, err)
 	}
+
+	// Return the pid of the newly-created process.
+	ts := cm.l.k.TaskSet()
+	*pid = int32(ts.Root.IDOfThreadGroup(newTG))
 	return nil
 }
 
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index da1642c08..0d1fa6e20 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -45,12 +45,13 @@ type Exec struct {
 	cwd string
 	env stringSlice
 	// user contains the UID and GID with which to run the new process.
-	user        user
-	extraKGIDs  stringSlice
-	caps        stringSlice
-	detach      bool
-	processPath string
-	pidFile     string
+	user            user
+	extraKGIDs      stringSlice
+	caps            stringSlice
+	detach          bool
+	processPath     string
+	pidFile         string
+	internalPidFile string
 
 	// consoleSocket is the path to an AF_UNIX socket which will receive a
 	// file descriptor referencing the master end of the console's
@@ -97,6 +98,7 @@ func (ex *Exec) SetFlags(f *flag.FlagSet) {
 	f.BoolVar(&ex.detach, "detach", false, "detach from the container's process")
 	f.StringVar(&ex.processPath, "process", "", "path to the process.json")
 	f.StringVar(&ex.pidFile, "pid-file", "", "filename that the container pid will be written to")
+	f.StringVar(&ex.internalPidFile, "internal-pid-file", "", "filename that the container-internal pid will be written to")
 	f.StringVar(&ex.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
 }
 
@@ -146,10 +148,25 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		}
 	}
 
-	ws, err := c.Execute(e)
+	// Start the new process and get it pid.
+	pid, err := c.Execute(e)
 	if err != nil {
 		Fatalf("error getting processes for container: %v", err)
 	}
+
+	// Write the sandbox-internal pid if required.
+	if ex.internalPidFile != "" {
+		pidStr := []byte(strconv.Itoa(int(pid)))
+		if err := ioutil.WriteFile(ex.internalPidFile, pidStr, 0644); err != nil {
+			Fatalf("error writing internal pid file %q: %v", ex.internalPidFile, err)
+		}
+	}
+
+	// Wait for the process to exit.
+	ws, err := c.WaitPID(pid)
+	if err != nil {
+		Fatalf("error waiting on pid %d: %v", pid, err)
+	}
 	*waitStatus = ws
 	return subcommands.ExitSuccess
 }
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 9a05a1dc5..38848d02f 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -353,13 +353,14 @@ func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
 	return c.Wait()
 }
 
-// Execute runs the specified command in the container.
-func (c *Container) Execute(e *control.ExecArgs) (syscall.WaitStatus, error) {
-	log.Debugf("Execute in container %q, args: %+v", c.ID, e)
+// Execute runs the specified command in the container. It returns the pid of
+// the newly created process.
+func (c *Container) Execute(args *control.ExecArgs) (int32, error) {
+	log.Debugf("Execute in container %q, args: %+v", c.ID, args)
 	if c.Status != Created && c.Status != Running {
 		return 0, fmt.Errorf("cannot exec in container in state %s", c.Status)
 	}
-	return c.Sandbox.Execute(c.ID, e)
+	return c.Sandbox.Execute(c.ID, args)
 }
 
 // Event returns events for the container.
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index c45eb79a3..790334249 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -49,11 +49,11 @@ func init() {
 }
 
 // waitForProcessList waits for the given process list to show up in the container.
-func waitForProcessList(s *Container, expected []*control.Process) error {
+func waitForProcessList(cont *Container, expected []*control.Process) error {
 	var got []*control.Process
 	for start := time.Now(); time.Now().Sub(start) < 10*time.Second; {
 		var err error
-		got, err = s.Processes()
+		got, err = cont.Processes()
 		if err != nil {
 			return fmt.Errorf("error getting process data from container: %v", err)
 		}
@@ -485,12 +485,12 @@ func TestExec(t *testing.T) {
 		defer os.RemoveAll(bundleDir)
 
 		// Create and start the container.
-		s, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
+		cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
 		if err != nil {
 			t.Fatalf("error creating container: %v", err)
 		}
-		defer s.Destroy()
-		if err := s.Start(conf); err != nil {
+		defer cont.Destroy()
+		if err := cont.Start(conf); err != nil {
 			t.Fatalf("error starting container: %v", err)
 		}
 
@@ -513,11 +513,11 @@ func TestExec(t *testing.T) {
 		}
 
 		// Verify that "sleep 100" is running.
-		if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+		if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
 			t.Error(err)
 		}
 
-		execArgs := control.ExecArgs{
+		args := &control.ExecArgs{
 			Filename:         "/bin/sleep",
 			Argv:             []string{"sleep", "5"},
 			WorkingDirectory: "/",
@@ -528,17 +528,19 @@ func TestExec(t *testing.T) {
 		// First, start running exec (whick blocks).
 		status := make(chan error, 1)
 		go func() {
-			exitStatus, err := s.Execute(&execArgs)
+			exitStatus, err := cont.executeSync(args)
 			if err != nil {
+				log.Debugf("error executing: %v", err)
 				status <- err
 			} else if exitStatus != 0 {
+				log.Debugf("bad status: %d", exitStatus)
 				status <- fmt.Errorf("failed with exit status: %v", exitStatus)
 			} else {
 				status <- nil
 			}
 		}()
 
-		if err := waitForProcessList(s, expectedPL); err != nil {
+		if err := waitForProcessList(cont, expectedPL); err != nil {
 			t.Fatal(err)
 		}
 
@@ -548,7 +550,7 @@ func TestExec(t *testing.T) {
 			t.Fatalf("container timed out waiting for exec to finish.")
 		case st := <-status:
 			if st != nil {
-				t.Errorf("container failed to exec %v: %v", execArgs, err)
+				t.Errorf("container failed to exec %v: %v", args, err)
 			}
 		}
 	}
@@ -884,15 +886,18 @@ func TestPauseResume(t *testing.T) {
 		}
 
 		script := fmt.Sprintf("while [[ -f %q ]]; do sleep 0.1; done", lock.Name())
-		execArgs := control.ExecArgs{
+		args := &control.ExecArgs{
 			Filename:         "/bin/bash",
 			Argv:             []string{"bash", "-c", script},
 			WorkingDirectory: "/",
 			KUID:             uid,
 		}
 
-		// First, start running exec (which blocks).
-		go cont.Execute(&execArgs)
+		// First, start running exec.
+		_, err = cont.Execute(args)
+		if err != nil {
+			t.Fatalf("error executing: %v", err)
+		}
 
 		// Verify that "sleep 5" is running.
 		if err := waitForProcessList(cont, expectedPL); err != nil {
@@ -1022,12 +1027,12 @@ func TestCapabilities(t *testing.T) {
 		defer os.RemoveAll(bundleDir)
 
 		// Create and start the container.
-		s, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
+		cont, err := Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "")
 		if err != nil {
 			t.Fatalf("error creating container: %v", err)
 		}
-		defer s.Destroy()
-		if err := s.Start(conf); err != nil {
+		defer cont.Destroy()
+		if err := cont.Start(conf); err != nil {
 			t.Fatalf("error starting container: %v", err)
 		}
 
@@ -1048,7 +1053,7 @@ func TestCapabilities(t *testing.T) {
 				Cmd:  "exe",
 			},
 		}
-		if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+		if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
 			t.Fatalf("Failed to wait for sleep to start, err: %v", err)
 		}
 
@@ -1064,7 +1069,7 @@ func TestCapabilities(t *testing.T) {
 		// Need to traverse the intermediate directory.
 		os.Chmod(rootDir, 0755)
 
-		execArgs := control.ExecArgs{
+		args := &control.ExecArgs{
 			Filename:         exePath,
 			Argv:             []string{exePath},
 			WorkingDirectory: "/",
@@ -1074,17 +1079,17 @@ func TestCapabilities(t *testing.T) {
 		}
 
 		// "exe" should fail because we don't have the necessary permissions.
-		if _, err := s.Execute(&execArgs); err == nil {
+		if _, err := cont.executeSync(args); err == nil {
 			t.Fatalf("container executed without error, but an error was expected")
 		}
 
 		// Now we run with the capability enabled and should succeed.
-		execArgs.Capabilities = &auth.TaskCapabilities{
+		args.Capabilities = &auth.TaskCapabilities{
 			EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
 		}
 		// "exe" should not fail this time.
-		if _, err := s.Execute(&execArgs); err != nil {
-			t.Fatalf("container failed to exec %v: %v", execArgs, err)
+		if _, err := cont.executeSync(args); err != nil {
+			t.Fatalf("container failed to exec %v: %v", args, err)
 		}
 	}
 }
@@ -1404,11 +1409,11 @@ func TestContainerVolumeContentsShared(t *testing.T) {
 	filename := filepath.Join(dir, "file")
 
 	// File does not exist yet. Reading from the sandbox should fail.
-	execArgsTestFile := control.ExecArgs{
+	argsTestFile := &control.ExecArgs{
 		Filename: "/usr/bin/test",
 		Argv:     []string{"test", "-f", filename},
 	}
-	if ws, err := c.Execute(&execArgsTestFile); err != nil {
+	if ws, err := c.executeSync(argsTestFile); err != nil {
 		t.Fatalf("unexpected error testing file %q: %v", filename, err)
 	} else if ws.ExitStatus() == 0 {
 		t.Errorf("test %q exited with code %v, wanted not zero", ws.ExitStatus(), err)
@@ -1420,7 +1425,7 @@ func TestContainerVolumeContentsShared(t *testing.T) {
 	}
 
 	// Now we should be able to test the file from within the sandbox.
-	if ws, err := c.Execute(&execArgsTestFile); err != nil {
+	if ws, err := c.executeSync(argsTestFile); err != nil {
 		t.Fatalf("unexpected error testing file %q: %v", filename, err)
 	} else if ws.ExitStatus() != 0 {
 		t.Errorf("test %q exited with code %v, wanted zero", filename, ws.ExitStatus())
@@ -1433,18 +1438,18 @@ func TestContainerVolumeContentsShared(t *testing.T) {
 	}
 
 	// File should no longer exist at the old path within the sandbox.
-	if ws, err := c.Execute(&execArgsTestFile); err != nil {
+	if ws, err := c.executeSync(argsTestFile); err != nil {
 		t.Fatalf("unexpected error testing file %q: %v", filename, err)
 	} else if ws.ExitStatus() == 0 {
 		t.Errorf("test %q exited with code %v, wanted not zero", filename, ws.ExitStatus())
 	}
 
 	// We should be able to test the new filename from within the sandbox.
-	execArgsTestNewFile := control.ExecArgs{
+	argsTestNewFile := &control.ExecArgs{
 		Filename: "/usr/bin/test",
 		Argv:     []string{"test", "-f", newFilename},
 	}
-	if ws, err := c.Execute(&execArgsTestNewFile); err != nil {
+	if ws, err := c.executeSync(argsTestNewFile); err != nil {
 		t.Fatalf("unexpected error testing file %q: %v", newFilename, err)
 	} else if ws.ExitStatus() != 0 {
 		t.Errorf("test %q exited with code %v, wanted zero", newFilename, ws.ExitStatus())
@@ -1456,20 +1461,20 @@ func TestContainerVolumeContentsShared(t *testing.T) {
 	}
 
 	// Renamed file should no longer exist at the old path within the sandbox.
-	if ws, err := c.Execute(&execArgsTestNewFile); err != nil {
+	if ws, err := c.executeSync(argsTestNewFile); err != nil {
 		t.Fatalf("unexpected error testing file %q: %v", newFilename, err)
 	} else if ws.ExitStatus() == 0 {
 		t.Errorf("test %q exited with code %v, wanted not zero", newFilename, ws.ExitStatus())
 	}
 
 	// Now create the file from WITHIN the sandbox.
-	execArgsTouch := control.ExecArgs{
+	argsTouch := &control.ExecArgs{
 		Filename: "/usr/bin/touch",
 		Argv:     []string{"touch", filename},
 		KUID:     auth.KUID(os.Getuid()),
 		KGID:     auth.KGID(os.Getgid()),
 	}
-	if ws, err := c.Execute(&execArgsTouch); err != nil {
+	if ws, err := c.executeSync(argsTouch); err != nil {
 		t.Fatalf("unexpected error touching file %q: %v", filename, err)
 	} else if ws.ExitStatus() != 0 {
 		t.Errorf("touch %q exited with code %v, wanted zero", filename, ws.ExitStatus())
@@ -1486,11 +1491,11 @@ func TestContainerVolumeContentsShared(t *testing.T) {
 	}
 
 	// Delete the file from within the sandbox.
-	execArgsRemove := control.ExecArgs{
+	argsRemove := &control.ExecArgs{
 		Filename: "/bin/rm",
 		Argv:     []string{"rm", filename},
 	}
-	if ws, err := c.Execute(&execArgsRemove); err != nil {
+	if ws, err := c.executeSync(argsRemove); err != nil {
 		t.Fatalf("unexpected error removing file %q: %v", filename, err)
 	} else if ws.ExitStatus() != 0 {
 		t.Errorf("remove %q exited with code %v, wanted zero", filename, ws.ExitStatus())
@@ -1547,6 +1552,19 @@ func TestGoferExits(t *testing.T) {
 	}
 }
 
+// executeSync synchronously executes a new process.
+func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
+	pid, err := cont.Execute(args)
+	if err != nil {
+		return 0, fmt.Errorf("error executing: %v", err)
+	}
+	ws, err := cont.WaitPID(pid)
+	if err != nil {
+		return 0, fmt.Errorf("error waiting: %v", err)
+	}
+	return ws, nil
+}
+
 func TestMain(m *testing.M) {
 	testutil.RunAsRoot(m)
 }
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 195deda1e..8e90dcc70 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -187,8 +187,9 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
 	return pl, nil
 }
 
-// Execute runs the specified command in the container.
-func (s *Sandbox) Execute(cid string, e *control.ExecArgs) (syscall.WaitStatus, error) {
+// Execute runs the specified command in the container. It returns the pid of
+// the newly created process.
+func (s *Sandbox) Execute(cid string, args *control.ExecArgs) (int32, error) {
 	log.Debugf("Executing new process in container %q in sandbox %q", cid, s.ID)
 	conn, err := s.sandboxConnect()
 	if err != nil {
@@ -196,20 +197,14 @@ func (s *Sandbox) Execute(cid string, e *control.ExecArgs) (syscall.WaitStatus,
 	}
 	defer conn.Close()
 
-	ea := &boot.ExecArgs{
-		ExecArgs: *e,
-		CID:      cid,
-	}
+	rpcArgs := &boot.ExecArgs{ExecArgs: *args, CID: cid}
 
 	// Send a message to the sandbox control server to start the container.
-	var waitStatus uint32
-	// TODO: Pass in the container id (cid) here. The sandbox
-	// should execute in the context of that container.
-	if err := conn.Call(boot.ContainerExecute, ea, &waitStatus); err != nil {
+	var pid int32
+	if err := conn.Call(boot.ContainerExecuteAsync, rpcArgs, &pid); err != nil {
 		return 0, fmt.Errorf("error executing in sandbox: %v", err)
 	}
-
-	return syscall.WaitStatus(waitStatus), nil
+	return pid, nil
 }
 
 // Event retrieves stats about the sandbox such as memory and CPU utilization.