From 1c5b6d9bd26ba090610d05366df90d4fee91c677 Mon Sep 17 00:00:00 2001 From: "chris.zn" Date: Tue, 30 Apr 2019 23:35:36 +0800 Subject: [PATCH] Use different pidns among different containers Previously, all containers in a sandbox shared a single pid namespace. As a result, a container could see the processes of another container in the same sandbox. This patch uses a different pid namespace for each container. Signed-off-by: chris.zn --- pkg/sentry/control/proc.go | 8 +- pkg/sentry/kernel/kernel.go | 14 +++- runsc/boot/fs.go | 1 + runsc/boot/loader.go | 32 +++++++- runsc/container/container_test.go | 10 +++ runsc/container/multi_container_test.go | 98 +++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 9 deletions(-) diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 60e6c9285..3f9772b87 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -92,6 +92,9 @@ type ExecArgs struct { // ContainerID is the container for the process being executed. ContainerID string + + // PIDNamespace is the pid namespace for the process being executed. + PIDNamespace *kernel.PIDNamespace } // String prints the arguments as a string. @@ -162,6 +165,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI IPCNamespace: proc.Kernel.RootIPCNamespace(), AbstractSocketNamespace: proc.Kernel.RootAbstractSocketNamespace(), ContainerID: args.ContainerID, + PIDNamespace: args.PIDNamespace, } if initArgs.Root != nil { // initArgs must hold a reference on Root, which will be @@ -341,7 +345,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { ts := k.TaskSet() now := k.RealtimeClock().Now() for _, tg := range ts.Root.ThreadGroups() { - pid := ts.Root.IDOfThreadGroup(tg) + pid := tg.PIDNamespace().IDOfThreadGroup(tg) // If tg has already been reaped ignore it. 
if pid == 0 { continue @@ -352,7 +356,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { ppid := kernel.ThreadID(0) if p := tg.Leader().Parent(); p != nil { - ppid = ts.Root.IDOfThreadGroup(p.ThreadGroup()) + ppid = p.PIDNamespace().IDOfThreadGroup(p.ThreadGroup()) } *out = append(*out, &Process{ UID: tg.Leader().Credentials().EffectiveKUID, diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 38b49cba2..70f5a3f0b 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -622,6 +622,9 @@ type CreateProcessArgs struct { // IPCNamespace is the initial IPC namespace. IPCNamespace *IPCNamespace + // PIDNamespace is the initial PID Namespace. + PIDNamespace *PIDNamespace + // AbstractSocketNamespace is the initial Abstract Socket namespace. AbstractSocketNamespace *AbstractSocketNamespace @@ -668,9 +671,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { case CtxKernel: return ctx.k case CtxPIDNamespace: - // "The new task ... is in the root PID namespace." - - // Kernel.CreateProcess - return ctx.k.tasks.Root + return ctx.args.PIDNamespace case CtxUTSNamespace: return ctx.args.UTSNamespace case CtxIPCNamespace: @@ -745,7 +746,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, mounts.IncRef() } - tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) + tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) // Grab the root directory. @@ -1018,6 +1019,11 @@ func (k *Kernel) RootIPCNamespace() *IPCNamespace { return k.rootIPCNamespace } +// RootPIDNamespace returns the root PIDNamespace. +func (k *Kernel) RootPIDNamespace() *PIDNamespace { + return k.tasks.Root +} + // RootAbstractSocketNamespace returns the root AbstractSocketNamespace. 
func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace { return k.rootAbstractSocketNamespace diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 55bfc27ff..4bff0d034 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -505,6 +505,7 @@ func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs * Credentials: auth.NewRootCredentials(creds.UserNamespace), Umask: 0022, MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + PIDNamespace: procArgs.PIDNamespace, } rootCtx := rootProcArgs.NewContext(c.k) diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a8adaf292..b91553c4c 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -138,6 +138,9 @@ type execProcess struct { // tty will be nil if the process is not attached to a terminal. tty *host.TTYFileOperations + + // pidnsPath is the pid namespace path in spec + pidnsPath string } func init() { @@ -298,7 +301,7 @@ func New(args Args) (*Loader, error) { // Create a watchdog. dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction) - procArgs, err := newProcess(args.ID, args.Spec, creds, k) + procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace()) if err != nil { return nil, fmt.Errorf("creating init process for root container: %v", err) } @@ -376,7 +379,7 @@ func New(args Args) (*Loader, error) { } // newProcess creates a process that can be run with kernel.CreateProcess. -func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel) (kernel.CreateProcessArgs, error) { +func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel, pidns *kernel.PIDNamespace) (kernel.CreateProcessArgs, error) { // Create initial limits. ls, err := createLimitSet(spec) if err != nil { @@ -396,7 +399,9 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel. 
IPCNamespace: k.RootIPCNamespace(), AbstractSocketNamespace: k.RootAbstractSocketNamespace(), ContainerID: id, + PIDNamespace: pidns, } + return procArgs, nil } @@ -559,6 +564,9 @@ func (l *Loader) run() error { } ep.tg = l.k.GlobalInit() + if ns, ok := specutils.GetNS(specs.PIDNamespace, l.spec); ok { + ep.pidnsPath = ns.Path + } if l.console { ttyFile, _ := l.rootProcArgs.FDTable.Get(0) defer ttyFile.DecRef() @@ -627,7 +635,24 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file caps, l.k.RootUserNamespace()) - procArgs, err := newProcess(cid, spec, creds, l.k) + var pidns *kernel.PIDNamespace + if ns, ok := specutils.GetNS(specs.PIDNamespace, spec); ok { + if ns.Path != "" { + for _, p := range l.processes { + if ns.Path == p.pidnsPath { + pidns = p.tg.PIDNamespace() + break + } + } + } + if pidns == nil { + pidns = l.k.RootPIDNamespace().NewChild(l.k.RootUserNamespace()) + } + l.processes[eid].pidnsPath = ns.Path + } else { + pidns = l.k.RootPIDNamespace() + } + procArgs, err := newProcess(cid, spec, creds, l.k, pidns) if err != nil { return fmt.Errorf("creating new process: %v", err) } @@ -749,6 +774,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // Start the process. 
proc := control.Proc{Kernel: l.k} + args.PIDNamespace = tg.PIDNamespace() newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args) if err != nil { return 0, err diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index c1d6ca7b8..ff68c586e 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -714,6 +714,16 @@ func TestKillPid(t *testing.T) { if err := waitForProcessCount(cont, nProcs-1); err != nil { t.Fatal(err) } + + procs, err = cont.Processes() + if err != nil { + t.Fatalf("failed to get process list: %v", err) + } + for _, p := range procs { + if pid == int32(p.PID) { + t.Fatalf("pid %d is still alive, which should be killed", pid) + } + } } } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index e299a0e88..978a422f5 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -165,6 +165,104 @@ func TestMultiContainerSanity(t *testing.T) { } } +// TestMultiPIDNS checks that it is possible to run 2 dead-simple +// containers in the same sandbox with different pidns. +func TestMultiPIDNS(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep) + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, + }, + } + + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Check via ps that multiple processes are running. 
+ expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + expectedPL = []*control.Process{ + {PID: 1, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + } +} + +// TestMultiPIDNSPath checks the pidns path. +func TestMultiPIDNSPath(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + // Setup the containers. + sleep := []string{"sleep", "100"} + testSpecs, ids := createSpecs(sleep, sleep, sleep) + testSpecs[0].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, + }, + } + testSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/1/ns/pid", + }, + }, + } + testSpecs[2].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + Path: "/proc/2/ns/pid", + }, + }, + } + + containers, cleanup, err := startContainers(conf, testSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + if err := waitForProcessList(containers[2], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + + expectedPL = []*control.Process{ + {PID: 2, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[1], expectedPL); err != nil { + t.Errorf("failed to wait for sleep to start: %v", err) + } + } +} + func TestMultiContainerWait(t *testing.T) { // The first container should run the entire duration of the test. cmd1 := []string{"sleep", "100"}