Use different pidns among different containers

Previously, all containers in a sandbox shared a single pid
namespace. As a result, a container could see the processes
of other containers in the same sandbox.

This patch uses a different pid namespace for each container.

Signed-off-by: chris.zn <chris.zn@antfin.com>
This commit is contained in:
chris.zn 2019-04-30 23:35:36 +08:00
parent 7e38d64333
commit 1c5b6d9bd2
6 changed files with 154 additions and 9 deletions

View File

@ -92,6 +92,9 @@ type ExecArgs struct {
// ContainerID is the container for the process being executed.
ContainerID string
// PIDNamespace is the pid namespace for the process being executed.
PIDNamespace *kernel.PIDNamespace
}
// String prints the arguments as a string.
@ -162,6 +165,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
IPCNamespace: proc.Kernel.RootIPCNamespace(),
AbstractSocketNamespace: proc.Kernel.RootAbstractSocketNamespace(),
ContainerID: args.ContainerID,
PIDNamespace: args.PIDNamespace,
}
if initArgs.Root != nil {
// initArgs must hold a reference on Root, which will be
@ -341,7 +345,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error {
ts := k.TaskSet()
now := k.RealtimeClock().Now()
for _, tg := range ts.Root.ThreadGroups() {
pid := ts.Root.IDOfThreadGroup(tg)
pid := tg.PIDNamespace().IDOfThreadGroup(tg)
// If tg has already been reaped ignore it.
if pid == 0 {
continue
@ -352,7 +356,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error {
ppid := kernel.ThreadID(0)
if p := tg.Leader().Parent(); p != nil {
ppid = ts.Root.IDOfThreadGroup(p.ThreadGroup())
ppid = p.PIDNamespace().IDOfThreadGroup(p.ThreadGroup())
}
*out = append(*out, &Process{
UID: tg.Leader().Credentials().EffectiveKUID,

View File

@ -622,6 +622,9 @@ type CreateProcessArgs struct {
// IPCNamespace is the initial IPC namespace.
IPCNamespace *IPCNamespace
// PIDNamespace is the initial PID Namespace.
PIDNamespace *PIDNamespace
// AbstractSocketNamespace is the initial Abstract Socket namespace.
AbstractSocketNamespace *AbstractSocketNamespace
@ -668,9 +671,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
case CtxKernel:
return ctx.k
case CtxPIDNamespace:
// "The new task ... is in the root PID namespace." -
// Kernel.CreateProcess
return ctx.k.tasks.Root
return ctx.args.PIDNamespace
case CtxUTSNamespace:
return ctx.args.UTSNamespace
case CtxIPCNamespace:
@ -745,7 +746,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
mounts.IncRef()
}
tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
ctx := args.NewContext(k)
// Grab the root directory.
@ -1018,6 +1019,11 @@ func (k *Kernel) RootIPCNamespace() *IPCNamespace {
return k.rootIPCNamespace
}
// RootPIDNamespace returns the kernel's root PIDNamespace, i.e. the Root
// namespace of the kernel's task set.
func (k *Kernel) RootPIDNamespace() *PIDNamespace {
	root := k.tasks.Root
	return root
}
// RootAbstractSocketNamespace returns the root AbstractSocketNamespace.
func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace {
return k.rootAbstractSocketNamespace

View File

@ -505,6 +505,7 @@ func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs *
Credentials: auth.NewRootCredentials(creds.UserNamespace),
Umask: 0022,
MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
PIDNamespace: procArgs.PIDNamespace,
}
rootCtx := rootProcArgs.NewContext(c.k)

View File

@ -138,6 +138,9 @@ type execProcess struct {
// tty will be nil if the process is not attached to a terminal.
tty *host.TTYFileOperations
// pidnsPath is the pid namespace path in spec
pidnsPath string
}
func init() {
@ -298,7 +301,7 @@ func New(args Args) (*Loader, error) {
// Create a watchdog.
dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
procArgs, err := newProcess(args.ID, args.Spec, creds, k)
procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
if err != nil {
return nil, fmt.Errorf("creating init process for root container: %v", err)
}
@ -376,7 +379,7 @@ func New(args Args) (*Loader, error) {
}
// newProcess creates a process that can be run with kernel.CreateProcess.
func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.Kernel, pidns *kernel.PIDNamespace) (kernel.CreateProcessArgs, error) {
// Create initial limits.
ls, err := createLimitSet(spec)
if err != nil {
@ -396,7 +399,9 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.
IPCNamespace: k.RootIPCNamespace(),
AbstractSocketNamespace: k.RootAbstractSocketNamespace(),
ContainerID: id,
PIDNamespace: pidns,
}
return procArgs, nil
}
@ -559,6 +564,9 @@ func (l *Loader) run() error {
}
ep.tg = l.k.GlobalInit()
if ns, ok := specutils.GetNS(specs.PIDNamespace, l.spec); ok {
ep.pidnsPath = ns.Path
}
if l.console {
ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
defer ttyFile.DecRef()
@ -627,7 +635,24 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
caps,
l.k.RootUserNamespace())
procArgs, err := newProcess(cid, spec, creds, l.k)
var pidns *kernel.PIDNamespace
if ns, ok := specutils.GetNS(specs.PIDNamespace, spec); ok {
if ns.Path != "" {
for _, p := range l.processes {
if ns.Path == p.pidnsPath {
pidns = p.tg.PIDNamespace()
break
}
}
}
if pidns == nil {
pidns = l.k.RootPIDNamespace().NewChild(l.k.RootUserNamespace())
}
l.processes[eid].pidnsPath = ns.Path
} else {
pidns = l.k.RootPIDNamespace()
}
procArgs, err := newProcess(cid, spec, creds, l.k, pidns)
if err != nil {
return fmt.Errorf("creating new process: %v", err)
}
@ -749,6 +774,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
// Start the process.
proc := control.Proc{Kernel: l.k}
args.PIDNamespace = tg.PIDNamespace()
newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args)
if err != nil {
return 0, err

View File

@ -714,6 +714,16 @@ func TestKillPid(t *testing.T) {
if err := waitForProcessCount(cont, nProcs-1); err != nil {
t.Fatal(err)
}
procs, err = cont.Processes()
if err != nil {
t.Fatalf("failed to get process list: %v", err)
}
for _, p := range procs {
if pid == int32(p.PID) {
t.Fatalf("pid %d is still alive, which should be killed", pid)
}
}
}
}

View File

@ -165,6 +165,104 @@ func TestMultiContainerSanity(t *testing.T) {
}
}
// TestMultiPIDNS checks that it is possible to run 2 dead-simple
// containers in the same sandbox with different pidns.
//
// The test adds a "pid" namespace entry (with no path) to the second spec
// only, and then expects each container to report exactly one "sleep"
// process whose PID is 1.
func TestMultiPIDNS(t *testing.T) {
	for _, conf := range configs(all...) {
		// Run every configuration inside its own function so the deferred
		// cleanup fires at the end of the iteration. A defer placed directly
		// in the loop body would only run when the whole test returns,
		// keeping the containers of every earlier configuration alive.
		func() {
			t.Logf("Running test with conf: %+v", conf)

			// Setup the containers.
			sleep := []string{"sleep", "100"}
			testSpecs, ids := createSpecs(sleep, sleep)
			testSpecs[1].Linux = &specs.Linux{
				Namespaces: []specs.LinuxNamespace{
					{
						Type: "pid",
					},
				},
			}

			containers, cleanup, err := startContainers(conf, testSpecs, ids)
			if err != nil {
				t.Fatalf("error starting containers: %v", err)
			}
			defer cleanup()

			// Both containers are expected to list a single sleep process
			// with PID 1.
			expectedPL := []*control.Process{
				{PID: 1, Cmd: "sleep"},
			}
			if err := waitForProcessList(containers[0], expectedPL); err != nil {
				t.Errorf("failed to wait for sleep to start: %v", err)
			}
			if err := waitForProcessList(containers[1], expectedPL); err != nil {
				t.Errorf("failed to wait for sleep to start: %v", err)
			}
		}()
	}
}
// TestMultiPIDNSPath checks the pidns path.
func TestMultiPIDNSPath(t *testing.T) {
for _, conf := range configs(all...) {
t.Logf("Running test with conf: %+v", conf)
// Setup the containers.
sleep := []string{"sleep", "100"}
testSpecs, ids := createSpecs(sleep, sleep, sleep)
testSpecs[0].Linux = &specs.Linux{
Namespaces: []specs.LinuxNamespace{
{
Type: "pid",
Path: "/proc/1/ns/pid",
},
},
}
testSpecs[1].Linux = &specs.Linux{
Namespaces: []specs.LinuxNamespace{
{
Type: "pid",
Path: "/proc/1/ns/pid",
},
},
}
testSpecs[2].Linux = &specs.Linux{
Namespaces: []specs.LinuxNamespace{
{
Type: "pid",
Path: "/proc/2/ns/pid",
},
},
}
containers, cleanup, err := startContainers(conf, testSpecs, ids)
if err != nil {
t.Fatalf("error starting containers: %v", err)
}
defer cleanup()
// Check via ps that multiple processes are running.
expectedPL := []*control.Process{
{PID: 1, Cmd: "sleep"},
}
if err := waitForProcessList(containers[0], expectedPL); err != nil {
t.Errorf("failed to wait for sleep to start: %v", err)
}
if err := waitForProcessList(containers[2], expectedPL); err != nil {
t.Errorf("failed to wait for sleep to start: %v", err)
}
expectedPL = []*control.Process{
{PID: 2, Cmd: "sleep"},
}
if err := waitForProcessList(containers[1], expectedPL); err != nil {
t.Errorf("failed to wait for sleep to start: %v", err)
}
}
}
func TestMultiContainerWait(t *testing.T) {
// The first container should run the entire duration of the test.
cmd1 := []string{"sleep", "100"}