From 0d7023d581612e1670ef36490a827e46968d6d08 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 9 Jan 2019 09:17:04 -0800 Subject: [PATCH] Restore to original cgroup after sandbox and gofer processes are created The original code assumed that it was safe to join and not restore cgroup, but Container.Run will not exit after calling start, making cgroup cleanup fail because there were still processes inside the cgroup. PiperOrigin-RevId: 228529199 Change-Id: I12a48d9adab4bbb02f20d71ec99598c336cbfe51 --- runsc/cgroup/cgroup.go | 67 ++++++++++++++++++++++++++++-------- runsc/container/BUILD | 1 + runsc/container/container.go | 62 +++++++++++++++++++++++---------- runsc/sandbox/sandbox.go | 37 ++++---------------- runsc/specutils/specutils.go | 8 ++--- 5 files changed, 107 insertions(+), 68 deletions(-) diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 2887f3d7f..65a0b6d7a 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -17,6 +17,7 @@ package cgroup import ( + "bufio" "context" "fmt" "io/ioutil" @@ -168,12 +169,12 @@ type Cgroup struct { } // New creates a new Cgroup instance if the spec includes a cgroup path. -// Otherwise it returns nil and false. -func New(spec *specs.Spec) (*Cgroup, bool) { +// Returns nil otherwise. +func New(spec *specs.Spec) *Cgroup { if spec.Linux == nil || spec.Linux.CgroupsPath == "" { - return nil, false + return nil } - return &Cgroup{Name: spec.Linux.CgroupsPath}, true + return &Cgroup{Name: spec.Linux.CgroupsPath} } // Install creates and configures cgroups according to 'res'. If cgroup path @@ -241,19 +242,57 @@ func (c *Cgroup) Uninstall() error { return nil } -// Join adds the current process to the all controllers. -func (c *Cgroup) Join() error { - return c.Add(0) -} +// Join adds the current process to the all controllers. Returns function that +// restores cgroup to the original state. +func (c *Cgroup) Join() (func(), error) { + // First save the current state so it can be restored. + undo := func() {} + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return undo, err + } + defer f.Close() -// Add adds given process to all controllers. -func (c *Cgroup) Add(pid int) error { - for key := range controllers { - if err := setValue(c.makePath(key), "cgroup.procs", strconv.Itoa(pid)); err != nil { - return err + var undoPaths []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + // Format: ID:controller1,controller2:path + // Example: 2:cpu,cpuacct:/user.slice + tokens := strings.Split(scanner.Text(), ":") + if len(tokens) != 3 { + return undo, fmt.Errorf("formatting cgroups file, line: %q", scanner.Text()) + } + for _, ctrlr := range strings.Split(tokens[1], ",") { + // Skip controllers we don't handle. + if _, ok := controllers[ctrlr]; ok { + undoPaths = append(undoPaths, filepath.Join(cgroupRoot, ctrlr, tokens[2])) + break + } } } - return nil + if err := scanner.Err(); err != nil { + return undo, err + } + + // Replace empty undo with the real thing before changes are made to cgroups. + undo = func() { + for _, path := range undoPaths { + log.Debugf("Restoring cgroup %q", path) + if err := setValue(path, "cgroup.procs", "0"); err != nil { + log.Warningf("Error restoring cgroup %q: %v", path, err) + } + } + } + + // Now join the cgroups. + for key := range controllers { + path := c.makePath(key) + log.Debugf("Joining cgroup %q", path) + if err := setValue(path, "cgroup.procs", "0"); err != nil { + return undo, err + } + } + return undo, nil } // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'. diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 28ec81d3f..d9534cbcc 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/log", "//pkg/sentry/control", "//runsc/boot", + "//runsc/cgroup", "//runsc/sandbox", "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", diff --git a/runsc/container/container.go b/runsc/container/container.go index 07924d23a..dc9ef86fa 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -36,6 +36,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/cgroup" "gvisor.googlesource.com/gvisor/runsc/sandbox" "gvisor.googlesource.com/gvisor/runsc/specutils" ) @@ -286,18 +287,26 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo return nil, fmt.Errorf("writing clean spec: %v", err) } - ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) - if err != nil { - return nil, err + // Create and join cgroup before processes are created to ensure they are + // part of the cgroup from the start (and all tneir children processes). + cg := cgroup.New(spec) + if cg != nil { + // If there is cgroup config, install it before creating sandbox process. + if err := cg.Install(spec.Linux.Resources); err != nil { + return nil, fmt.Errorf("configuring cgroup: %v", err) + } } + if err := runInCgroup(cg, func() error { + ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) + if err != nil { + return err + } - // Start a new sandbox for this container. Any errors after this point - // must destroy the container. - c.Sandbox, err = sandbox.Create(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles) - if err != nil { - return nil, err - } - if err := c.Sandbox.AddGoferToCgroup(c.GoferPid); err != nil { + // Start a new sandbox for this container. Any errors after this point + // must destroy the container. + c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, cg) + return err + }); err != nil { return nil, err } } else { @@ -381,15 +390,16 @@ func (c *Container) Start(conf *boot.Config) error { return fmt.Errorf("writing clean spec: %v", err) } - // Create the gofer process. - ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) - if err != nil { - return err - } - if err := c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles); err != nil { - return err - } - if err := c.Sandbox.AddGoferToCgroup(c.GoferPid); err != nil { + // Join cgroup to strt gofer process to ensure it's part of the cgroup from + // the start (and all tneir children processes). + if err := runInCgroup(c.Sandbox.Cgroup, func() error { + // Create the gofer process. + ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) + if err != nil { + return err + } + return c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles) + }); err != nil { return err } } @@ -883,3 +893,17 @@ func lockContainerMetadata(containerRootDir string) (func() error, error) { } return l.Unlock, nil } + +// runInCgroup executes fn inside the specified cgroup. If cg is nil, execute +// it in the current context. +func runInCgroup(cg *cgroup.Cgroup, fn func() error) error { + if cg == nil { + return fn() + } + restore, err := cg.Join() + defer restore() + if err != nil { + return err + } + return fn() +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index d84995d04..9e95a11b4 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -64,32 +64,15 @@ type Sandbox struct { Cgroup *cgroup.Cgroup `json:"cgroup"` } -// Create creates the sandbox process. The caller must call Destroy() on the -// sandbox. If spec specified a cgroup, the current process will have joined -// the cgroup upon return. -func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File) (*Sandbox, error) { - s := &Sandbox{ID: id} - // The Cleanup object cleans up partially created sandboxes when an error occurs. - // Any errors occurring during cleanup itself are ignored. +// New creates the sandbox process. The caller must call Destroy() on the +// sandbox. +func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, cg *cgroup.Cgroup) (*Sandbox, error) { + s := &Sandbox{ID: id, Cgroup: cg} + // The Cleanup object cleans up partially created sandboxes when an error + // occurs. Any errors occurring during cleanup itself are ignored. c := specutils.MakeCleanup(func() { _ = s.destroy() }) defer c.Clean() - if cg, ok := cgroup.New(spec); ok { - s.Cgroup = cg - - // If there is cgroup config, install it before creating sandbox process. - if err := s.Cgroup.Install(spec.Linux.Resources); err != nil { - return nil, fmt.Errorf("configuring cgroup: %v", err) - } - - // Make this process join the cgroup to ensure the sandbox (and all its - // children processes) are part of the cgroup from the start. Don't bother - // moving out because the caller is about to exit anyways. - if err := cg.Join(); err != nil { - return nil, fmt.Errorf("joining cgroup: %v", err) - } - } - // Create pipe to synchronize when sandbox process has been booted. fds := make([]int, 2) if err := syscall.Pipe(fds); err != nil { @@ -871,14 +854,6 @@ func (s *Sandbox) waitForStopped() error { return backoff.Retry(op, b) } -// AddGoferToCgroup adds the gofer process to the sandbox's cgroup. -func (s *Sandbox) AddGoferToCgroup(pid int) error { - if s.Cgroup != nil { - return s.Cgroup.Add(pid) - } - return nil -} - // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 055076475..7b0dcf231 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -470,8 +470,7 @@ func ContainsStr(strs []string, str string) bool { // c.Release() // on success, aborts closing the file and return it. // return f type Cleanup struct { - clean func() - released bool + clean func() } // MakeCleanup creates a new Cleanup object. @@ -481,13 +480,14 @@ func MakeCleanup(f func()) Cleanup { // Clean calls the cleanup function. func (c *Cleanup) Clean() { - if !c.released { + if c.clean != nil { c.clean() + c.clean = nil } } // Release releases the cleanup from its duties, i.e. cleanup function is not // called after this point. func (c *Cleanup) Release() { - c.released = true + c.clean = nil }