diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 2887f3d7f..65a0b6d7a 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -17,6 +17,7 @@ package cgroup import ( + "bufio" "context" "fmt" "io/ioutil" @@ -168,12 +169,12 @@ type Cgroup struct { } // New creates a new Cgroup instance if the spec includes a cgroup path. -// Otherwise it returns nil and false. -func New(spec *specs.Spec) (*Cgroup, bool) { +// Returns nil otherwise. +func New(spec *specs.Spec) *Cgroup { if spec.Linux == nil || spec.Linux.CgroupsPath == "" { - return nil, false + return nil } - return &Cgroup{Name: spec.Linux.CgroupsPath}, true + return &Cgroup{Name: spec.Linux.CgroupsPath} } // Install creates and configures cgroups according to 'res'. If cgroup path @@ -241,19 +242,57 @@ func (c *Cgroup) Uninstall() error { return nil } -// Join adds the current process to the all controllers. -func (c *Cgroup) Join() error { - return c.Add(0) -} +// Join adds the current process to the all controllers. Returns function that +// restores cgroup to the original state. +func (c *Cgroup) Join() (func(), error) { + // First save the current state so it can be restored. + undo := func() {} + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return undo, err + } + defer f.Close() -// Add adds given process to all controllers. -func (c *Cgroup) Add(pid int) error { - for key := range controllers { - if err := setValue(c.makePath(key), "cgroup.procs", strconv.Itoa(pid)); err != nil { - return err + var undoPaths []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + // Format: ID:controller1,controller2:path + // Example: 2:cpu,cpuacct:/user.slice + tokens := strings.Split(scanner.Text(), ":") + if len(tokens) != 3 { + return undo, fmt.Errorf("formatting cgroups file, line: %q", scanner.Text()) + } + for _, ctrlr := range strings.Split(tokens[1], ",") { + // Skip controllers we don't handle. + if _, ok := controllers[ctrlr]; ok { + undoPaths = append(undoPaths, filepath.Join(cgroupRoot, ctrlr, tokens[2])) + break + } } } - return nil + if err := scanner.Err(); err != nil { + return undo, err + } + + // Replace empty undo with the real thing before changes are made to cgroups. + undo = func() { + for _, path := range undoPaths { + log.Debugf("Restoring cgroup %q", path) + if err := setValue(path, "cgroup.procs", "0"); err != nil { + log.Warningf("Error restoring cgroup %q: %v", path, err) + } + } + } + + // Now join the cgroups. + for key := range controllers { + path := c.makePath(key) + log.Debugf("Joining cgroup %q", path) + if err := setValue(path, "cgroup.procs", "0"); err != nil { + return undo, err + } + } + return undo, nil } // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'. diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 28ec81d3f..d9534cbcc 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/log", "//pkg/sentry/control", "//runsc/boot", + "//runsc/cgroup", "//runsc/sandbox", "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", diff --git a/runsc/container/container.go b/runsc/container/container.go index 07924d23a..dc9ef86fa 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -36,6 +36,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/cgroup" "gvisor.googlesource.com/gvisor/runsc/sandbox" "gvisor.googlesource.com/gvisor/runsc/specutils" ) @@ -286,18 +287,26 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo return nil, fmt.Errorf("writing clean spec: %v", err) } - ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) - if err != nil { - return nil, err + // Create and join cgroup before processes are created to ensure they are + // part of the cgroup from the start (and all tneir children processes). + cg := cgroup.New(spec) + if cg != nil { + // If there is cgroup config, install it before creating sandbox process. + if err := cg.Install(spec.Linux.Resources); err != nil { + return nil, fmt.Errorf("configuring cgroup: %v", err) + } } + if err := runInCgroup(cg, func() error { + ioFiles, err := c.createGoferProcess(spec, conf, bundleDir) + if err != nil { + return err + } - // Start a new sandbox for this container. Any errors after this point - // must destroy the container. - c.Sandbox, err = sandbox.Create(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles) - if err != nil { - return nil, err - } - if err := c.Sandbox.AddGoferToCgroup(c.GoferPid); err != nil { + // Start a new sandbox for this container. Any errors after this point + // must destroy the container. + c.Sandbox, err = sandbox.New(id, spec, conf, bundleDir, consoleSocket, userLog, ioFiles, cg) + return err + }); err != nil { return nil, err } } else { @@ -381,15 +390,16 @@ func (c *Container) Start(conf *boot.Config) error { return fmt.Errorf("writing clean spec: %v", err) } - // Create the gofer process. - ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) - if err != nil { - return err - } - if err := c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles); err != nil { - return err - } - if err := c.Sandbox.AddGoferToCgroup(c.GoferPid); err != nil { + // Join cgroup to strt gofer process to ensure it's part of the cgroup from + // the start (and all tneir children processes). + if err := runInCgroup(c.Sandbox.Cgroup, func() error { + // Create the gofer process. + ioFiles, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) + if err != nil { + return err + } + return c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles) + }); err != nil { return err } } @@ -883,3 +893,17 @@ func lockContainerMetadata(containerRootDir string) (func() error, error) { } return l.Unlock, nil } + +// runInCgroup executes fn inside the specified cgroup. If cg is nil, execute +// it in the current context. +func runInCgroup(cg *cgroup.Cgroup, fn func() error) error { + if cg == nil { + return fn() + } + restore, err := cg.Join() + defer restore() + if err != nil { + return err + } + return fn() +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index d84995d04..9e95a11b4 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -64,32 +64,15 @@ type Sandbox struct { Cgroup *cgroup.Cgroup `json:"cgroup"` } -// Create creates the sandbox process. The caller must call Destroy() on the -// sandbox. If spec specified a cgroup, the current process will have joined -// the cgroup upon return. -func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File) (*Sandbox, error) { - s := &Sandbox{ID: id} - // The Cleanup object cleans up partially created sandboxes when an error occurs. - // Any errors occurring during cleanup itself are ignored. +// New creates the sandbox process. The caller must call Destroy() on the +// sandbox. +func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, cg *cgroup.Cgroup) (*Sandbox, error) { + s := &Sandbox{ID: id, Cgroup: cg} + // The Cleanup object cleans up partially created sandboxes when an error + // occurs. Any errors occurring during cleanup itself are ignored. c := specutils.MakeCleanup(func() { _ = s.destroy() }) defer c.Clean() - if cg, ok := cgroup.New(spec); ok { - s.Cgroup = cg - - // If there is cgroup config, install it before creating sandbox process. - if err := s.Cgroup.Install(spec.Linux.Resources); err != nil { - return nil, fmt.Errorf("configuring cgroup: %v", err) - } - - // Make this process join the cgroup to ensure the sandbox (and all its - // children processes) are part of the cgroup from the start. Don't bother - // moving out because the caller is about to exit anyways. - if err := cg.Join(); err != nil { - return nil, fmt.Errorf("joining cgroup: %v", err) - } - } - // Create pipe to synchronize when sandbox process has been booted. fds := make([]int, 2) if err := syscall.Pipe(fds); err != nil { @@ -871,14 +854,6 @@ func (s *Sandbox) waitForStopped() error { return backoff.Retry(op, b) } -// AddGoferToCgroup adds the gofer process to the sandbox's cgroup. -func (s *Sandbox) AddGoferToCgroup(pid int) error { - if s.Cgroup != nil { - return s.Cgroup.Add(pid) - } - return nil -} - // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 055076475..7b0dcf231 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -470,8 +470,7 @@ func ContainsStr(strs []string, str string) bool { // c.Release() // on success, aborts closing the file and return it. // return f type Cleanup struct { - clean func() - released bool + clean func() } // MakeCleanup creates a new Cleanup object. @@ -481,13 +480,14 @@ func MakeCleanup(f func()) Cleanup { // Clean calls the cleanup function. func (c *Cleanup) Clean() { - if !c.released { + if c.clean != nil { c.clean() + c.clean = nil } } // Release releases the cleanup from its duties, i.e. cleanup function is not // called after this point. func (c *Cleanup) Release() { - c.released = true + c.clean = nil }