From a891afad6d7e3b09bafdccb4cc4b9fc4e577620e Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Fri, 28 Dec 2018 13:47:19 -0800 Subject: [PATCH] Simplify synchronization between runsc and sandbox process Make 'runsc create' join cgroup before creating sandbox process. This removes the need to synchronize platform creation and ensure that sandbox process is charged to the right cgroup from the start. PiperOrigin-RevId: 227166451 Change-Id: Ieb4b18e6ca0daf7b331dc897699ca419bc5ee3a2 --- runsc/boot/controller.go | 32 ++++----------- runsc/boot/loader.go | 38 ++++++++--------- runsc/cgroup/cgroup.go | 5 +++ runsc/cmd/boot.go | 23 ++++------- runsc/sandbox/sandbox.go | 74 ++++++++++------------------------ runsc/test/root/cgroup_test.go | 32 +++++++++++++++ 6 files changed, 89 insertions(+), 115 deletions(-) diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 05d4f3a5b..36e9d2c6b 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -81,9 +81,6 @@ const ( // and return its ExitStatus. ContainerWait = "containerManager.Wait" - // ContainerWaitForLoader blocks until the container's loader has been created. - ContainerWaitForLoader = "containerManager.WaitForLoader" - // ContainerWaitPID is used to wait on a process with a certain PID in // the sandbox and return its ExitStatus. ContainerWaitPID = "containerManager.WaitPID" @@ -115,21 +112,22 @@ type controller struct { manager *containerManager } -// newController creates a new controller and starts it listening. -func newController(fd int, k *kernel.Kernel, w *watchdog.Watchdog) (*controller, error) { +// newController creates a new controller. The caller must call +// controller.srv.StartServing() to start the controller. +func newController(fd int, l *Loader) (*controller, error) { srv, err := server.CreateFromFD(fd) if err != nil { return nil, err } manager := &containerManager{ - startChan: make(chan struct{}), - startResultChan: make(chan error), - loaderCreatedChan: make(chan struct{}), + startChan: make(chan struct{}), + startResultChan: make(chan error), + l: l, } srv.Register(manager) - if eps, ok := k.NetworkStack().(*epsocket.Stack); ok { + if eps, ok := l.k.NetworkStack().(*epsocket.Stack); ok { net := &Network{ Stack: eps.Stack, } @@ -138,10 +136,6 @@ func newController(fd int, k *kernel.Kernel, w *watchdog.Watchdog) (*controller, srv.Register(&debug{}) - if err := srv.StartServing(); err != nil { - return nil, err - } - return &controller{ srv: srv, manager: manager, @@ -161,11 +155,6 @@ type containerManager struct { // l is the loader that creates containers and sandboxes. l *Loader - - // loaderCreatedChan is used to signal when the loader has been created. - // After a loader is created, a notify method is called that writes to - // this channel. - loaderCreatedChan chan struct{} } // StartRoot will start the root container process. @@ -291,13 +280,6 @@ func (cm *containerManager) Pause(_, _ *struct{}) error { return nil } -// WaitForLoader blocks until the container's loader has been created. -func (cm *containerManager) WaitForLoader(_, _ *struct{}) error { - log.Debugf("containerManager.WaitForLoader") - <-cm.loaderCreatedChan - return nil -} - // RestoreOpts contains options related to restoring a container's file system. type RestoreOpts struct { // FilePayload contains the state file to be restored, followed by the diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index a9c549790..3c6892446 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -277,20 +277,6 @@ func New(args Args) (*Loader, error) { // Create a watchdog. watchdog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction) - // Create the control server using the provided FD. - // - // This must be done *after* we have initialized the kernel since the - // controller is used to configure the kernel's network stack. - // - // This should also be *before* we create the process, since a - // misconfigured process will cause an error, and we want the control - // server up before that so that we don't time out trying to connect to - // it. - ctrl, err := newController(args.ControllerFD, k, watchdog) - if err != nil { - return nil, fmt.Errorf("error creating control server: %v", err) - } - procArgs, err := newProcess(args.ID, args.Spec, creds, k) if err != nil { return nil, fmt.Errorf("failed to create init process for root container: %v", err) @@ -303,7 +289,6 @@ func New(args Args) (*Loader, error) { eid := execID{cid: args.ID} l := &Loader{ k: k, - ctrl: ctrl, conf: args.Conf, console: args.Console, watchdog: watchdog, @@ -348,7 +333,22 @@ func New(args Args) (*Loader, error) { } }) - ctrl.manager.l = l + // Create the control server using the provided FD. + // + // This must be done *after* we have initialized the kernel since the + // controller is used to configure the kernel's network stack. + ctrl, err := newController(args.ControllerFD, l) + if err != nil { + return nil, fmt.Errorf("creating control server: %v", err) + } + l.ctrl = ctrl + + // Only start serving after Loader is set to controller and controller is set + // to Loader, because they are both used in the urpc methods. + if err := ctrl.srv.StartServing(); err != nil { + return nil, fmt.Errorf("starting control server: %v", err) + } + return l, nil } @@ -745,12 +745,6 @@ func (l *Loader) WaitForStartSignal() { <-l.ctrl.manager.startChan } -// NotifyLoaderCreated sends a signal to the container manager that this -// loader has been created. -func (l *Loader) NotifyLoaderCreated() { - l.ctrl.manager.loaderCreatedChan <- struct{}{} -} - // WaitExit waits for the root container to exit, and returns its exit status. func (l *Loader) WaitExit() kernel.ExitStatus { // Wait for container. diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 15071387b..2887f3d7f 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -241,6 +241,11 @@ func (c *Cgroup) Uninstall() error { return nil } +// Join adds the current process to the all controllers. +func (c *Cgroup) Join() error { + return c.Add(0) +} + // Add adds given process to all controllers. func (c *Cgroup) Add(pid int) error { for key := range controllers { diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 192df7d3c..bb3435284 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -159,14 +159,6 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) panic("setCapsAndCallSelf must never return success") } - // Wait until this process has been moved into cgroups. - startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file") - defer startSyncFile.Close() - buf := make([]byte, 1) - if r, err := startSyncFile.Read(buf); err != nil || r != 1 { - Fatalf("Unable to read from the start-sync descriptor: %v", err) - } - // Create the loader. bootArgs := boot.Args{ ID: f.Arg(0), @@ -186,21 +178,20 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("error creating loader: %v", err) } - // Fatalf exits the process and doesn't run defers. 'l' must be destroyed - // explicitly! + // Fatalf exits the process and doesn't run defers. + // 'l' must be destroyed explicitly after this point! - // Notify the parent process the controller has been created. + // Notify the parent process the sandbox has booted (and that the controller + // is up). + startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file") + buf := make([]byte, 1) if w, err := startSyncFile.Write(buf); err != nil || w != 1 { l.Destroy() Fatalf("Unable to write into the start-sync descriptor: %v", err) } - // startSyncFile is closed here to be sure that starting with this point - // the runsc process will not write anything into it. + // Closes startSyncFile because 'l.Run()' only returns when the sandbox exits. startSyncFile.Close() - // Notify other processes the loader has been created. - l.NotifyLoaderCreated() - // Wait for the start signal from runsc. l.WaitForStartSignal() diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 0798aef9b..195cd4d6f 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -65,7 +65,8 @@ type Sandbox struct { } // Create creates the sandbox process. The caller must call Destroy() on the -// sandbox. +// sandbox. If spec specified a cgroup, the current process will have joined +// the cgroup upon return. func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File) (*Sandbox, error) { s := &Sandbox{ID: id} // The Cleanup object cleans up partially created sandboxes when an error occurs. @@ -78,55 +79,41 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo // If there is cgroup config, install it before creating sandbox process. if err := s.Cgroup.Install(spec.Linux.Resources); err != nil { - return nil, fmt.Errorf("error configuring cgroup: %v", err) + return nil, fmt.Errorf("configuring cgroup: %v", err) + } + + // Make this process join the cgroup to ensure the sandbox (and all its + // children processes) are part of the cgroup from the start. Don't bother + // moving out because the caller is about to exit anyways. + if err := cg.Join(); err != nil { + return nil, fmt.Errorf("joining cgroup: %v", err) } } - // Create a socket pair to synchronize runsc and sandbox processes. - // It is used for the following: - // * to notify the sandbox process when it has been moved into cgroups. - // * to wait for the controller socket. - fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0) - if err != nil { - return nil, fmt.Errorf("error creating a start-sync socket pair %q: %v", s.ID, err) + // Create pipe to synchronize when sandbox process has been booted. + fds := make([]int, 2) + if err := syscall.Pipe(fds); err != nil { + return nil, fmt.Errorf("creating pipe for sandbox %q: %v", s.ID, err) } - startSyncFile := os.NewFile(uintptr(fds[0]), "start-sync socket") - defer startSyncFile.Close() + clientSyncFile := os.NewFile(uintptr(fds[0]), "client sandbox sync") + defer clientSyncFile.Close() - sandboxSyncFile := os.NewFile(uintptr(fds[1]), "sandbox start-sync socket") + sandboxSyncFile := os.NewFile(uintptr(fds[1]), "sandbox sync") // Create the sandbox process. - err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, sandboxSyncFile) - // sandboxSyncFile has to be closed to be able to detect - // when the sandbox process exits unexpectedly. + err := s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, sandboxSyncFile) + // sandboxSyncFile has to be closed to be able to detect when the sandbox + // process exits unexpectedly. sandboxSyncFile.Close() if err != nil { return nil, err } - if s.Cgroup != nil { - if err := s.Cgroup.Add(s.Pid); err != nil { - return nil, fmt.Errorf("error adding sandbox to cgroup: %v", err) - } - } - + // Wait until the sandbox has booted. b := make([]byte, 1) - // Notify the sandbox process it has been moved into cgroups. - if l, err := startSyncFile.Write(b); err != nil || l != 1 { - return nil, fmt.Errorf("error writing into the start-sync descriptor: %v", err) - } - // Wait until the sandbox process has initialized the controller socket. - if l, err := startSyncFile.Read(b); err != nil || l != 1 { + if l, err := clientSyncFile.Read(b); err != nil || l != 1 { return nil, fmt.Errorf("error reading from the start-sync descriptor: %v", err) } - // startSyncFile is closed here to be sure that starting with this point - // the sandbox process will not write anything into it. - startSyncFile.Close() - - // Wait for the control server to come up. - if err := s.waitForCreated(); err != nil { - return nil, err - } c.Release() return s, nil @@ -612,23 +599,6 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund return nil } -// waitForCreated waits for the sandbox subprocess control server to be -// running and for the loader to have been created, at which point the sandbox -// is in Created state. -func (s *Sandbox) waitForCreated() error { - log.Debugf("Waiting for sandbox %q creation", s.ID) - conn, err := s.sandboxConnect() - if err != nil { - return err - } - defer conn.Close() - - if err := conn.Call(boot.ContainerWaitForLoader, nil, nil); err != nil { - return fmt.Errorf("err waiting on loader on sandbox %q, err: %v", s.ID, err) - } - return nil -} - // Wait waits for the containerized process to exit, and returns its WaitStatus. func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID) diff --git a/runsc/test/root/cgroup_test.go b/runsc/test/root/cgroup_test.go index fdb94ff64..0eabf9561 100644 --- a/runsc/test/root/cgroup_test.go +++ b/runsc/test/root/cgroup_test.go @@ -18,6 +18,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strconv" "strings" "testing" @@ -123,6 +124,8 @@ func TestCgroup(t *testing.T) { t.Fatalf("Docker.ID() failed: %v", err) } t.Logf("cgroup ID: %s", gid) + + // Check list of attributes defined above. for _, attr := range attrs { path := filepath.Join("/sys/fs/cgroup", attr.ctrl, "docker", gid, attr.file) out, err := ioutil.ReadFile(path) @@ -137,4 +140,33 @@ func TestCgroup(t *testing.T) { t.Errorf("arg: %q, cgroup attribute %s/%s, got: %q, want: %q", attr.arg, attr.ctrl, attr.file, got, attr.want) } } + + // Check that sandbox is inside cgroup. + controllers := []string{ + "blkio", + "cpu", + "cpuset", + "memory", + "net_cls", + "net_prio", + "devices", + "freezer", + "perf_event", + "pids", + "systemd", + } + pid, err := d.SandboxPid() + if err != nil { + t.Fatalf("SandboxPid: %v", err) + } + for _, ctrl := range controllers { + path := filepath.Join("/sys/fs/cgroup", ctrl, "docker", gid, "cgroup.procs") + out, err := ioutil.ReadFile(path) + if err != nil { + t.Fatalf("failed to read %q: %v", path, err) + } + if got := string(out); !strings.Contains(got, strconv.Itoa(pid)) { + t.Errorf("cgroup control %s processes, got: %q, want: %q", ctrl, got, pid) + } + } }