diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index ba2955752..e113f3574 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -55,7 +55,7 @@ func Install(rules SyscallRules) error { } // Uncomment to get stack trace when there is a violation. - // defaultAction = uint32(linux.SECCOMP_RET_TRAP) + // defaultAction = linux.BPFAction(linux.SECCOMP_RET_TRAP) log.Infof("Installing seccomp filters for %d syscalls (action=%v)", len(rules), defaultAction) diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index f54e01ee8..5052bcc0d 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -6,6 +6,7 @@ go_library( name = "control", srcs = [ "control.go", + "pprof.go", "proc.go", "state.go", ], @@ -15,6 +16,7 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/fd", "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/fs/host", diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go new file mode 100644 index 000000000..1af092af3 --- /dev/null +++ b/pkg/sentry/control/pprof.go @@ -0,0 +1,124 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "errors" + "runtime" + "runtime/pprof" + "sync" + + "gvisor.googlesource.com/gvisor/pkg/fd" + "gvisor.googlesource.com/gvisor/pkg/urpc" +) + +var errNoOutput = errors.New("no output writer provided") + +// ProfileOpts contains options for the StartCPUProfile/Goroutine RPC call. 
+type ProfileOpts struct { + // File is the filesystem path for the profile. + File string `json:"path"` + + // FilePayload is the destination for the profiling output. + urpc.FilePayload +} + +// Profile includes profile-related RPC stubs. It provides a way to +// control the built-in pprof facility in sentry via sentryctl. +// +// The following options to sentryctl are added: +// +// - collect CPU profile on-demand. +// sentryctl -pid pprof-cpu-start +// sentryctl -pid pprof-cpu-stop +// +// - dump out the stack trace of current goroutines. +// sentryctl -pid pprof-goroutine +type Profile struct { + // mu protects the fields below. + mu sync.Mutex + + // cpuFile is the current CPU profile output file. + cpuFile *fd.FD +} + +// StartCPUProfile is an RPC stub which starts recording the CPU profile in a +// file. +func (p *Profile) StartCPUProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + + output, err := fd.NewFromFile(o.FilePayload.Files[0]) + if err != nil { + return err + } + + p.mu.Lock() + defer p.mu.Unlock() + + // Returns an error if profiling is already started. + if err := pprof.StartCPUProfile(output); err != nil { + output.Close() + return err + } + + p.cpuFile = output + return nil +} + +// StopCPUProfile is an RPC stub which stops the CPU profiling and flushes out the +// profile data. It takes no argument. +func (p *Profile) StopCPUProfile(_, _ *struct{}) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.cpuFile == nil { + return errors.New("CPU profiling not started") + } + + pprof.StopCPUProfile() + p.cpuFile.Close() + p.cpuFile = nil + return nil +} + +// HeapProfile generates a heap profile for the sentry. +func (p *Profile) HeapProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + output := o.FilePayload.Files[0] + defer output.Close() + runtime.GC() // Get up-to-date statistics. 
+ if err := pprof.WriteHeapProfile(output); err != nil { + return err + } + return nil +} + +// Goroutine is an RPC stub which dumps out the stack trace for all running +// goroutines. +func (p *Profile) Goroutine(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + output := o.FilePayload.Files[0] + defer output.Close() + if err := pprof.Lookup("goroutine").WriteTo(output, 2); err != nil { + return err + } + return nil +} diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 400203c99..626fcabdd 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -202,6 +202,9 @@ type Config struct { // SIGUSR2(12) to troubleshoot hangs. -1 disables it. PanicSignal int + // ProfileEnable is set to prepare the sandbox to be profiled. + ProfileEnable bool + // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in // tests. It allows runsc to start the sandbox process as the current // user, and without chrooting the sandbox process. This can be @@ -228,6 +231,7 @@ func (c *Config) ToFlags() []string { "--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)), "--watchdog-action=" + c.WatchdogAction.String(), "--panic-signal=" + strconv.Itoa(c.PanicSignal), + "--profile=" + strconv.FormatBool(c.ProfileEnable), } if c.TestOnlyAllowRunAsCurrentUserWithoutChroot { // Only include if set since it is never to be used by users. diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 23d476f7f..a864be720 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -95,6 +95,11 @@ const ( // SandboxStacks collects sandbox stacks for debugging. SandboxStacks = "debug.Stacks" + + // Profiling related commands (see pprof.go for more details). + StartCPUProfile = "Profile.StartCPUProfile" + StopCPUProfile = "Profile.StopCPUProfile" + HeapProfile = "Profile.HeapProfile" ) // ControlSocketAddr generates an abstract unix socket name for the given ID. 
@@ -135,6 +140,9 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(&debug{}) + if l.conf.ProfileEnable { + srv.Register(&control.Profile{}) + } return &controller{ srv: srv, diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index bde749861..1ba5b7257 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -470,3 +470,16 @@ func controlServerFilters(fd int) seccomp.SyscallRules { }, } } + +// profileFilters returns extra syscalls made by runtime/pprof package. +func profileFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_OPENAT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), + }, + }, + } +} diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index d69a6a2cc..fb197f9b1 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -29,9 +29,10 @@ import ( // Options are seccomp filter related options. type Options struct { - Platform platform.Platform - HostNetwork bool - ControllerFD int + Platform platform.Platform + HostNetwork bool + ProfileEnable bool + ControllerFD int } // Install installs seccomp filters for based on the given platform. @@ -47,6 +48,10 @@ func Install(opt Options) error { Report("host networking enabled: syscall filters less restrictive!") s.Merge(hostInetFilters()) } + if opt.ProfileEnable { + Report("profile enabled: syscall filters less restrictive!") + s.Merge(profileFilters()) + } switch p := opt.Platform.(type) { case *ptrace.PTrace: diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 42fe6f312..4c7e6abfc 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -445,9 +445,10 @@ func (l *Loader) run() error { filter.Report("syscall filter is DISABLED. 
Running in less secure mode.") } else { opts := filter.Options{ - Platform: l.k.Platform, - HostNetwork: l.conf.Network == NetworkHost, - ControllerFD: l.ctrl.srv.FD(), + Platform: l.k.Platform, + HostNetwork: l.conf.Network == NetworkHost, + ProfileEnable: l.conf.ProfileEnable, + ControllerFD: l.ctrl.srv.FD(), } if err := filter.Install(opts); err != nil { return fmt.Errorf("installing seccomp filters: %v", err) diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index e10326754..3ee9a9b49 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -16,7 +16,9 @@ package cmd import ( "context" + "os" "syscall" + "time" "flag" "github.com/google/subcommands" @@ -27,9 +29,12 @@ import ( // Debug implements subcommands.Command for the "debug" command. type Debug struct { - pid int - stacks bool - signal int + pid int + stacks bool + signal int + profileHeap string + profileCPU string + profileDelay int } // Name implements subcommands.Command. @@ -51,6 +56,9 @@ func (*Debug) Usage() string { func (d *Debug) SetFlags(f *flag.FlagSet) { f.IntVar(&d.pid, "pid", 0, "sandbox process ID. 
Container ID is not necessary if this is set") f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log") + f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.") + f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.") + f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile") f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox") } @@ -114,5 +122,35 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } log.Infof(" *** Stack dump ***\n%s", stacks) } + if d.profileCPU != "" { + f, err := os.Create(d.profileCPU) + if err != nil { + Fatalf(err.Error()) + } + defer f.Close() + + if err := c.Sandbox.StartCPUProfile(f); err != nil { + Fatalf(err.Error()) + } + log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU) + time.Sleep(time.Duration(d.profileDelay) * time.Second) + + if err := c.Sandbox.StopCPUProfile(); err != nil { + Fatalf(err.Error()) + } + log.Infof("CPU profile written to %q", d.profileCPU) + } + if d.profileHeap != "" { + f, err := os.Create(d.profileHeap) + if err != nil { + Fatalf(err.Error()) + } + defer f.Close() + + if err := c.Sandbox.HeapProfile(f); err != nil { + Fatalf(err.Error()) + } + log.Infof("Heap profile written to %q", d.profileHeap) + } return subcommands.ExitSuccess } diff --git a/runsc/main.go b/runsc/main.go index 4f89312b3..82c37ec11 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -63,6 +63,7 @@ var ( overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. 
-1 disables it.") + profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.") ) @@ -146,6 +147,7 @@ func main() { StraceLogSize: *straceLogSize, WatchdogAction: wa, PanicSignal: *panicSignal, + ProfileEnable: *profile, TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot, } if len(*straceSyscalls) != 0 { diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index ce8c21681..2698e3f86 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -825,6 +825,61 @@ func (s *Sandbox) Stacks() (string, error) { return stacks, nil } +// HeapProfile writes a heap profile to the given file. +func (s *Sandbox) HeapProfile(f *os.File) error { + log.Debugf("Heap profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.HeapProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q heap profile: %v", s.ID, err) + } + return nil +} + +// StartCPUProfile starts a CPU profile that writes to the given file. +func (s *Sandbox) StartCPUProfile(f *os.File) error { + log.Debugf("CPU profile start %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.StartCPUProfile, &opts, nil); err != nil { + return fmt.Errorf("starting sandbox %q CPU profile: %v", s.ID, err) + } + return nil +} + +// StopCPUProfile stops a previously started CPU profile. 
+func (s *Sandbox) StopCPUProfile() error { + log.Debugf("CPU profile stop %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.StopCPUProfile, nil, nil); err != nil { + return fmt.Errorf("stopping sandbox %q CPU profile: %v", s.ID, err) + } + return nil +} + // DestroyContainer destroys the given container. If it is the root container, // then the entire sandbox is destroyed. func (s *Sandbox) DestroyContainer(cid string) error {