runsc: add global profile collection flags

Add global flags -profile-{block,cpu,heap,mutex} and -trace which
enable collection of the specified profile for the entire duration of a
container execution. This provides a way to definitively start profiling
before that application starts, rather than attempting to race with an
out-of-band `runsc debug`.

Note that only the main boot process is profiled.

This exposed a bug in Task.traceExecEvent: a crash when tracing and
-race are enabled. traceExecEvent is called off of the task goroutine,
but uses the Task as a context, which is a violation of the Task
contract. Switching to the AsyncContext fixes the issue.

Fixes #220
This commit is contained in:
Michael Pratt 2021-09-13 18:07:29 -04:00
parent 9bdeddd882
commit bd296e799b
11 changed files with 337 additions and 26 deletions

View File

@ -26,6 +26,23 @@ import (
"gvisor.dev/gvisor/pkg/urpc"
)
const (
// DefaultBlockProfileRate is the default profiling rate for block
// profiles.
//
// The default here is 10%, which will record a stacktrace 10% of the
// time when blocking occurs. Since these events should not be super
// frequent, we expect this to achieve a reasonable balance between
// collecting the data we need and imposing a high performance cost
// (e.g. skewing even the CPU profile).
DefaultBlockProfileRate = 10
// DefaultMutexProfileRate is the default profiling rate for mutex
// profiles. Like the block rate above, we use a default rate of 10%
// for the same reasons.
DefaultMutexProfileRate = 10
)
// Profile includes profile-related RPC stubs. It provides a way to
// control the built-in runtime profiling facilities.
//
@ -175,12 +192,8 @@ func (p *Profile) Block(o *BlockProfileOpts, _ *struct{}) error {
defer p.blockMu.Unlock()
// Always set the rate. We then wait to collect a profile at this rate,
// and disable when we're done. Note that the default here is 10%, which
// will record a stacktrace 10% of the time when blocking occurs. Since
// these events should not be super frequent, we expect this to achieve
// a reasonable balance between collecting the data we need and imposing
// a high performance cost (e.g. skewing even the CPU profile).
rate := 10
// and disable when we're done.
rate := DefaultBlockProfileRate
if o.Rate != 0 {
rate = o.Rate
}
@ -220,9 +233,8 @@ func (p *Profile) Mutex(o *MutexProfileOpts, _ *struct{}) error {
p.mutexMu.Lock()
defer p.mutexMu.Unlock()
// Always set the fraction. Like the block rate above, we use
// a default rate of 10% for the same reasons.
fraction := 10
// Always set the fraction.
fraction := DefaultMutexProfileRate
if o.Fraction != 0 {
fraction = o.Fraction
}

View File

@ -249,5 +249,9 @@ func (t *Task) traceExecEvent(image *TaskImage) {
return
}
defer file.DecRef(t)
trace.Logf(t.traceContext, traceCategory, "exec: %s", file.PathnameWithDeleted(t))
// traceExecEvent function may be called before the task goroutine
// starts, so we must use the async context.
name := file.PathnameWithDeleted(t.AsyncContext())
trace.Logf(t.traceContext, traceCategory, "exec: %s", name)
}

View File

@ -15,6 +15,7 @@ go_library(
"limits.go",
"loader.go",
"network.go",
"profile.go",
"strace.go",
"vfs.go",
],

View File

@ -120,6 +120,10 @@ type Loader struct {
// container. It should be called when a sandbox is destroyed.
stopSignalForwarding func()
// stopProfiling stops profiling started at container creation. It
// should be called when a sandbox is destroyed.
stopProfiling func()
// restore is set to true if we are restoring a container.
restore bool
@ -199,6 +203,21 @@ type Args struct {
TotalMem uint64
// UserLogFD is the file descriptor to write user logs to.
UserLogFD int
// ProfileBlockFD is the file descriptor to write a block profile to.
// Valid if >=0.
ProfileBlockFD int
// ProfileCPUFD is the file descriptor to write a CPU profile to.
// Valid if >=0.
ProfileCPUFD int
// ProfileHeapFD is the file descriptor to write a heap profile to.
// Valid if >=0.
ProfileHeapFD int
// ProfileMutexFD is the file descriptor to write a mutex profile to.
// Valid if >=0.
ProfileMutexFD int
// TraceFD is the file descriptor to write a Go execution trace to.
// Valid if >=0.
TraceFD int
}
// make sure stdioFDs are always the same on initial start and on restore
@ -207,6 +226,8 @@ const startingStdioFD = 256
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
func New(args Args) (*Loader, error) {
stopProfiling := startProfiling(args)
// We initialize the rand package now to make sure /dev/urandom is pre-opened
// on kernels that do not support getrandom(2).
if err := rand.Init(); err != nil {
@ -400,12 +421,13 @@ func New(args Args) (*Loader, error) {
eid := execID{cid: args.ID}
l := &Loader{
k: k,
watchdog: dog,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
mountHints: mountHints,
root: info,
k: k,
watchdog: dog,
sandboxID: args.ID,
processes: map[execID]*execProcess{eid: {}},
mountHints: mountHints,
root: info,
stopProfiling: stopProfiling,
}
// We don't care about child signals; some platforms can generate a
@ -498,6 +520,8 @@ func (l *Loader) Destroy() {
for _, f := range l.root.goferFDs {
_ = f.Close()
}
l.stopProfiling()
}
func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) {

95
runsc/boot/profile.go Normal file
View File

@ -0,0 +1,95 @@
// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package boot
import (
"os"
"runtime"
"runtime/pprof"
"runtime/trace"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
)
// startProfiling initiates profiling as defined by the ProfileConfig, and
// returns a function that should be called to stop profiling.
func startProfiling(args Args) func() {
var onStopProfiling []func()
stopProfiling := func() {
for _, f := range onStopProfiling {
f()
}
}
if args.ProfileBlockFD >= 0 {
file := os.NewFile(uintptr(args.ProfileBlockFD), "profile-block")
runtime.SetBlockProfileRate(control.DefaultBlockProfileRate)
onStopProfiling = append(onStopProfiling, func() {
if err := pprof.Lookup("block").WriteTo(file, 0); err != nil {
log.Warningf("Error writing block profile: %v", err)
}
file.Close()
runtime.SetBlockProfileRate(0)
})
}
if args.ProfileCPUFD >= 0 {
file := os.NewFile(uintptr(args.ProfileCPUFD), "profile-cpu")
pprof.StartCPUProfile(file)
onStopProfiling = append(onStopProfiling, func() {
pprof.StopCPUProfile()
file.Close()
})
}
if args.ProfileHeapFD >= 0 {
file := os.NewFile(uintptr(args.ProfileHeapFD), "profile-heap")
onStopProfiling = append(onStopProfiling, func() {
if err := pprof.Lookup("heap").WriteTo(file, 0); err != nil {
log.Warningf("Error writing heap profile: %v", err)
}
file.Close()
})
}
if args.ProfileMutexFD >= 0 {
file := os.NewFile(uintptr(args.ProfileMutexFD), "profile-mutex")
prev := runtime.SetMutexProfileFraction(control.DefaultMutexProfileRate)
onStopProfiling = append(onStopProfiling, func() {
if err := pprof.Lookup("mutex").WriteTo(file, 0); err != nil {
log.Warningf("Error writing mutex profile: %v", err)
}
file.Close()
runtime.SetMutexProfileFraction(prev)
})
}
if args.TraceFD >= 0 {
file := os.NewFile(uintptr(args.TraceFD), "trace")
trace.Start(file)
onStopProfiling = append(onStopProfiling, func() {
trace.Stop()
file.Close()
})
}
return stopProfiling
}

View File

@ -79,6 +79,26 @@ type Boot struct {
// sandbox (e.g. gofer) and sent through this FD.
mountsFD int
// profileBlockFD is the file descriptor to write a block profile to.
// Valid if >= 0.
profileBlockFD int
// profileCPUFD is the file descriptor to write a CPU profile to.
// Valid if >= 0.
profileCPUFD int
// profileHeapFD is the file descriptor to write a heap profile to.
// Valid if >= 0.
profileHeapFD int
// profileMutexFD is the file descriptor to write a mutex profile to.
// Valid if >= 0.
profileMutexFD int
// traceFD is the file descriptor to write a Go execution trace to.
// Valid if >= 0.
traceFD int
// pidns is set if the sandbox is in its own pid namespace.
pidns bool
@ -119,6 +139,11 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
f.IntVar(&b.profileBlockFD, "profile-block-fd", -1, "file descriptor to write block profile to. -1 disables profiling.")
f.IntVar(&b.profileCPUFD, "profile-cpu-fd", -1, "file descriptor to write CPU profile to. -1 disables profiling.")
f.IntVar(&b.profileHeapFD, "profile-heap-fd", -1, "file descriptor to write heap profile to. -1 disables profiling.")
f.IntVar(&b.profileMutexFD, "profile-mutex-fd", -1, "file descriptor to write mutex profile to. -1 disables profiling.")
f.IntVar(&b.traceFD, "trace-fd", -1, "file descriptor to write Go execution trace to. -1 disables tracing.")
f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
}
@ -213,16 +238,21 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Create the loader.
bootArgs := boot.Args{
ID: f.Arg(0),
Spec: spec,
Conf: conf,
ControllerFD: b.controllerFD,
Device: os.NewFile(uintptr(b.deviceFD), "platform device"),
GoferFDs: b.ioFDs.GetArray(),
StdioFDs: b.stdioFDs.GetArray(),
NumCPU: b.cpuNum,
TotalMem: b.totalMem,
UserLogFD: b.userLogFD,
ID: f.Arg(0),
Spec: spec,
Conf: conf,
ControllerFD: b.controllerFD,
Device: os.NewFile(uintptr(b.deviceFD), "platform device"),
GoferFDs: b.ioFDs.GetArray(),
StdioFDs: b.stdioFDs.GetArray(),
NumCPU: b.cpuNum,
TotalMem: b.totalMem,
UserLogFD: b.userLogFD,
ProfileBlockFD: b.profileBlockFD,
ProfileCPUFD: b.profileCPUFD,
ProfileHeapFD: b.profileHeapFD,
ProfileMutexFD: b.profileMutexFD,
TraceFD: b.traceFD,
}
l, err := boot.New(bootArgs)
if err != nil {

View File

@ -90,6 +90,13 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
var c *container.Container
conf := args[0].(*config.Config)
if conf.ProfileBlock != "" || conf.ProfileCPU != "" || conf.ProfileHeap != "" || conf.ProfileMutex != "" {
return Errorf("global -profile-{block,cpu,heap,mutex} flags have no effect on runsc debug. Pass runsc debug -profile-{block,cpu,heap,mutex} instead")
}
if conf.TraceFile != "" {
return Errorf("global -trace flag has no effect on runsc debug. Pass runsc debug -trace instead")
}
if d.pid == 0 {
// No pid, container ID must have been provided.
if f.NArg() != 1 {

View File

@ -140,6 +140,26 @@ type Config struct {
// ProfileEnable is set to prepare the sandbox to be profiled.
ProfileEnable bool `flag:"profile"`
// ProfileBlock collects a block profile to the passed file for the
// duration of the container execution. Requires ProfileEnabled.
ProfileBlock string `flag:"profile-block"`
// ProfileCPU collects a CPU profile to the passed file for the
// duration of the container execution. Requires ProfileEnabled.
ProfileCPU string `flag:"profile-cpu"`
// ProfileHeap collects a heap profile to the passed file for the
// duration of the container execution. Requires ProfileEnabled.
ProfileHeap string `flag:"profile-heap"`
// ProfileMutex collects a mutex profile to the passed file for the
// duration of the container execution. Requires ProfileEnabled.
ProfileMutex string `flag:"profile-mutex"`
// TraceFile collects a Go runtime execution trace to the passed file
// for the duration of the container execution.
TraceFile string `flag:"trace"`
// Controls defines the controls that may be enabled.
Controls controlConfig `flag:"controls"`
@ -207,6 +227,21 @@ func (c *Config) validate() error {
if c.NumNetworkChannels <= 0 {
return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels)
}
// Require profile flags to explicitly opt-in to profiling with
// -profile rather than implying it since these options have security
// implications.
if c.ProfileBlock != "" && !c.ProfileEnable {
return fmt.Errorf("profile-block flag requires enabling profiling with profile flag")
}
if c.ProfileCPU != "" && !c.ProfileEnable {
return fmt.Errorf("profile-cpu flag requires enabling profiling with profile flag")
}
if c.ProfileHeap != "" && !c.ProfileEnable {
return fmt.Errorf("profile-heap flag requires enabling profiling with profile flag")
}
if c.ProfileMutex != "" && !c.ProfileEnable {
return fmt.Errorf("profile-mutex flag requires enabling profiling with profile flag")
}
return nil
}

View File

@ -63,6 +63,11 @@ func RegisterFlags() {
flag.Var(watchdogActionPtr(watchdog.LogWarning), "watchdog-action", "sets what action the watchdog takes when triggered: log (default), panic.")
flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
flag.String("profile-block", "", "collects a block profile to this file path for the duration of the container execution. Requires -profile=true.")
flag.String("profile-cpu", "", "collects a CPU profile to this file path for the duration of the container execution. Requires -profile=true.")
flag.String("profile-heap", "", "collects a heap profile to this file path for the duration of the container execution. Requires -profile=true.")
flag.String("profile-mutex", "", "collects a mutex profile to this file path for the duration of the container execution. Requires -profile=true.")
flag.String("trace", "", "collects a Go runtime execution trace to this file path for the duration of the container execution.")
flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.")
flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")

View File

@ -2829,3 +2829,46 @@ func TestStream(t *testing.T) {
t.Errorf("out got %s, want include %s", buf, want)
}
}
// TestProfile checks that profiling options generate profiles.
func TestProfile(t *testing.T) {
// Perform a non-trivial amount of work so we actually capture
// something in the profiles.
spec := testutil.NewSpecWithArgs("/bin/bash", "-c", "true")
conf := testutil.TestConfig(t)
conf.ProfileEnable = true
conf.ProfileBlock = filepath.Join(t.TempDir(), "block.pprof")
conf.ProfileCPU = filepath.Join(t.TempDir(), "cpu.pprof")
conf.ProfileHeap = filepath.Join(t.TempDir(), "heap.pprof")
conf.ProfileMutex = filepath.Join(t.TempDir(), "mutex.pprof")
conf.TraceFile = filepath.Join(t.TempDir(), "trace.out")
_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
if err != nil {
t.Fatalf("error setting up container: %v", err)
}
defer cleanup()
args := Args{
ID: testutil.RandomContainerID(),
Spec: spec,
BundleDir: bundleDir,
Attached: true,
}
_, err = Run(conf, args)
if err != nil {
t.Fatalf("Creating container: %v", err)
}
// Basic test; simply assert that the profiles are not empty.
for _, name := range []string{conf.ProfileBlock, conf.ProfileCPU, conf.ProfileHeap, conf.ProfileMutex, conf.TraceFile} {
fi, err := os.Stat(name)
if err != nil {
t.Fatalf("Unable to stat profile file %s: %v", name, err)
}
if fi.Size() == 0 {
t.Errorf("Profile file %s is empty: %+v", name, fi)
}
}
}

View File

@ -490,6 +490,61 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
cmd.Args = append(cmd.Args, "--start-sync-fd="+strconv.Itoa(nextFD))
nextFD++
if conf.ProfileBlock != "" {
blockFile, err := os.OpenFile(conf.ProfileBlock, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening block profiling file %q: %v", conf.ProfileBlock, err)
}
defer blockFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, blockFile)
cmd.Args = append(cmd.Args, "--profile-block-fd="+strconv.Itoa(nextFD))
nextFD++
}
if conf.ProfileCPU != "" {
cpuFile, err := os.OpenFile(conf.ProfileCPU, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening cpu profiling file %q: %v", conf.ProfileCPU, err)
}
defer cpuFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, cpuFile)
cmd.Args = append(cmd.Args, "--profile-cpu-fd="+strconv.Itoa(nextFD))
nextFD++
}
if conf.ProfileHeap != "" {
heapFile, err := os.OpenFile(conf.ProfileHeap, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening heap profiling file %q: %v", conf.ProfileHeap, err)
}
defer heapFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, heapFile)
cmd.Args = append(cmd.Args, "--profile-heap-fd="+strconv.Itoa(nextFD))
nextFD++
}
if conf.ProfileMutex != "" {
mutexFile, err := os.OpenFile(conf.ProfileMutex, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening mutex profiling file %q: %v", conf.ProfileMutex, err)
}
defer mutexFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, mutexFile)
cmd.Args = append(cmd.Args, "--profile-mutex-fd="+strconv.Itoa(nextFD))
nextFD++
}
if conf.TraceFile != "" {
traceFile, err := os.OpenFile(conf.TraceFile, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("opening trace file %q: %v", conf.TraceFile, err)
}
defer traceFile.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, traceFile)
cmd.Args = append(cmd.Args, "--trace-fd="+strconv.Itoa(nextFD))
nextFD++
}
// If there is a gofer, sends all socket ends to the sandbox.
for _, f := range args.IOFiles {
defer f.Close()