// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Binary runsc is an implementation of the Open Container Initiative Runtime
// that runs applications inside a sandbox.
package main
import (
2018-11-28 22:00:54 +00:00
"context"
2019-04-01 23:17:40 +00:00
"fmt"
2018-04-27 17:37:02 +00:00
"io"
2019-04-12 00:53:24 +00:00
"io/ioutil"
2018-04-27 17:37:02 +00:00
"os"
2019-08-13 18:54:59 +00:00
"os/signal"
2018-04-27 17:37:02 +00:00
"path/filepath"
"strings"
"syscall"
2019-11-22 23:21:31 +00:00
"time"
2018-04-27 17:37:02 +00:00
"flag"
"github.com/google/subcommands"
2019-06-13 23:49:09 +00:00
"gvisor.dev/gvisor/pkg/log"
2019-08-29 21:17:32 +00:00
"gvisor.dev/gvisor/pkg/refs"
2019-07-04 05:50:26 +00:00
"gvisor.dev/gvisor/pkg/sentry/platform"
2019-06-13 23:49:09 +00:00
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/cmd"
"gvisor.dev/gvisor/runsc/specutils"
2018-04-27 17:37:02 +00:00
)
// Command-line flags. They are registered on the default flag.CommandLine set
// at package initialization and are read by main after flag.Parse.
var (
	// Although these flags are not part of the OCI spec, they are used by
	// Docker, and thus should not be changed.
	//
	// Note: the registered default for "root" is empty; init() overwrites the
	// value before flags are parsed.
	rootDir     = flag.String("root", "", "root directory for storage of container state.")
	logFilename = flag.String("log", "", "file path where internal debug information is written, default is stdout.")
	logFormat   = flag.String("log-format", "text", "log format: text (default), json, or json-k8s.")
	debug       = flag.Bool("debug", false, "enable debug logging.")
	showVersion = flag.Bool("version", false, "show version and exit.")
	// TODO(gvisor.dev/issue/193): support systemd cgroups
	systemdCgroup = flag.Bool("systemd-cgroup", false, "Use systemd for cgroups. NOT SUPPORTED.")

	// These flags are unique to runsc, and are used to configure parts of the
	// system that are not covered by the runtime spec.

	// Debugging flags.
	debugLog        = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
	logPackets      = flag.Bool("log-packets", false, "enable network packet logging.")
	logFD           = flag.Int("log-fd", -1, "file descriptor to log to. If set, the 'log' flag is ignored.")
	debugLogFD      = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to. If set, the 'debug-log' flag is ignored.")
	debugLogFormat  = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
	alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.")

	// Debugging flags: strace related
	strace         = flag.Bool("strace", false, "enable strace.")
	straceSyscalls = flag.String("strace-syscalls", "", "comma-separated list of syscalls to trace. If --strace is true and this list is empty, then all syscalls will be traced.")
	straceLogSize  = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs.")

	// Flags that control sandbox runtime behavior.
	platformName       = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm.")
	network            = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
	hardwareGSO        = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
	softwareGSO        = flag.Bool("software-gso", true, "enable software segmentation offload when hardware offload can't be enabled.")
	fileAccess         = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
	fsGoferHostUDS     = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
	overlay            = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
	overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.")
	watchdogAction     = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
	panicSignal        = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
	profile            = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
	netRaw             = flag.Bool("net-raw", false, "enable raw sockets. When false, raw sockets are disabled by removing CAP_NET_RAW from containers (`runsc exec` will still be able to utilize raw sockets). Raw sockets allow malicious containers to craft packets and potentially attack the network.")
	numNetworkChannels = flag.Int("num-network-channels", 1, "number of underlying channels(FDs) to use for network link endpoints.")
	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
	cpuNumFromQuota    = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")

	// Test flags, not to be used outside tests, ever.
	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
	testOnlyTestNameEnv                        = flag.String("TESTONLY-test-name-env", "", "TEST ONLY; do not ever use! Used for automated tests to improve logging.")
)
func main ( ) {
// Help and flags commands are generated automatically.
2019-06-11 06:37:32 +00:00
help := cmd . NewHelp ( subcommands . DefaultCommander )
help . Register ( new ( cmd . Syscalls ) )
subcommands . Register ( help , "" )
2018-04-27 17:37:02 +00:00
subcommands . Register ( subcommands . FlagsCommand ( ) , "" )
2019-09-04 05:01:34 +00:00
// Installation helpers.
const helperGroup = "helpers"
subcommands . Register ( new ( cmd . Install ) , helperGroup )
subcommands . Register ( new ( cmd . Uninstall ) , helperGroup )
2018-04-27 17:37:02 +00:00
// Register user-facing runsc commands.
2018-06-29 21:46:45 +00:00
subcommands . Register ( new ( cmd . Checkpoint ) , "" )
2018-04-27 17:37:02 +00:00
subcommands . Register ( new ( cmd . Create ) , "" )
subcommands . Register ( new ( cmd . Delete ) , "" )
2019-04-12 00:53:24 +00:00
subcommands . Register ( new ( cmd . Do ) , "" )
2018-04-27 17:37:02 +00:00
subcommands . Register ( new ( cmd . Events ) , "" )
subcommands . Register ( new ( cmd . Exec ) , "" )
subcommands . Register ( new ( cmd . Gofer ) , "" )
subcommands . Register ( new ( cmd . Kill ) , "" )
subcommands . Register ( new ( cmd . List ) , "" )
2018-06-19 22:22:23 +00:00
subcommands . Register ( new ( cmd . Pause ) , "" )
2018-04-27 17:37:02 +00:00
subcommands . Register ( new ( cmd . PS ) , "" )
2018-06-29 21:46:45 +00:00
subcommands . Register ( new ( cmd . Restore ) , "" )
2018-06-19 22:22:23 +00:00
subcommands . Register ( new ( cmd . Resume ) , "" )
2018-04-27 17:37:02 +00:00
subcommands . Register ( new ( cmd . Run ) , "" )
2018-10-12 19:58:42 +00:00
subcommands . Register ( new ( cmd . Spec ) , "" )
2018-04-27 17:37:02 +00:00
subcommands . Register ( new ( cmd . Start ) , "" )
subcommands . Register ( new ( cmd . State ) , "" )
2018-06-28 21:55:46 +00:00
subcommands . Register ( new ( cmd . Wait ) , "" )
2018-04-27 17:37:02 +00:00
// Register internal commands with the internal group name. This causes
// them to be sorted below the user-facing commands with empty group.
// The string below will be printed above the commands.
const internalGroup = "internal use only"
subcommands . Register ( new ( cmd . Boot ) , internalGroup )
2018-06-20 20:30:39 +00:00
subcommands . Register ( new ( cmd . Debug ) , internalGroup )
2018-04-27 17:37:02 +00:00
subcommands . Register ( new ( cmd . Gofer ) , internalGroup )
// All subcommands must be registered before flag parsing.
flag . Parse ( )
2019-04-01 23:17:40 +00:00
// Are we showing the version?
if * showVersion {
// The format here is the same as runc.
fmt . Fprintf ( os . Stdout , "runsc version %s\n" , version )
fmt . Fprintf ( os . Stdout , "spec: %s\n" , specutils . Version )
os . Exit ( 0 )
}
2019-11-01 00:37:54 +00:00
// TODO(gvisor.dev/issue/193): support systemd cgroups
if * systemdCgroup {
fmt . Fprintln ( os . Stderr , "systemd cgroup flag passed, but systemd cgroups not supported. See gvisor.dev/issue/193" )
os . Exit ( 1 )
}
2019-06-06 17:48:19 +00:00
var errorLogger io . Writer
if * logFD > - 1 {
errorLogger = os . NewFile ( uintptr ( * logFD ) , "error log file" )
} else if * logFilename != "" {
// We must set O_APPEND and not O_TRUNC because Docker passes
// the same log file for all commands (and also parses these
// log files), so we can't destroy them on each command.
var err error
errorLogger , err = os . OpenFile ( * logFilename , os . O_WRONLY | os . O_CREATE | os . O_APPEND , 0644 )
if err != nil {
cmd . Fatalf ( "error opening log file %q: %v" , * logFilename , err )
}
}
cmd . ErrorLogger = errorLogger
2019-07-04 05:50:26 +00:00
platformType := * platformName
if _ , err := platform . Lookup ( platformType ) ; err != nil {
2018-04-27 17:37:02 +00:00
cmd . Fatalf ( "%v" , err )
}
fsAccess , err := boot . MakeFileAccessType ( * fileAccess )
if err != nil {
cmd . Fatalf ( "%v" , err )
}
2018-09-07 19:27:44 +00:00
if fsAccess == boot . FileAccessShared && * overlay {
cmd . Fatalf ( "overlay flag is incompatible with shared file access" )
2018-08-15 16:33:19 +00:00
}
2018-04-27 17:37:02 +00:00
netType , err := boot . MakeNetworkType ( * network )
if err != nil {
cmd . Fatalf ( "%v" , err )
}
2018-06-28 16:45:52 +00:00
wa , err := boot . MakeWatchdogAction ( * watchdogAction )
if err != nil {
cmd . Fatalf ( "%v" , err )
}
2019-06-06 15:05:46 +00:00
if * numNetworkChannels <= 0 {
cmd . Fatalf ( "num_network_channels must be > 0, got: %d" , * numNetworkChannels )
}
2019-08-05 15:15:48 +00:00
refsLeakMode , err := boot . MakeRefsLeakMode ( * referenceLeakMode )
if err != nil {
cmd . Fatalf ( "%v" , err )
}
2019-08-29 21:17:32 +00:00
// Sets the reference leak check mode. Also set it in config below to
// propagate it to child processes.
refs . SetLeakMode ( refsLeakMode )
2018-04-27 17:37:02 +00:00
// Create a new Config from the flags.
conf := & boot . Config {
2019-06-12 16:40:50 +00:00
RootDir : * rootDir ,
Debug : * debug ,
LogFilename : * logFilename ,
LogFormat : * logFormat ,
DebugLog : * debugLog ,
DebugLogFormat : * debugLogFormat ,
FileAccess : fsAccess ,
2019-09-24 22:24:10 +00:00
FSGoferHostUDS : * fsGoferHostUDS ,
2019-06-12 16:40:50 +00:00
Overlay : * overlay ,
Network : netType ,
2019-10-22 18:54:14 +00:00
HardwareGSO : * hardwareGSO ,
SoftwareGSO : * softwareGSO ,
2019-06-12 16:40:50 +00:00
LogPackets : * logPackets ,
Platform : platformType ,
Strace : * strace ,
StraceLogSize : * straceLogSize ,
WatchdogAction : wa ,
PanicSignal : * panicSignal ,
ProfileEnable : * profile ,
EnableRaw : * netRaw ,
NumNetworkChannels : * numNetworkChannels ,
Rootless : * rootless ,
2019-07-26 23:52:28 +00:00
AlsoLogToStderr : * alsoLogToStderr ,
2019-08-05 15:15:48 +00:00
ReferenceLeakMode : refsLeakMode ,
2019-10-16 21:33:23 +00:00
OverlayfsStaleRead : * overlayfsStaleRead ,
2019-12-15 17:57:23 +00:00
CPUNumFromQuota : * cpuNumFromQuota ,
2019-06-12 16:40:50 +00:00
2018-12-18 01:34:09 +00:00
TestOnlyAllowRunAsCurrentUserWithoutChroot : * testOnlyAllowRunAsCurrentUserWithoutChroot ,
2019-09-16 15:15:40 +00:00
TestOnlyTestNameEnv : * testOnlyTestNameEnv ,
2018-04-27 17:37:02 +00:00
}
if len ( * straceSyscalls ) != 0 {
conf . StraceSyscalls = strings . Split ( * straceSyscalls , "," )
}
// Set up logging.
if * debug {
log . SetLevel ( log . Debug )
}
2019-11-22 23:21:31 +00:00
// Logging will include the local date and time via the time package.
//
// On first use, time.Local initializes the local time zone, which
// involves opening tzdata files on the host. Since this requires
// opening host files, it must be done before syscall filter
// installation.
//
// Generally there will be a log message before filter installation
// that will force initialization, but force initialization here in
// case that does not occur.
_ = time . Local . String ( )
2019-04-12 00:53:24 +00:00
subcommand := flag . CommandLine . Arg ( 0 )
2019-06-06 17:48:19 +00:00
var e log . Emitter
2018-09-05 03:08:41 +00:00
if * debugLogFD > - 1 {
f := os . NewFile ( uintptr ( * debugLogFD ) , "debug log file" )
2018-10-04 18:00:40 +00:00
// Quick sanity check to make sure no other commands get passed
// a log fd (they should use log dir instead).
2019-01-31 23:17:50 +00:00
if subcommand != "boot" && subcommand != "gofer" {
cmd . Fatalf ( "flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q" , subcommand )
2018-10-04 18:00:40 +00:00
}
2019-06-06 17:48:19 +00:00
// If we are the boot process, then we own our stdio FDs and can do what we
// want with them. Since Docker and Containerd both eat boot's stderr, we
// dup our stderr to the provided log FD so that panics will appear in the
// logs, rather than just disappear.
2019-09-05 07:50:44 +00:00
if err := syscall . Dup3 ( int ( f . Fd ( ) ) , int ( os . Stderr . Fd ( ) ) , 0 ) ; err != nil {
2018-10-04 18:00:40 +00:00
cmd . Fatalf ( "error dup'ing fd %d to stderr: %v" , f . Fd ( ) , err )
}
2019-06-06 17:48:19 +00:00
e = newEmitter ( * debugLogFormat , f )
2018-10-11 21:28:15 +00:00
} else if * debugLog != "" {
2019-09-16 15:15:40 +00:00
f , err := specutils . DebugLogFile ( * debugLog , subcommand , "" /* name */ )
2018-04-27 17:37:02 +00:00
if err != nil {
2018-10-11 21:28:15 +00:00
cmd . Fatalf ( "error opening debug log file in %q: %v" , * debugLog , err )
2018-04-27 17:37:02 +00:00
}
2019-06-06 17:48:19 +00:00
e = newEmitter ( * debugLogFormat , f )
} else {
// Stderr is reserved for the application, just discard the logs if no debug
// log is specified.
e = newEmitter ( "text" , ioutil . Discard )
2018-04-27 17:37:02 +00:00
}
2019-06-07 00:48:53 +00:00
if * alsoLogToStderr {
2020-01-27 23:37:28 +00:00
e = & log . MultiEmitter { e , newEmitter ( * debugLogFormat , os . Stderr ) }
2019-06-07 00:48:53 +00:00
}
2018-04-27 17:37:02 +00:00
log . SetTarget ( e )
log . Infof ( "***************************" )
log . Infof ( "Args: %s" , os . Args )
2019-04-01 23:17:40 +00:00
log . Infof ( "Version %s" , version )
2018-04-27 17:37:02 +00:00
log . Infof ( "PID: %d" , os . Getpid ( ) )
log . Infof ( "UID: %d, GID: %d" , os . Getuid ( ) , os . Getgid ( ) )
log . Infof ( "Configuration:" )
log . Infof ( "\t\tRootDir: %s" , conf . RootDir )
log . Infof ( "\t\tPlatform: %v" , conf . Platform )
log . Infof ( "\t\tFileAccess: %v, overlay: %t" , conf . FileAccess , conf . Overlay )
log . Infof ( "\t\tNetwork: %v, logging: %t" , conf . Network , conf . LogPackets )
log . Infof ( "\t\tStrace: %t, max size: %d, syscalls: %s" , conf . Strace , conf . StraceLogSize , conf . StraceSyscalls )
log . Infof ( "***************************" )
2019-08-23 01:25:57 +00:00
if * testOnlyAllowRunAsCurrentUserWithoutChroot {
// SIGTERM is sent to all processes if a test exceeds its
// timeout and this case is handled by syscall_test_runner.
log . Warningf ( "Block the TERM signal. This is only safe in tests!" )
signal . Ignore ( syscall . SIGTERM )
}
2018-04-27 17:37:02 +00:00
// Call the subcommand and pass in the configuration.
var ws syscall . WaitStatus
subcmdCode := subcommands . Execute ( context . Background ( ) , conf , & ws )
if subcmdCode == subcommands . ExitSuccess {
log . Infof ( "Exiting with status: %v" , ws )
if ws . Signaled ( ) {
// No good way to return it, emulate what the shell does. Maybe raise
// signall to self?
os . Exit ( 128 + int ( ws . Signal ( ) ) )
}
os . Exit ( ws . ExitStatus ( ) )
}
// Return an error that is unlikely to be used by the application.
log . Warningf ( "Failure to execute command, err: %v" , subcmdCode )
os . Exit ( 128 )
}
2018-11-02 00:43:50 +00:00
func newEmitter ( format string , logFile io . Writer ) log . Emitter {
switch format {
case "text" :
2020-01-27 23:37:28 +00:00
return & log . GoogleEmitter { log . Writer { Next : logFile } }
2018-11-02 00:43:50 +00:00
case "json" :
return & log . JSONEmitter { log . Writer { Next : logFile } }
case "json-k8s" :
return & log . K8sJSONEmitter { log . Writer { Next : logFile } }
}
cmd . Fatalf ( "invalid log format %q, must be 'text', 'json', or 'json-k8s'" , format )
panic ( "unreachable" )
}
2018-04-27 17:37:02 +00:00
// init overrides the value behind the "root" flag before main parses the
// command line, so an explicit --root still wins. The value is
// $XDG_RUNTIME_DIR/runsc when XDG_RUNTIME_DIR is set and non-empty, and
// /var/run/runsc otherwise.
func init() {
	// Set default root dir to something (hopefully) user-writeable.
	defaultRoot := "/var/run/runsc"
	if xdg := os.Getenv("XDG_RUNTIME_DIR"); xdg != "" {
		defaultRoot = filepath.Join(xdg, "runsc")
	}
	*rootDir = defaultRoot
}