2019-04-29 21:25:05 +00:00
// Copyright 2018 The gVisor Authors.
2018-04-27 17:37:02 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
2018-11-28 22:00:54 +00:00
"context"
2018-04-27 17:37:02 +00:00
"os"
"runtime/debug"
"strings"
"syscall"
"flag"
"github.com/google/subcommands"
2018-06-28 16:56:23 +00:00
specs "github.com/opencontainers/runtime-spec/specs-go"
2019-06-13 23:49:09 +00:00
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/runsc/boot"
2019-07-04 05:50:26 +00:00
"gvisor.dev/gvisor/runsc/boot/platforms"
2019-06-13 23:49:09 +00:00
"gvisor.dev/gvisor/runsc/specutils"
2018-04-27 17:37:02 +00:00
)
// Boot implements subcommands.Command for the "boot" command which starts a
// new sandbox. It should not be called directly.
type Boot struct {
2018-09-05 03:08:41 +00:00
// bundleDir is the directory containing the OCI spec.
2018-04-27 17:37:02 +00:00
bundleDir string
2018-09-05 03:08:41 +00:00
// specFD is the file descriptor that the spec will be read from.
specFD int
2018-04-27 17:37:02 +00:00
// controllerFD is the file descriptor of a stream socket for the
// control server that is donated to this process.
controllerFD int
2018-09-11 20:08:36 +00:00
// deviceFD is the file descriptor for the platform device file.
deviceFD int
2018-04-27 17:37:02 +00:00
// ioFDs is the list of FDs used to connect to FS gofers.
ioFDs intFlags
2018-10-03 17:31:01 +00:00
// stdioFDs are the fds for stdin, stdout, and stderr. They must be
// provided in that order.
stdioFDs intFlags
2018-04-27 17:37:02 +00:00
// console is set to true if the sandbox should allow terminal ioctl(2)
// syscalls.
console bool
// applyCaps determines if capabilities defined in the spec should be applied
// to the process.
applyCaps bool
2018-10-10 15:59:25 +00:00
2019-01-14 22:07:05 +00:00
// setUpChroot is set to true if the sandbox is started in an empty root.
setUpRoot bool
2018-10-10 15:59:25 +00:00
// cpuNum number of CPUs to create inside the sandbox.
cpuNum int
// totalMem sets the initial amount of total memory to report back to the
// container.
totalMem uint64
2018-10-11 18:55:45 +00:00
// userLogFD is the file descriptor to write user logs to.
userLogFD int
2018-12-06 23:26:58 +00:00
// startSyncFD is the file descriptor to synchronize runsc and sandbox.
startSyncFD int
2019-01-18 20:16:24 +00:00
2019-03-18 19:29:43 +00:00
// mountsFD is the file descriptor to read list of mounts after they have
// been resolved (direct paths, no symlinks). They are resolved outside the
// sandbox (e.g. gofer) and sent through this FD.
mountsFD int
2019-01-18 20:16:24 +00:00
// pidns is set if the sanadbox is in its own pid namespace.
pidns bool
2018-04-27 17:37:02 +00:00
}
// Name implements subcommands.Command.Name. It reports the command name,
// "boot".
func (*Boot) Name() string {
	const name = "boot"
	return name
}
// Synopsis implements subcommands.Command.Synopsis. It returns a one-line
// description of the command.
func (*Boot) Synopsis() string {
	const synopsis = "launch a sandbox process (internal use only)"
	return synopsis
}
// Usage implements subcommands.Command.Usage.
func ( * Boot ) Usage ( ) string {
2018-09-27 17:25:19 +00:00
return ` boot [flags] <container id> `
2018-04-27 17:37:02 +00:00
}
// SetFlags implements subcommands.Command.SetFlags.
func ( b * Boot ) SetFlags ( f * flag . FlagSet ) {
2018-08-31 18:29:36 +00:00
f . StringVar ( & b . bundleDir , "bundle" , "" , "required path to the root of the bundle directory" )
2018-09-05 03:08:41 +00:00
f . IntVar ( & b . specFD , "spec-fd" , - 1 , "required fd with the container spec" )
2018-04-27 17:37:02 +00:00
f . IntVar ( & b . controllerFD , "controller-fd" , - 1 , "required FD of a stream socket for the control server that must be donated to this process" )
2018-09-11 20:08:36 +00:00
f . IntVar ( & b . deviceFD , "device-fd" , - 1 , "FD for the platform device file" )
2018-04-27 17:37:02 +00:00
f . Var ( & b . ioFDs , "io-fds" , "list of FDs to connect 9P clients. They must follow this order: root first, then mounts as defined in the spec" )
2018-10-03 17:31:01 +00:00
f . Var ( & b . stdioFDs , "stdio-fds" , "list of FDs containing sandbox stdin, stdout, and stderr in that order" )
2018-04-27 17:37:02 +00:00
f . BoolVar ( & b . console , "console" , false , "set to true if the sandbox should allow terminal ioctl(2) syscalls" )
f . BoolVar ( & b . applyCaps , "apply-caps" , false , "if true, apply capabilities defined in the spec to the process" )
2019-01-14 22:07:05 +00:00
f . BoolVar ( & b . setUpRoot , "setup-root" , false , "if true, set up an empty root for the process" )
2019-01-18 20:16:24 +00:00
f . BoolVar ( & b . pidns , "pidns" , false , "if true, the sandbox is in its own PID namespace" )
2018-10-10 15:59:25 +00:00
f . IntVar ( & b . cpuNum , "cpu-num" , 0 , "number of CPUs to create inside the sandbox" )
f . Uint64Var ( & b . totalMem , "total-memory" , 0 , "sets the initial amount of total memory to report back to the container" )
2018-10-11 18:55:45 +00:00
f . IntVar ( & b . userLogFD , "user-log-fd" , 0 , "file descriptor to write user logs to. 0 means no logging." )
2018-12-06 23:26:58 +00:00
f . IntVar ( & b . startSyncFD , "start-sync-fd" , - 1 , "required FD to used to synchronize sandbox startup" )
2019-03-18 19:29:43 +00:00
f . IntVar ( & b . mountsFD , "mounts-fd" , - 1 , "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks)." )
2018-04-27 17:37:02 +00:00
}
// Execute implements subcommands.Command.Execute. It starts a sandbox in a
// waiting state.
func ( b * Boot ) Execute ( _ context . Context , f * flag . FlagSet , args ... interface { } ) subcommands . ExitStatus {
2018-12-06 23:26:58 +00:00
if b . specFD == - 1 || b . controllerFD == - 1 || b . startSyncFD == - 1 || f . NArg ( ) != 1 {
2018-04-27 17:37:02 +00:00
f . Usage ( )
return subcommands . ExitUsageError
}
// Ensure that if there is a panic, all goroutine stacks are printed.
debug . SetTraceback ( "all" )
2019-06-12 16:40:50 +00:00
conf := args [ 0 ] . ( * boot . Config )
2019-01-14 22:07:05 +00:00
if b . setUpRoot {
2019-01-18 20:16:24 +00:00
if err := setUpChroot ( b . pidns ) ; err != nil {
2019-01-14 22:07:05 +00:00
Fatalf ( "error setting up chroot: %v" , err )
}
if ! b . applyCaps {
// Remove --setup-root arg to call myself.
var args [ ] string
for _ , arg := range os . Args {
if ! strings . Contains ( arg , "setup-root" ) {
args = append ( args , arg )
}
}
2019-06-12 16:40:50 +00:00
if ! conf . Rootless {
// Note that we've already read the spec from the spec FD, and
// we will read it again after the exec call. This works
// because the ReadSpecFromFile function seeks to the beginning
// of the file before reading.
if err := callSelfAsNobody ( args ) ; err != nil {
Fatalf ( "%v" , err )
}
panic ( "callSelfAsNobody must never return success" )
2019-01-14 22:07:05 +00:00
}
}
}
2018-09-05 03:08:41 +00:00
// Get the spec from the specFD.
specFile := os . NewFile ( uintptr ( b . specFD ) , "spec file" )
defer specFile . Close ( )
spec , err := specutils . ReadSpecFromFile ( b . bundleDir , specFile )
2018-04-27 17:37:02 +00:00
if err != nil {
2019-01-19 01:35:09 +00:00
Fatalf ( "reading spec: %v" , err )
2018-04-27 17:37:02 +00:00
}
specutils . LogSpec ( spec )
if b . applyCaps {
2018-06-08 16:58:29 +00:00
caps := spec . Process . Capabilities
2018-06-28 16:56:23 +00:00
if caps == nil {
caps = & specs . LinuxCapabilities { }
}
2019-07-04 05:50:26 +00:00
if conf . Platform == platforms . Ptrace {
2018-06-08 16:58:29 +00:00
// Ptrace platform requires extra capabilities.
const c = "CAP_SYS_PTRACE"
caps . Bounding = append ( caps . Bounding , c )
caps . Effective = append ( caps . Effective , c )
caps . Permitted = append ( caps . Permitted , c )
}
// Remove --apply-caps arg to call myself.
var args [ ] string
for _ , arg := range os . Args {
2019-01-14 22:07:05 +00:00
if ! strings . Contains ( arg , "setup-root" ) && ! strings . Contains ( arg , "apply-caps" ) {
2018-06-08 16:58:29 +00:00
args = append ( args , arg )
}
}
2018-09-05 03:08:41 +00:00
// Note that we've already read the spec from the spec FD, and
// we will read it again after the exec call. This works
// because the ReadSpecFromFile function seeks to the beginning
// of the file before reading.
2018-06-28 16:56:23 +00:00
if err := setCapsAndCallSelf ( args , caps ) ; err != nil {
2018-06-08 16:58:29 +00:00
Fatalf ( "%v" , err )
}
panic ( "setCapsAndCallSelf must never return success" )
2018-04-27 17:37:02 +00:00
}
2019-03-18 19:29:43 +00:00
// Read resolved mount list and replace the original one from the spec.
mountsFile := os . NewFile ( uintptr ( b . mountsFD ) , "mounts file" )
cleanMounts , err := specutils . ReadMounts ( mountsFile )
if err != nil {
mountsFile . Close ( )
Fatalf ( "Error reading mounts file: %v" , err )
}
mountsFile . Close ( )
spec . Mounts = cleanMounts
2018-04-27 17:37:02 +00:00
// Create the loader.
2018-10-10 15:59:25 +00:00
bootArgs := boot . Args {
ID : f . Arg ( 0 ) ,
Spec : spec ,
Conf : conf ,
ControllerFD : b . controllerFD ,
2019-05-15 21:35:30 +00:00
Device : os . NewFile ( uintptr ( b . deviceFD ) , "platform device" ) ,
2018-10-10 15:59:25 +00:00
GoferFDs : b . ioFDs . GetArray ( ) ,
StdioFDs : b . stdioFDs . GetArray ( ) ,
Console : b . console ,
NumCPU : b . cpuNum ,
TotalMem : b . totalMem ,
2018-10-11 18:55:45 +00:00
UserLogFD : b . userLogFD ,
2018-10-10 15:59:25 +00:00
}
l , err := boot . New ( bootArgs )
2018-04-27 17:37:02 +00:00
if err != nil {
2019-01-19 01:35:09 +00:00
Fatalf ( "creating loader: %v" , err )
2018-04-27 17:37:02 +00:00
}
2018-12-06 23:26:58 +00:00
2018-12-28 21:47:19 +00:00
// Fatalf exits the process and doesn't run defers.
// 'l' must be destroyed explicitly after this point!
2018-04-27 17:37:02 +00:00
2018-12-28 21:47:19 +00:00
// Notify the parent process the sandbox has booted (and that the controller
// is up).
startSyncFile := os . NewFile ( uintptr ( b . startSyncFD ) , "start-sync file" )
buf := make ( [ ] byte , 1 )
2018-12-06 23:26:58 +00:00
if w , err := startSyncFile . Write ( buf ) ; err != nil || w != 1 {
l . Destroy ( )
2019-01-19 01:35:09 +00:00
Fatalf ( "unable to write into the start-sync descriptor: %v" , err )
2018-12-06 23:26:58 +00:00
}
2018-12-28 21:47:19 +00:00
// Closes startSyncFile because 'l.Run()' only returns when the sandbox exits.
2018-12-06 23:26:58 +00:00
startSyncFile . Close ( )
2018-04-27 17:37:02 +00:00
// Wait for the start signal from runsc.
2018-05-17 18:54:36 +00:00
l . WaitForStartSignal ( )
2018-04-27 17:37:02 +00:00
// Run the application and wait for it to finish.
2018-05-17 18:54:36 +00:00
if err := l . Run ( ) ; err != nil {
2018-07-12 20:36:01 +00:00
l . Destroy ( )
2019-01-19 01:35:09 +00:00
Fatalf ( "running sandbox: %v" , err )
2018-04-27 17:37:02 +00:00
}
2018-05-17 18:54:36 +00:00
ws := l . WaitExit ( )
2018-04-27 17:37:02 +00:00
log . Infof ( "application exiting with %+v" , ws )
2019-06-12 16:40:50 +00:00
waitStatus := args [ 1 ] . ( * syscall . WaitStatus )
2018-04-27 17:37:02 +00:00
* waitStatus = syscall . WaitStatus ( ws . Status ( ) )
2018-07-12 20:36:01 +00:00
l . Destroy ( )
2018-04-27 17:37:02 +00:00
return subcommands . ExitSuccess
}