Merge gvisor-containerd-shim

Adin Scannell 2020-07-09 16:59:42 -07:00
commit 5471dbe2f3
45 changed files with 5983 additions and 0 deletions

511
pkg/shim/runsc/runsc.go Normal file

@ -0,0 +1,511 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package runsc
import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"time"
runc "github.com/containerd/go-runc"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
var Monitor runc.ProcessMonitor = runc.Monitor
// DefaultCommand is the default command for Runsc
const DefaultCommand = "runsc"
// Runsc is the client to the runsc cli
type Runsc struct {
Command string
PdeathSignal syscall.Signal
Setpgid bool
Root string
Log string
LogFormat runc.Format
Config map[string]string
}
// List returns all containers created inside the provided runsc root directory
func (r *Runsc) List(context context.Context) ([]*runc.Container, error) {
data, err := cmdOutput(r.command(context, "list", "--format=json"), false)
if err != nil {
return nil, err
}
var out []*runc.Container
if err := json.Unmarshal(data, &out); err != nil {
return nil, err
}
return out, nil
}
// State returns the state for the container provided by id
func (r *Runsc) State(context context.Context, id string) (*runc.Container, error) {
data, err := cmdOutput(r.command(context, "state", id), true)
if err != nil {
return nil, fmt.Errorf("%s: %s", err, data)
}
var c runc.Container
if err := json.Unmarshal(data, &c); err != nil {
return nil, err
}
return &c, nil
}
type CreateOpts struct {
runc.IO
// PidFile is a path to where a pid file should be created
PidFile string
ConsoleSocket runc.ConsoleSocket
// UserLog is a path to where runsc user log should be generated.
UserLog string
}
func (o *CreateOpts) args() (out []string, err error) {
if o.PidFile != "" {
abs, err := filepath.Abs(o.PidFile)
if err != nil {
return nil, err
}
out = append(out, "--pid-file", abs)
}
if o.ConsoleSocket != nil {
out = append(out, "--console-socket", o.ConsoleSocket.Path())
}
if o.UserLog != "" {
out = append(out, "--user-log", o.UserLog)
}
return out, nil
}
// Create creates a new container and returns an error if it was not created successfully.
func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateOpts) error {
args := []string{"create", "--bundle", bundle}
if opts != nil {
oargs, err := opts.args()
if err != nil {
return err
}
args = append(args, oargs...)
}
cmd := r.command(context, append(args, id)...)
if opts != nil && opts.IO != nil {
opts.Set(cmd)
}
if cmd.Stdout == nil && cmd.Stderr == nil {
data, err := cmdOutput(cmd, true)
if err != nil {
return fmt.Errorf("%s: %s", err, data)
}
return nil
}
ec, err := Monitor.Start(cmd)
if err != nil {
return err
}
if opts != nil && opts.IO != nil {
if c, ok := opts.IO.(runc.StartCloser); ok {
if err := c.CloseAfterStart(); err != nil {
return err
}
}
}
status, err := Monitor.Wait(cmd, ec)
if err == nil && status != 0 {
err = fmt.Errorf("%s did not terminate successfully", cmd.Args[0])
}
return err
}
// Start will start an already created container
func (r *Runsc) Start(context context.Context, id string, cio runc.IO) error {
cmd := r.command(context, "start", id)
if cio != nil {
cio.Set(cmd)
}
if cmd.Stdout == nil && cmd.Stderr == nil {
data, err := cmdOutput(cmd, true)
if err != nil {
return fmt.Errorf("%s: %s", err, data)
}
return nil
}
ec, err := Monitor.Start(cmd)
if err != nil {
return err
}
if cio != nil {
if c, ok := cio.(runc.StartCloser); ok {
if err := c.CloseAfterStart(); err != nil {
return err
}
}
}
status, err := Monitor.Wait(cmd, ec)
if err == nil && status != 0 {
err = fmt.Errorf("%s did not terminate successfully", cmd.Args[0])
}
return err
}
type waitResult struct {
ID string `json:"id"`
ExitStatus int `json:"exitStatus"`
}
// Wait will wait for a running container, and return its exit status.
// TODO(random-liu): Add exec process support.
func (r *Runsc) Wait(context context.Context, id string) (int, error) {
data, err := cmdOutput(r.command(context, "wait", id), true)
if err != nil {
return 0, fmt.Errorf("%s: %s", err, data)
}
var res waitResult
if err := json.Unmarshal(data, &res); err != nil {
return 0, err
}
return res.ExitStatus, nil
}
type ExecOpts struct {
runc.IO
PidFile string
InternalPidFile string
ConsoleSocket runc.ConsoleSocket
Detach bool
}
func (o *ExecOpts) args() (out []string, err error) {
if o.ConsoleSocket != nil {
out = append(out, "--console-socket", o.ConsoleSocket.Path())
}
if o.Detach {
out = append(out, "--detach")
}
if o.PidFile != "" {
abs, err := filepath.Abs(o.PidFile)
if err != nil {
return nil, err
}
out = append(out, "--pid-file", abs)
}
if o.InternalPidFile != "" {
abs, err := filepath.Abs(o.InternalPidFile)
if err != nil {
return nil, err
}
out = append(out, "--internal-pid-file", abs)
}
return out, nil
}
// Exec executes an additional process inside the container based on a full
// OCI Process specification.
func (r *Runsc) Exec(context context.Context, id string, spec specs.Process, opts *ExecOpts) error {
f, err := ioutil.TempFile(os.Getenv("XDG_RUNTIME_DIR"), "runsc-process")
if err != nil {
return err
}
defer os.Remove(f.Name())
err = json.NewEncoder(f).Encode(spec)
f.Close()
if err != nil {
return err
}
args := []string{"exec", "--process", f.Name()}
if opts != nil {
oargs, err := opts.args()
if err != nil {
return err
}
args = append(args, oargs...)
}
cmd := r.command(context, append(args, id)...)
if opts != nil && opts.IO != nil {
opts.Set(cmd)
}
if cmd.Stdout == nil && cmd.Stderr == nil {
data, err := cmdOutput(cmd, true)
if err != nil {
return fmt.Errorf("%s: %s", err, data)
}
return nil
}
ec, err := Monitor.Start(cmd)
if err != nil {
return err
}
if opts != nil && opts.IO != nil {
if c, ok := opts.IO.(runc.StartCloser); ok {
if err := c.CloseAfterStart(); err != nil {
return err
}
}
}
status, err := Monitor.Wait(cmd, ec)
if err == nil && status != 0 {
err = fmt.Errorf("%s did not terminate successfully", cmd.Args[0])
}
return err
}
// Run runs the create, start, delete lifecycle of the container
// and returns its exit status after it has exited
func (r *Runsc) Run(context context.Context, id, bundle string, opts *CreateOpts) (int, error) {
args := []string{"run", "--bundle", bundle}
if opts != nil {
oargs, err := opts.args()
if err != nil {
return -1, err
}
args = append(args, oargs...)
}
cmd := r.command(context, append(args, id)...)
if opts != nil && opts.IO != nil {
opts.Set(cmd)
}
ec, err := Monitor.Start(cmd)
if err != nil {
return -1, err
}
return Monitor.Wait(cmd, ec)
}
type DeleteOpts struct {
Force bool
}
func (o *DeleteOpts) args() (out []string) {
if o.Force {
out = append(out, "--force")
}
return out
}
// Delete deletes the container
func (r *Runsc) Delete(context context.Context, id string, opts *DeleteOpts) error {
args := []string{"delete"}
if opts != nil {
args = append(args, opts.args()...)
}
return r.runOrError(r.command(context, append(args, id)...))
}
// KillOpts specifies options for killing a container and its processes
type KillOpts struct {
All bool
Pid int
}
func (o *KillOpts) args() (out []string) {
if o.All {
out = append(out, "--all")
}
if o.Pid != 0 {
out = append(out, "--pid", strconv.Itoa(o.Pid))
}
return out
}
// Kill sends the specified signal to the container
func (r *Runsc) Kill(context context.Context, id string, sig int, opts *KillOpts) error {
args := []string{
"kill",
}
if opts != nil {
args = append(args, opts.args()...)
}
return r.runOrError(r.command(context, append(args, id, strconv.Itoa(sig))...))
}
// Stats returns the stats for a container, such as cpu, memory, and io.
func (r *Runsc) Stats(context context.Context, id string) (*runc.Stats, error) {
cmd := r.command(context, "events", "--stats", id)
rd, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
ec, err := Monitor.Start(cmd)
if err != nil {
return nil, err
}
defer func() {
rd.Close()
Monitor.Wait(cmd, ec)
}()
var e runc.Event
if err := json.NewDecoder(rd).Decode(&e); err != nil {
return nil, err
}
return e.Stats, nil
}
// Events returns an event stream from runsc for a container with stats and OOM notifications
func (r *Runsc) Events(context context.Context, id string, interval time.Duration) (chan *runc.Event, error) {
cmd := r.command(context, "events", fmt.Sprintf("--interval=%ds", int(interval.Seconds())), id)
rd, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
ec, err := Monitor.Start(cmd)
if err != nil {
rd.Close()
return nil, err
}
var (
dec = json.NewDecoder(rd)
c = make(chan *runc.Event, 128)
)
go func() {
defer func() {
close(c)
rd.Close()
Monitor.Wait(cmd, ec)
}()
for {
var e runc.Event
if err := dec.Decode(&e); err != nil {
if err == io.EOF {
return
}
e = runc.Event{
Type: "error",
Err: err,
}
}
c <- &e
}
}()
return c, nil
}
// Ps lists all the processes inside the container returning their pids
func (r *Runsc) Ps(context context.Context, id string) ([]int, error) {
data, err := cmdOutput(r.command(context, "ps", "--format", "json", id), true)
if err != nil {
return nil, fmt.Errorf("%s: %s", err, data)
}
var pids []int
if err := json.Unmarshal(data, &pids); err != nil {
return nil, err
}
return pids, nil
}
// Top lists all the processes inside the container returning the full ps data
func (r *Runsc) Top(context context.Context, id string) (*runc.TopResults, error) {
data, err := cmdOutput(r.command(context, "ps", "--format", "table", id), true)
if err != nil {
return nil, fmt.Errorf("%s: %s", err, data)
}
topResults, err := runc.ParsePSOutput(data)
if err != nil {
return nil, fmt.Errorf("%s: %s", err, data)
}
return topResults, nil
}
func (r *Runsc) args() []string {
var args []string
if r.Root != "" {
args = append(args, fmt.Sprintf("--root=%s", r.Root))
}
if r.Log != "" {
args = append(args, fmt.Sprintf("--log=%s", r.Log))
}
if r.LogFormat != "" {
args = append(args, fmt.Sprintf("--log-format=%s", r.LogFormat))
}
for k, v := range r.Config {
args = append(args, fmt.Sprintf("--%s=%s", k, v))
}
return args
}
// runOrError will run the provided command. If an error is
// encountered and neither Stdout nor Stderr was set, the error and
// the stderr of the command will be returned in the format
// "<error>: <stderr>".
func (r *Runsc) runOrError(cmd *exec.Cmd) error {
if cmd.Stdout != nil || cmd.Stderr != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
return err
}
status, err := Monitor.Wait(cmd, ec)
if err == nil && status != 0 {
err = fmt.Errorf("%s did not terminate successfully", cmd.Args[0])
}
return err
}
data, err := cmdOutput(cmd, true)
if err != nil {
return fmt.Errorf("%s: %s", err, data)
}
return nil
}
func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd {
command := r.Command
if command == "" {
command = DefaultCommand
}
cmd := exec.CommandContext(context, command, append(r.args(), args...)...)
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: r.Setpgid,
}
if r.PdeathSignal != 0 {
cmd.SysProcAttr.Pdeathsig = r.PdeathSignal
}
return cmd
}
func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) {
b := getBuf()
defer putBuf(b)
cmd.Stdout = b
if combined {
cmd.Stderr = b
}
ec, err := Monitor.Start(cmd)
if err != nil {
return nil, err
}
status, err := Monitor.Wait(cmd, ec)
if err == nil && status != 0 {
err = fmt.Errorf("%s did not terminate successfully", cmd.Args[0])
}
return b.Bytes(), err
}
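
The file above is the whole client surface for driving runsc. The sketch below shows, in hedged form, how a caller might exercise Create, Start, Wait, and Delete; the container ID, bundle path, root directory, and debug-log path are hypothetical placeholders, and the import path follows the go-runsc path used by the other files in this commit.

    package main

    import (
        "context"
        "log"

        runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
    )

    func main() {
        // Hypothetical root, bundle, and log paths; adjust for a real setup.
        r := &runsc.Runsc{
            Command:   "runsc",
            Root:      "/run/containerd/runsc/default",
            LogFormat: "json",
            Config:    map[string]string{"debug-log": "/tmp/runsc-%ID%/"},
        }
        ctx := context.Background()
        id := "demo-container"

        // Create the container from an OCI bundle, then start it.
        opts := &runsc.CreateOpts{PidFile: "/path/to/bundle/init.pid"}
        if err := r.Create(ctx, id, "/path/to/bundle", opts); err != nil {
            log.Fatalf("create: %v", err)
        }
        if err := r.Start(ctx, id, nil); err != nil {
            log.Fatalf("start: %v", err)
        }

        // Wait blocks until the container exits and reports its exit status.
        status, err := r.Wait(ctx, id)
        if err != nil {
            log.Fatalf("wait: %v", err)
        }
        log.Printf("container %s exited with status %d", id, status)

        // Remove the container state.
        if err := r.Delete(ctx, id, &runsc.DeleteOpts{Force: true}); err != nil {
            log.Fatalf("delete: %v", err)
        }
    }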

44
pkg/shim/runsc/utils.go Normal file

@ -0,0 +1,44 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package runsc
import (
"bytes"
"strings"
"sync"
)
var bytesBufferPool = sync.Pool{
New: func() interface{} {
return bytes.NewBuffer(nil)
},
}
func getBuf() *bytes.Buffer {
return bytesBufferPool.Get().(*bytes.Buffer)
}
func putBuf(b *bytes.Buffer) {
b.Reset()
bytesBufferPool.Put(b)
}
// FormatLogPath parses the runsc config and fills in %ID% in the log path.
func FormatLogPath(id string, config map[string]string) {
if path, ok := config["debug-log"]; ok {
config["debug-log"] = strings.Replace(path, "%ID%", id, -1)
}
}
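
A tiny, hedged illustration of the %ID% substitution that FormatLogPath performs; the map contents and paths are hypothetical, and the import path again follows the go-runsc path used elsewhere in this commit.

    package main

    import (
        "fmt"

        runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
    )

    func main() {
        // Hypothetical runsc config; only the debug-log key is rewritten.
        config := map[string]string{
            "debug-log": "/var/log/runsc/%ID%/gvisor.log",
            "debug":     "true",
        }
        runsc.FormatLogPath("my-container", config)
        fmt.Println(config["debug-log"]) // /var/log/runsc/my-container/gvisor.log
    }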


@ -0,0 +1,49 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"context"
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime/proc"
"github.com/pkg/errors"
)
type deletedState struct{}
func (*deletedState) Resize(ws console.WinSize) error {
return errors.Errorf("cannot resize a deleted process")
}
func (*deletedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a deleted process")
}
func (*deletedState) Delete(ctx context.Context) error {
return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process")
}
func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error {
return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process")
}
func (*deletedState) SetExited(status int) {}
func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
return nil, errors.Errorf("cannot exec in a deleted state")
}

282
pkg/shim/v1/proc/exec.go Normal file

@ -0,0 +1,282 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"syscall"
"time"
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime/proc"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
)
type execProcess struct {
wg sync.WaitGroup
execState execState
mu sync.Mutex
id string
console console.Console
io runc.IO
status int
exited time.Time
pid int
internalPid int
closers []io.Closer
stdin io.Closer
stdio proc.Stdio
path string
spec specs.Process
parent *Init
waitBlock chan struct{}
}
func (e *execProcess) Wait() {
<-e.waitBlock
}
func (e *execProcess) ID() string {
return e.id
}
func (e *execProcess) Pid() int {
e.mu.Lock()
defer e.mu.Unlock()
return e.pid
}
func (e *execProcess) ExitStatus() int {
e.mu.Lock()
defer e.mu.Unlock()
return e.status
}
func (e *execProcess) ExitedAt() time.Time {
e.mu.Lock()
defer e.mu.Unlock()
return e.exited
}
func (e *execProcess) SetExited(status int) {
e.mu.Lock()
defer e.mu.Unlock()
e.execState.SetExited(status)
}
func (e *execProcess) setExited(status int) {
e.status = status
e.exited = time.Now()
e.parent.Platform.ShutdownConsole(context.Background(), e.console)
close(e.waitBlock)
}
func (e *execProcess) Delete(ctx context.Context) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Delete(ctx)
}
func (e *execProcess) delete(ctx context.Context) error {
e.wg.Wait()
if e.io != nil {
for _, c := range e.closers {
c.Close()
}
e.io.Close()
}
pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id))
// silently ignore error
os.Remove(pidfile)
internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id))
// silently ignore error
os.Remove(internalPidfile)
return nil
}
func (e *execProcess) Resize(ws console.WinSize) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Resize(ws)
}
func (e *execProcess) resize(ws console.WinSize) error {
if e.console == nil {
return nil
}
return e.console.Resize(ws)
}
func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Kill(ctx, sig, false)
}
func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error {
internalPid := e.internalPid
if internalPid != 0 {
if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{
Pid: internalPid,
}); err != nil {
// If this returns an error, assume the process has already stopped.
// TODO: Fix after signal handling is fixed.
return errors.Wrapf(errdefs.ErrNotFound, err.Error())
}
}
return nil
}
func (e *execProcess) Stdin() io.Closer {
return e.stdin
}
func (e *execProcess) Stdio() proc.Stdio {
return e.stdio
}
func (e *execProcess) Start(ctx context.Context) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Start(ctx)
}
func (e *execProcess) start(ctx context.Context) (err error) {
var (
socket *runc.Socket
pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id))
internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id))
)
if e.stdio.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return errors.Wrap(err, "failed to create runc console socket")
}
defer socket.Close()
} else if e.stdio.IsNull() {
if e.io, err = runc.NewNullIO(); err != nil {
return errors.Wrap(err, "creating new NULL IO")
}
} else {
if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil {
return errors.Wrap(err, "failed to create runc io pipes")
}
}
opts := &runsc.ExecOpts{
PidFile: pidfile,
InternalPidFile: internalPidfile,
IO: e.io,
Detach: true,
}
if socket != nil {
opts.ConsoleSocket = socket
}
eventCh := e.parent.Monitor.Subscribe()
defer func() {
// Unsubscribe if an error is returned.
if err != nil {
e.parent.Monitor.Unsubscribe(eventCh)
}
}()
if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil {
close(e.waitBlock)
return e.parent.runtimeError(err, "OCI runtime exec failed")
}
if e.stdio.Stdin != "" {
sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin)
}
e.closers = append(e.closers, sc)
e.stdin = sc
}
var copyWaitGroup sync.WaitGroup
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return errors.Wrap(err, "failed to retrieve console master")
}
if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, &copyWaitGroup); err != nil {
return errors.Wrap(err, "failed to start console copy")
}
} else if !e.stdio.IsNull() {
if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, &copyWaitGroup); err != nil {
return errors.Wrap(err, "failed to start io pipe copy")
}
}
copyWaitGroup.Wait()
pid, err := runc.ReadPidFile(opts.PidFile)
if err != nil {
return errors.Wrap(err, "failed to retrieve OCI runtime exec pid")
}
e.pid = pid
internalPid, err := runc.ReadPidFile(opts.InternalPidFile)
if err != nil {
return errors.Wrap(err, "failed to retrieve OCI runtime exec internal pid")
}
e.internalPid = internalPid
go func() {
defer e.parent.Monitor.Unsubscribe(eventCh)
for event := range eventCh {
if event.Pid == e.pid {
ExitCh <- Exit{
Timestamp: event.Timestamp,
ID: e.id,
Status: event.Status,
}
break
}
}
}()
return nil
}
func (e *execProcess) Status(ctx context.Context) (string, error) {
e.mu.Lock()
defer e.mu.Unlock()
// if we don't have a pid then the exec process has just been created
if e.pid == 0 {
return "created", nil
}
// if we have a pid and it can be signaled, the process is running
// TODO(random-liu): Use `runsc kill --pid`.
if err := unix.Kill(e.pid, 0); err == nil {
return "running", nil
}
// else if we have a pid but it can no longer be signaled, it has stopped
return "stopped", nil
}


@ -0,0 +1,154 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"context"
"github.com/containerd/console"
"github.com/pkg/errors"
)
type execState interface {
Resize(console.WinSize) error
Start(context.Context) error
Delete(context.Context) error
Kill(context.Context, uint32, bool) error
SetExited(int)
}
type execCreatedState struct {
p *execProcess
}
func (s *execCreatedState) transition(name string) error {
switch name {
case "running":
s.p.execState = &execRunningState{p: s.p}
case "stopped":
s.p.execState = &execStoppedState{p: s.p}
case "deleted":
s.p.execState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execCreatedState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *execCreatedState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
return err
}
return s.transition("running")
}
func (s *execCreatedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execCreatedState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
type execRunningState struct {
p *execProcess
}
func (s *execRunningState) transition(name string) error {
switch name {
case "stopped":
s.p.execState = &execStoppedState{p: s.p}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execRunningState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *execRunningState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a running process")
}
func (s *execRunningState) Delete(ctx context.Context) error {
return errors.Errorf("cannot delete a running process")
}
func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execRunningState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
type execStoppedState struct {
p *execProcess
}
func (s *execStoppedState) transition(name string) error {
switch name {
case "deleted":
s.p.execState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execStoppedState) Resize(ws console.WinSize) error {
return errors.Errorf("cannot resize a stopped container")
}
func (s *execStoppedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a stopped process")
}
func (s *execStoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execStoppedState) SetExited(status int) {
// no op
}

462
pkg/shim/v1/proc/init.go Normal file

@ -0,0 +1,462 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"context"
"encoding/json"
"io"
"path/filepath"
"strings"
"sync"
"syscall"
"time"
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/runtime/proc"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
)
// InitPidFile is the name of the file that contains the init pid.
const InitPidFile = "init.pid"
// Init represents an initial process for a container
type Init struct {
wg sync.WaitGroup
initState initState
// mu is used to ensure that `Start()` and `Exited()` calls return in
// the right order when invoked in separate goroutines.
// This is the case within the shim implementation as it makes use of
// the reaper interface.
mu sync.Mutex
waitBlock chan struct{}
WorkDir string
id string
Bundle string
console console.Console
Platform proc.Platform
io runc.IO
runtime *runsc.Runsc
status int
exited time.Time
pid int
closers []io.Closer
stdin io.Closer
stdio proc.Stdio
Rootfs string
IoUID int
IoGID int
Sandbox bool
UserLog string
Monitor ProcessMonitor
}
// NewRunsc returns a new runsc instance for a process
func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc {
if root == "" {
root = RunscRoot
}
return &runsc.Runsc{
Command: runtime,
PdeathSignal: syscall.SIGKILL,
Log: filepath.Join(path, "log.json"),
LogFormat: runc.JSON,
Root: filepath.Join(root, namespace),
Config: config,
}
}
// New returns a new init process
func New(id string, runtime *runsc.Runsc, stdio proc.Stdio) *Init {
p := &Init{
id: id,
runtime: runtime,
stdio: stdio,
status: 0,
waitBlock: make(chan struct{}),
}
p.initState = &createdState{p: p}
return p
}
// Create the process with the provided config
func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) {
var socket *runc.Socket
if r.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return errors.Wrap(err, "failed to create OCI runtime console socket")
}
defer socket.Close()
} else if hasNoIO(r) {
if p.io, err = runc.NewNullIO(); err != nil {
return errors.Wrap(err, "creating new NULL IO")
}
} else {
if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil {
return errors.Wrap(err, "failed to create OCI runtime io pipes")
}
}
pidFile := filepath.Join(p.Bundle, InitPidFile)
opts := &runsc.CreateOpts{
PidFile: pidFile,
}
if socket != nil {
opts.ConsoleSocket = socket
}
if p.Sandbox {
opts.IO = p.io
// UserLog is only useful for sandbox.
opts.UserLog = p.UserLog
}
if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
return p.runtimeError(err, "OCI runtime create failed")
}
if r.Stdin != "" {
sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin)
}
p.stdin = sc
p.closers = append(p.closers, sc)
}
var copyWaitGroup sync.WaitGroup
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return errors.Wrap(err, "failed to retrieve console master")
}
console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, &copyWaitGroup)
if err != nil {
return errors.Wrap(err, "failed to start console copy")
}
p.console = console
} else if !hasNoIO(r) {
if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, &copyWaitGroup); err != nil {
return errors.Wrap(err, "failed to start io pipe copy")
}
}
copyWaitGroup.Wait()
pid, err := runc.ReadPidFile(pidFile)
if err != nil {
return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
}
p.pid = pid
return nil
}
// Wait for the process to exit
func (p *Init) Wait() {
<-p.waitBlock
}
// ID of the process
func (p *Init) ID() string {
return p.id
}
// Pid of the process
func (p *Init) Pid() int {
return p.pid
}
// ExitStatus of the process
func (p *Init) ExitStatus() int {
p.mu.Lock()
defer p.mu.Unlock()
return p.status
}
// ExitedAt is the time when the process exited
func (p *Init) ExitedAt() time.Time {
p.mu.Lock()
defer p.mu.Unlock()
return p.exited
}
// Status of the process
func (p *Init) Status(ctx context.Context) (string, error) {
p.mu.Lock()
defer p.mu.Unlock()
c, err := p.runtime.State(ctx, p.id)
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
return "stopped", nil
}
return "", p.runtimeError(err, "OCI runtime state failed")
}
return p.convertStatus(c.Status), nil
}
// Start the init process
func (p *Init) Start(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Start(ctx)
}
func (p *Init) start(ctx context.Context) error {
var cio runc.IO
if !p.Sandbox {
cio = p.io
}
if err := p.runtime.Start(ctx, p.id, cio); err != nil {
return p.runtimeError(err, "OCI runtime start failed")
}
go func() {
status, err := p.runtime.Wait(context.Background(), p.id)
if err != nil {
log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id)
// TODO(random-liu): Handle runsc kill error.
if err := p.killAll(ctx); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id)
}
status = internalErrorCode
}
ExitCh <- Exit{
Timestamp: time.Now(),
ID: p.id,
Status: status,
}
}()
return nil
}
// SetExited sets the exit status of the init process
func (p *Init) SetExited(status int) {
p.mu.Lock()
defer p.mu.Unlock()
p.initState.SetExited(status)
}
func (p *Init) setExited(status int) {
p.exited = time.Now()
p.status = status
p.Platform.ShutdownConsole(context.Background(), p.console)
close(p.waitBlock)
}
// Delete the init process
func (p *Init) Delete(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Delete(ctx)
}
func (p *Init) delete(ctx context.Context) error {
p.killAll(ctx)
p.wg.Wait()
err := p.runtime.Delete(ctx, p.id, nil)
// ignore errors if a runtime has already deleted the process
// but we still hold metadata and pipes
//
// this is common during a checkpoint: runc deletes the container state
// after a checkpoint and the container no longer exists within runc
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
err = nil
} else {
err = p.runtimeError(err, "failed to delete task")
}
}
if p.io != nil {
for _, c := range p.closers {
c.Close()
}
p.io.Close()
}
if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
if err == nil {
err = errors.Wrap(err2, "failed rootfs umount")
}
}
return err
}
// Resize the init process's console
func (p *Init) Resize(ws console.WinSize) error {
p.mu.Lock()
defer p.mu.Unlock()
if p.console == nil {
return nil
}
return p.console.Resize(ws)
}
func (p *Init) resize(ws console.WinSize) error {
if p.console == nil {
return nil
}
return p.console.Resize(ws)
}
// Kill the init process
func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Kill(ctx, signal, all)
}
func (p *Init) kill(context context.Context, signal uint32, all bool) error {
var (
killErr error
backoff = 100 * time.Millisecond
)
timeout := 1 * time.Second
for start := time.Now(); time.Now().Sub(start) < timeout; {
c, err := p.runtime.State(context, p.id)
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
return errors.Wrapf(errdefs.ErrNotFound, "no such process")
}
return p.runtimeError(err, "OCI runtime state failed")
}
// For runsc, signals only work when the container is in the running
// state. If the container is not in the running state, return
// "no such process" directly.
if p.convertStatus(c.Status) == "stopped" {
return errors.Wrapf(errdefs.ErrNotFound, "no such process")
}
killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{
All: all,
})
if killErr == nil {
return nil
}
time.Sleep(backoff)
backoff *= 2
}
return p.runtimeError(killErr, "kill timeout")
}
// KillAll processes belonging to the init process
func (p *Init) KillAll(context context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.killAll(context)
}
func (p *Init) killAll(context context.Context) error {
p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{
All: true,
})
// Ignore errors from `runsc kill --all` for now:
// * if it doesn't return an error, all is well;
// * if it returns an error, assume the container has already stopped.
// TODO: Fix `runsc kill --all` error handling.
return nil
}
// Stdin of the process
func (p *Init) Stdin() io.Closer {
return p.stdin
}
// Runtime returns the OCI runtime configured for the init process
func (p *Init) Runtime() *runsc.Runsc {
return p.runtime
}
// Exec returns a new child process
func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Exec(ctx, path, r)
}
// exec returns a new exec'd process
func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
// process exec request
var spec specs.Process
if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
return nil, err
}
spec.Terminal = r.Terminal
e := &execProcess{
id: r.ID,
path: path,
parent: p,
spec: spec,
stdio: proc.Stdio{
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Terminal: r.Terminal,
},
waitBlock: make(chan struct{}),
}
e.execState = &execCreatedState{p: e}
return e, nil
}
// Stdio of the process
func (p *Init) Stdio() proc.Stdio {
return p.stdio
}
func (p *Init) runtimeError(rErr error, msg string) error {
if rErr == nil {
return nil
}
rMsg, err := getLastRuntimeError(p.runtime)
switch {
case err != nil:
return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error())
case rMsg == "":
return errors.Wrap(rErr, msg)
default:
return errors.Errorf("%s: %s", msg, rMsg)
}
}
func (p *Init) convertStatus(status string) string {
if status == "created" && !p.Sandbox && p.status == internalErrorCode {
// Treat start failure state for non-root container as stopped.
return "stopped"
}
return status
}
func withConditionalIO(c proc.Stdio) runc.IOOpt {
return func(o *runc.IOOption) {
o.OpenStdin = c.Stdin != ""
o.OpenStdout = c.Stdout != ""
o.OpenStderr = c.Stderr != ""
}
}
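
To show how the runtime client and the Init process fit together, here is a hedged sketch that mirrors the shim's Create path in miniature; the bundle path, namespace, and runsc flags are hypothetical, the import paths follow the ones used elsewhere in this commit, and the Platform, IO, and FIFO plumbing that the real shim fills in before calling Create is omitted.

    package main

    import (
        "fmt"

        cproc "github.com/containerd/containerd/runtime/proc"
        proc "github.com/google/gvisor-containerd-shim/pkg/v1/proc"
    )

    func main() {
        // Hypothetical bundle path and containerd namespace.
        bundle := "/run/containerd/io.containerd.runtime.v1.linux/k8s.io/demo"

        // NewRunsc builds the runsc client the Init process will drive:
        // logs go to <bundle>/log.json in JSON format, and runsc state
        // lives under <root>/<namespace>.
        runtime := proc.NewRunsc("", bundle, "k8s.io", "runsc",
            map[string]string{"debug": "true"})

        // New wires the client into an Init process in the created state;
        // Bundle, Platform, IoUID/IoGID, and the stdio FIFOs are normally
        // filled in by the shim's Create handler before p.Create is called.
        p := proc.New("demo", runtime, cproc.Stdio{
            Terminal: false, // no console in this sketch
        })
        p.Bundle = bundle
        fmt.Println(p.ID(), p.Pid()) // "demo 0" until Create succeeds
    }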


@ -0,0 +1,182 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"context"
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/runtime/proc"
"github.com/pkg/errors"
)
type initState interface {
Resize(console.WinSize) error
Start(context.Context) error
Delete(context.Context) error
Exec(context.Context, string, *ExecConfig) (proc.Process, error)
Kill(context.Context, uint32, bool) error
SetExited(int)
}
type createdState struct {
p *Init
}
func (s *createdState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "deleted":
s.p.initState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *createdState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *createdState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
// Containerd doesn't allow deleting a container in the created state.
// However, for gVisor, a non-root container in the created state can
// only go to the running state. If the container can't be started, it
// can only stay in the created state and can never be deleted.
// To work around that, we treat a non-root container that fails to
// start as stopped.
if !s.p.Sandbox {
s.p.io.Close()
s.p.setExited(internalErrorCode)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
return err
}
return s.transition("running")
}
func (s *createdState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *createdState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
return s.p.exec(ctx, path, r)
}
type runningState struct {
p *Init
}
func (s *runningState) transition(name string) error {
switch name {
case "stopped":
s.p.initState = &stoppedState{p: s.p}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *runningState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *runningState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a running process")
}
func (s *runningState) Delete(ctx context.Context) error {
return errors.Errorf("cannot delete a running process")
}
func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *runningState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
return s.p.exec(ctx, path, r)
}
type stoppedState struct {
p *Init
}
func (s *stoppedState) transition(name string) error {
switch name {
case "deleted":
s.p.initState = &deletedState{}
default:
return errors.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *stoppedState) Resize(ws console.WinSize) error {
return errors.Errorf("cannot resize a stopped container")
}
func (s *stoppedState) Start(ctx context.Context) error {
return errors.Errorf("cannot start a stopped process")
}
func (s *stoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s not found", s.p.id)
}
func (s *stoppedState) SetExited(status int) {
// no op
}
func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
return nil, errors.Errorf("cannot exec in a stopped state")
}
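
The switch statements above encode a small state machine; the sketch below merely restates the legal transitions in one place and adds no behavior beyond what the code already enforces.

    package main

    import "fmt"

    // validTransitions restates the switch statements above: each state maps
    // to the set of states it may legally move to; anything else yields an
    // "invalid state transition" error (or a panic when driven by SetExited).
    var validTransitions = map[string][]string{
        "created": {"running", "stopped", "deleted"},
        "running": {"stopped"},
        "stopped": {"deleted"},
        "deleted": {}, // terminal: deletedState rejects further operations
    }

    func canTransition(from, to string) bool {
        for _, next := range validTransitions[from] {
            if next == to {
                return true
            }
        }
        return false
    }

    func main() {
        fmt.Println(canTransition("created", "running")) // true
        fmt.Println(canTransition("stopped", "running")) // false
    }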

167
pkg/shim/v1/proc/io.go Normal file

@ -0,0 +1,167 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"context"
"fmt"
"io"
"os"
"sync"
"sync/atomic"
"syscall"
"github.com/containerd/containerd/log"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
)
// TODO(random-liu): This file can be a util.
var bufPool = sync.Pool{
New: func() interface{} {
buffer := make([]byte, 32<<10)
return &buffer
},
}
func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error {
var sameFile *countingWriteCloser
for _, i := range []struct {
name string
dest func(wc io.WriteCloser, rc io.Closer)
}{
{
name: stdout,
dest: func(wc io.WriteCloser, rc io.Closer) {
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil {
log.G(ctx).Warn("error copying stdout")
}
wg.Done()
wc.Close()
if rc != nil {
rc.Close()
}
}()
},
}, {
name: stderr,
dest: func(wc io.WriteCloser, rc io.Closer) {
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil {
log.G(ctx).Warn("error copying stderr")
}
wg.Done()
wc.Close()
if rc != nil {
rc.Close()
}
}()
},
},
} {
ok, err := isFifo(i.name)
if err != nil {
return err
}
var (
fw io.WriteCloser
fr io.Closer
)
if ok {
if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil {
return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err)
}
if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil {
return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err)
}
} else {
if sameFile != nil {
sameFile.count++
i.dest(sameFile, nil)
continue
}
if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil {
return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err)
}
if stdout == stderr {
sameFile = &countingWriteCloser{
WriteCloser: fw,
count: 1,
}
}
}
i.dest(fw, fr)
}
if stdin == "" {
return nil
}
f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err)
}
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(rio.Stdin(), f, *p)
rio.Stdin().Close()
f.Close()
}()
return nil
}
// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times.
type countingWriteCloser struct {
io.WriteCloser
count int64
}
func (c *countingWriteCloser) Close() error {
if atomic.AddInt64(&c.count, -1) > 0 {
return nil
}
return c.WriteCloser.Close()
}
// isFifo checks whether a file is a FIFO.
// If the file does not exist, it returns false.
func isFifo(path string) (bool, error) {
stat, err := os.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return false, nil
}
return false, err
}
if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe {
return true, nil
}
return false, nil
}
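
countingWriteCloser exists so that, when stdout and stderr are redirected to the same regular file, the shared file is closed only after both copy goroutines have called Close. A hedged, standalone sketch of that behavior; fakeCloser is a hypothetical stand-in for the shared *os.File.

    package main

    import (
        "fmt"
        "io"
        "sync/atomic"
    )

    // fakeCloser stands in for the shared file and records whether Close ran.
    type fakeCloser struct{ closed bool }

    func (f *fakeCloser) Write(p []byte) (int, error) { return len(p), nil }
    func (f *fakeCloser) Close() error                { f.closed = true; return nil }

    // countingWriteCloser mirrors the type in io.go: Close only passes
    // through to the underlying closer once the count reaches zero.
    type countingWriteCloser struct {
        io.WriteCloser
        count int64
    }

    func (c *countingWriteCloser) Close() error {
        if atomic.AddInt64(&c.count, -1) > 0 {
            return nil
        }
        return c.WriteCloser.Close()
    }

    func main() {
        f := &fakeCloser{}
        shared := &countingWriteCloser{WriteCloser: f, count: 2} // stdout + stderr

        shared.Close()        // first copier done: file stays open
        fmt.Println(f.closed) // false
        shared.Close()        // second copier done: file is closed
        fmt.Println(f.closed) // true
    }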


@ -0,0 +1,37 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"github.com/pkg/errors"
)
// RunscRoot is the path to the root runsc state directory
const RunscRoot = "/run/containerd/runsc"
func stateName(v interface{}) string {
switch v.(type) {
case *runningState, *execRunningState:
return "running"
case *createdState, *execCreatedState:
return "created"
case *deletedState:
return "deleted"
case *stoppedState:
return "stopped"
}
panic(errors.Errorf("invalid state %v", v))
}

70
pkg/shim/v1/proc/types.go Normal file

@ -0,0 +1,70 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"time"
google_protobuf "github.com/gogo/protobuf/types"
runc "github.com/containerd/go-runc"
)
// Mount holds filesystem mount configuration
type Mount struct {
Type string
Source string
Target string
Options []string
}
// CreateConfig holds task creation configuration
type CreateConfig struct {
ID string
Bundle string
Runtime string
Rootfs []Mount
Terminal bool
Stdin string
Stdout string
Stderr string
Options *google_protobuf.Any
}
// ExecConfig holds exec creation configuration
type ExecConfig struct {
ID string
Terminal bool
Stdin string
Stdout string
Stderr string
Spec *google_protobuf.Any
}
// Exit is the type of exit events
type Exit struct {
Timestamp time.Time
ID string
Status int
}
// ProcessMonitor monitors process exit changes
type ProcessMonitor interface {
// Subscribe to process exit changes
Subscribe() chan runc.Exit
// Unsubscribe from process exit changes
Unsubscribe(c chan runc.Exit)
}
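
In the real shim the ProcessMonitor is containerd's reaper, which forwards child exit events. The sketch below is only a hypothetical in-process stand-in that satisfies the interface and fans runc.Exit events out to subscribers, useful for seeing how Subscribe and Unsubscribe are meant to be used.

    package main

    import (
        "fmt"
        "sync"
        "time"

        runc "github.com/containerd/go-runc"
    )

    // fanoutMonitor is a toy ProcessMonitor: Publish delivers one runc.Exit
    // to every current subscriber. The real shim relies on containerd's reaper.
    type fanoutMonitor struct {
        mu   sync.Mutex
        subs map[chan runc.Exit]struct{}
    }

    func newFanoutMonitor() *fanoutMonitor {
        return &fanoutMonitor{subs: make(map[chan runc.Exit]struct{})}
    }

    func (m *fanoutMonitor) Subscribe() chan runc.Exit {
        m.mu.Lock()
        defer m.mu.Unlock()
        c := make(chan runc.Exit, 32)
        m.subs[c] = struct{}{}
        return c
    }

    func (m *fanoutMonitor) Unsubscribe(c chan runc.Exit) {
        m.mu.Lock()
        defer m.mu.Unlock()
        delete(m.subs, c)
        close(c)
    }

    func (m *fanoutMonitor) Publish(e runc.Exit) {
        m.mu.Lock()
        defer m.mu.Unlock()
        for c := range m.subs {
            c <- e
        }
    }

    func main() {
        m := newFanoutMonitor()
        ch := m.Subscribe()
        m.Publish(runc.Exit{Pid: 1234, Status: 0, Timestamp: time.Now()})
        e := <-ch
        fmt.Printf("pid %d exited with %d\n", e.Pid, e.Status)
        m.Unsubscribe(ch)
    }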

92
pkg/shim/v1/proc/utils.go Normal file

@ -0,0 +1,92 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proc
import (
"encoding/json"
"io"
"os"
"strings"
"time"
runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
)
const (
internalErrorCode = 128
bufferSize = 32
)
// ExitCh is the exit events channel for containers and exec processes
// inside the sandbox.
var ExitCh = make(chan Exit, bufferSize)
// TODO(random-liu): This can be a utility.
// TODO(mlaventure): move to runc package?
func getLastRuntimeError(r *runsc.Runsc) (string, error) {
if r.Log == "" {
return "", nil
}
f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400)
if err != nil {
return "", err
}
var (
errMsg string
log struct {
Level string
Msg string
Time time.Time
}
)
dec := json.NewDecoder(f)
for err = nil; err == nil; {
if err = dec.Decode(&log); err != nil && err != io.EOF {
return "", err
}
if log.Level == "error" {
errMsg = strings.TrimSpace(log.Msg)
}
}
return errMsg, nil
}
func copyFile(to, from string) error {
ff, err := os.Open(from)
if err != nil {
return err
}
defer ff.Close()
tt, err := os.Create(to)
if err != nil {
return err
}
defer tt.Close()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
_, err = io.CopyBuffer(tt, ff, *p)
return err
}
func hasNoIO(r *CreateConfig) bool {
return r.Stdin == "" && r.Stdout == "" && r.Stderr == ""
}
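
getLastRuntimeError scans the runsc debug log, which NewRunsc configures to be written as JSON to log.json, and keeps the message of the last entry whose level is "error". The standalone sketch below runs the same decode loop over a made-up log; the log lines and the error text are hypothetical.

    package main

    import (
        "encoding/json"
        "fmt"
        "io"
        "strings"
        "time"
    )

    // sample is a hypothetical runsc log.json; real entries carry more fields.
    const sample = `
    {"level":"info","msg":"container started","time":"2020-07-09T16:59:42Z"}
    {"level":"error","msg":"hypothetical runsc failure","time":"2020-07-09T16:59:43Z"}
    `

    func main() {
        var (
            errMsg string
            entry  struct {
                Level string
                Msg   string
                Time  time.Time
            }
        )
        dec := json.NewDecoder(strings.NewReader(sample))
        for {
            if err := dec.Decode(&entry); err != nil {
                if err == io.EOF {
                    break
                }
                panic(err)
            }
            if entry.Level == "error" {
                errMsg = strings.TrimSpace(entry.Msg) // the last error wins
            }
        }
        fmt.Println(errMsg) // hypothetical runsc failure
    }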


@ -0,0 +1,110 @@
// Copyright 2018 The containerd Authors.
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package shim
import (
"context"
"io"
"sync"
"syscall"
"github.com/containerd/console"
"github.com/containerd/fifo"
"github.com/pkg/errors"
)
type linuxPlatform struct {
epoller *console.Epoller
}
func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) {
if p.epoller == nil {
return nil, errors.New("uninitialized epoller")
}
epollConsole, err := p.epoller.Add(console)
if err != nil {
return nil, err
}
if stdin != "" {
in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(epollConsole, in, *p)
}()
}
outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0)
if err != nil {
return nil, err
}
outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(outw, epollConsole, *p)
epollConsole.Close()
outr.Close()
outw.Close()
wg.Done()
}()
return epollConsole, nil
}
func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error {
if p.epoller == nil {
return errors.New("uninitialized epoller")
}
epollConsole, ok := cons.(*console.EpollConsole)
if !ok {
return errors.Errorf("expected EpollConsole, got %#v", cons)
}
return epollConsole.Shutdown(p.epoller.CloseConsole)
}
func (p *linuxPlatform) Close() error {
return p.epoller.Close()
}
// initialize a single epoll fd to manage our consoles. `initPlatform` should
// only be called once.
func (s *Service) initPlatform() error {
if s.platform != nil {
return nil
}
epoller, err := console.NewEpoller()
if err != nil {
return errors.Wrap(err, "failed to initialize epoller")
}
s.platform = &linuxPlatform{
epoller: epoller,
}
go epoller.Wait()
return nil
}

579
pkg/shim/v1/shim/service.go Normal file

@ -0,0 +1,579 @@
// Copyright 2018 The containerd Authors.
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package shim
import (
"context"
"fmt"
"os"
"path/filepath"
"sync"
"github.com/containerd/console"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/linux/runctypes"
rproc "github.com/containerd/containerd/runtime/proc"
"github.com/containerd/containerd/runtime/v1/shim"
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
"github.com/containerd/typeurl"
ptypes "github.com/gogo/protobuf/types"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
"github.com/google/gvisor-containerd-shim/pkg/v1/proc"
"github.com/google/gvisor-containerd-shim/pkg/v1/utils"
)
var (
empty = &ptypes.Empty{}
bufPool = sync.Pool{
New: func() interface{} {
buffer := make([]byte, 32<<10)
return &buffer
},
}
)
// Config contains shim specific configuration
type Config struct {
Path string
Namespace string
WorkDir string
RuntimeRoot string
RunscConfig map[string]string
}
// NewService returns a new shim service that can be used via GRPC
func NewService(config Config, publisher events.Publisher) (*Service, error) {
if config.Namespace == "" {
return nil, fmt.Errorf("shim namespace cannot be empty")
}
ctx := namespaces.WithNamespace(context.Background(), config.Namespace)
ctx = log.WithLogger(ctx, logrus.WithFields(logrus.Fields{
"namespace": config.Namespace,
"path": config.Path,
"pid": os.Getpid(),
}))
s := &Service{
config: config,
context: ctx,
processes: make(map[string]rproc.Process),
events: make(chan interface{}, 128),
ec: proc.ExitCh,
}
go s.processExits()
if err := s.initPlatform(); err != nil {
return nil, errors.Wrap(err, "failed to initialize platform behavior")
}
go s.forward(publisher)
return s, nil
}
// Service is the shim implementation of a remote shim over GRPC
type Service struct {
mu sync.Mutex
config Config
context context.Context
processes map[string]rproc.Process
events chan interface{}
platform rproc.Platform
ec chan proc.Exit
// Filled by Create()
id string
bundle string
}
// Create a new initial process and container with the underlying OCI runtime
func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ *shimapi.CreateTaskResponse, err error) {
s.mu.Lock()
defer s.mu.Unlock()
var mounts []proc.Mount
for _, m := range r.Rootfs {
mounts = append(mounts, proc.Mount{
Type: m.Type,
Source: m.Source,
Target: m.Target,
Options: m.Options,
})
}
rootfs := filepath.Join(r.Bundle, "rootfs")
if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) {
return nil, err
}
config := &proc.CreateConfig{
ID: r.ID,
Bundle: r.Bundle,
Runtime: r.Runtime,
Rootfs: mounts,
Terminal: r.Terminal,
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Options: r.Options,
}
defer func() {
if err != nil {
if err2 := mount.UnmountAll(rootfs, 0); err2 != nil {
log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount")
}
}
}()
for _, rm := range mounts {
m := &mount.Mount{
Type: rm.Type,
Source: rm.Source,
Options: rm.Options,
}
if err := m.Mount(rootfs); err != nil {
return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m)
}
}
process, err := newInit(
ctx,
s.config.Path,
s.config.WorkDir,
s.config.RuntimeRoot,
s.config.Namespace,
s.config.RunscConfig,
s.platform,
config,
)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
if err := process.Create(ctx, config); err != nil {
return nil, errdefs.ToGRPC(err)
}
// save the main task id and bundle to the shim for additional requests
s.id = r.ID
s.bundle = r.Bundle
pid := process.Pid()
s.processes[r.ID] = process
return &shimapi.CreateTaskResponse{
Pid: uint32(pid),
}, nil
}
// Start a process
func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Start(ctx); err != nil {
return nil, err
}
return &shimapi.StartResponse{
ID: p.ID(),
Pid: uint32(p.Pid()),
}, nil
}
// Delete the initial process and container
func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteResponse, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.Delete(ctx); err != nil {
return nil, err
}
s.mu.Lock()
delete(s.processes, s.id)
s.mu.Unlock()
s.platform.Close()
return &shimapi.DeleteResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
Pid: uint32(p.Pid()),
}, nil
}
// DeleteProcess deletes an exec'd process
func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) {
if r.ID == s.id {
return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess")
}
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Delete(ctx); err != nil {
return nil, err
}
s.mu.Lock()
delete(s.processes, r.ID)
s.mu.Unlock()
return &shimapi.DeleteResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
Pid: uint32(p.Pid()),
}, nil
}
// Exec an additional process inside the container
func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*ptypes.Empty, error) {
s.mu.Lock()
if p := s.processes[r.ID]; p != nil {
s.mu.Unlock()
return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID)
}
p := s.processes[s.id]
s.mu.Unlock()
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
process, err := p.(*proc.Init).Exec(ctx, s.config.Path, &proc.ExecConfig{
ID: r.ID,
Terminal: r.Terminal,
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Spec: r.Spec,
})
if err != nil {
return nil, errdefs.ToGRPC(err)
}
s.mu.Lock()
s.processes[r.ID] = process
s.mu.Unlock()
return empty, nil
}
// ResizePty of a process
func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*ptypes.Empty, error) {
if r.ID == "" {
return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided")
}
ws := console.WinSize{
Width: uint16(r.Width),
Height: uint16(r.Height),
}
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Resize(ws); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// State returns runtime state information for a process
func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.StateResponse, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
st, err := p.Status(ctx)
if err != nil {
return nil, err
}
status := task.StatusUnknown
switch st {
case "created":
status = task.StatusCreated
case "running":
status = task.StatusRunning
case "stopped":
status = task.StatusStopped
}
sio := p.Stdio()
return &shimapi.StateResponse{
ID: p.ID(),
Bundle: s.bundle,
Pid: uint32(p.Pid()),
Status: status,
Stdin: sio.Stdin,
Stdout: sio.Stdout,
Stderr: sio.Stderr,
Terminal: sio.Terminal,
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
}, nil
}
// Pause the container
func (s *Service) Pause(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Resume the container
func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Kill a process with the provided signal
func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) {
if r.ID == "" {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// ListPids returns all pids inside the container
func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*shimapi.ListPidsResponse, error) {
pids, err := s.getContainerPids(ctx, r.ID)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
var processes []*task.ProcessInfo
for _, pid := range pids {
pInfo := task.ProcessInfo{
Pid: pid,
}
for _, p := range s.processes {
if p.Pid() == int(pid) {
d := &runctypes.ProcessDetails{
ExecID: p.ID(),
}
a, err := typeurl.MarshalAny(d)
if err != nil {
return nil, errors.Wrapf(err, "failed to marshal process %d info", pid)
}
pInfo.Info = a
break
}
}
processes = append(processes, &pInfo)
}
return &shimapi.ListPidsResponse{
Processes: processes,
}, nil
}
// CloseIO of a process
func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*ptypes.Empty, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
if stdin := p.Stdin(); stdin != nil {
if err := stdin.Close(); err != nil {
return nil, errors.Wrap(err, "close stdin")
}
}
return empty, nil
}
// Checkpoint the container
func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// ShimInfo returns shim information such as the shim's pid
func (s *Service) ShimInfo(ctx context.Context, r *ptypes.Empty) (*shimapi.ShimInfoResponse, error) {
return &shimapi.ShimInfoResponse{
ShimPid: uint32(os.Getpid()),
}, nil
}
// Update a running container
func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Wait for a process to exit
func (s *Service) Wait(ctx context.Context, r *shimapi.WaitRequest) (*shimapi.WaitResponse, error) {
p, err := s.getExecProcess(r.ID)
if err != nil {
return nil, err
}
p.Wait()
return &shimapi.WaitResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
}, nil
}
func (s *Service) processExits() {
for e := range s.ec {
s.checkProcesses(e)
}
}
func (s *Service) allProcesses() []rproc.Process {
s.mu.Lock()
defer s.mu.Unlock()
res := make([]rproc.Process, 0, len(s.processes))
for _, p := range s.processes {
res = append(res, p)
}
return res
}
func (s *Service) checkProcesses(e proc.Exit) {
for _, p := range s.allProcesses() {
if p.ID() == e.ID {
if ip, ok := p.(*proc.Init); ok {
// Ensure all children are killed
if err := ip.KillAll(s.context); err != nil {
log.G(s.context).WithError(err).WithField("id", ip.ID()).
Error("failed to kill init's children")
}
}
p.SetExited(e.Status)
s.events <- &eventstypes.TaskExit{
ContainerID: s.id,
ID: p.ID(),
Pid: uint32(p.Pid()),
ExitStatus: uint32(e.Status),
ExitedAt: p.ExitedAt(),
}
return
}
}
}
func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
p, err := s.getInitProcess()
if err != nil {
return nil, err
}
ps, err := p.(*proc.Init).Runtime().Ps(ctx, id)
if err != nil {
return nil, err
}
pids := make([]uint32, 0, len(ps))
for _, pid := range ps {
pids = append(pids, uint32(pid))
}
return pids, nil
}
func (s *Service) forward(publisher events.Publisher) {
for e := range s.events {
if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil {
log.G(s.context).WithError(err).Error("post event")
}
}
}
// getInitProcess returns the initial process.
func (s *Service) getInitProcess() (rproc.Process, error) {
s.mu.Lock()
defer s.mu.Unlock()
p := s.processes[s.id]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
return p, nil
}
// getExecProcess returns the exec process with the given id.
func (s *Service) getExecProcess(id string) (rproc.Process, error) {
s.mu.Lock()
defer s.mu.Unlock()
p := s.processes[id]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id)
}
return p, nil
}
func getTopic(ctx context.Context, e interface{}) string {
switch e.(type) {
case *eventstypes.TaskCreate:
return runtime.TaskCreateEventTopic
case *eventstypes.TaskStart:
return runtime.TaskStartEventTopic
case *eventstypes.TaskOOM:
return runtime.TaskOOMEventTopic
case *eventstypes.TaskExit:
return runtime.TaskExitEventTopic
case *eventstypes.TaskDelete:
return runtime.TaskDeleteEventTopic
case *eventstypes.TaskExecAdded:
return runtime.TaskExecAddedEventTopic
case *eventstypes.TaskExecStarted:
return runtime.TaskExecStartedEventTopic
default:
logrus.Warnf("no topic for type %#v", e)
}
return runtime.TaskUnknownTopic
}
func newInit(ctx context.Context, path, workDir, runtimeRoot, namespace string, config map[string]string, platform rproc.Platform, r *proc.CreateConfig) (*proc.Init, error) {
var options runctypes.CreateOptions
if r.Options != nil {
v, err := typeurl.UnmarshalAny(r.Options)
if err != nil {
return nil, err
}
options = *v.(*runctypes.CreateOptions)
}
spec, err := utils.ReadSpec(r.Bundle)
if err != nil {
return nil, errors.Wrap(err, "read oci spec")
}
if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil {
return nil, errors.Wrap(err, "update volume annotations")
}
runsc.FormatLogPath(r.ID, config)
rootfs := filepath.Join(path, "rootfs")
runtime := proc.NewRunsc(runtimeRoot, path, namespace, r.Runtime, config)
p := proc.New(r.ID, runtime, rproc.Stdio{
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Terminal: r.Terminal,
})
p.Bundle = r.Bundle
p.Platform = platform
p.Rootfs = rootfs
p.WorkDir = workDir
p.IoUID = int(options.IoUid)
p.IoGID = int(options.IoGid)
p.Sandbox = utils.IsSandbox(spec)
p.UserLog = utils.UserLogPath(spec)
p.Monitor = shim.Default
return p, nil
}

View File

@ -0,0 +1,57 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"github.com/containerd/cri/pkg/annotations"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// ReadSpec reads OCI spec from the bundle directory.
func ReadSpec(bundle string) (*specs.Spec, error) {
f, err := os.Open(filepath.Join(bundle, "config.json"))
if err != nil {
return nil, err
}
defer f.Close()
b, err := ioutil.ReadAll(f)
if err != nil {
return nil, err
}
var spec specs.Spec
if err := json.Unmarshal(b, &spec); err != nil {
return nil, err
}
return &spec, nil
}
// IsSandbox checks whether a container is a sandbox container.
func IsSandbox(spec *specs.Spec) bool {
t, ok := spec.Annotations[annotations.ContainerType]
return !ok || t == annotations.ContainerTypeSandbox
}
// UserLogPath gets user log path from OCI annotation.
func UserLogPath(spec *specs.Spec) string {
sandboxLogDir := spec.Annotations[annotations.SandboxLogDir]
if sandboxLogDir == "" {
return ""
}
return filepath.Join(sandboxLogDir, "gvisor.log")
}

View File

@ -0,0 +1,156 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"encoding/json"
"fmt"
"io/ioutil"
"path/filepath"
"strings"
"github.com/containerd/cri/pkg/annotations"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
const volumeKeyPrefix = "dev.gvisor.spec.mount."
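// kubeletPodsDir is the directory where kubelet stores pod volumes. It is a
// variable so tests can override it.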
var kubeletPodsDir = "/var/lib/kubelet/pods"
// volumeName gets volume name from volume annotation key, example:
// dev.gvisor.spec.mount.NAME.share
func volumeName(k string) string {
return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0]
}
// volumeFieldName gets volume field name from volume annotation key, example:
// `type` is the field of dev.gvisor.spec.mount.NAME.type
func volumeFieldName(k string) string {
parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".")
return parts[len(parts)-1]
}
// podUID gets pod UID from the pod log path.
func podUID(s *specs.Spec) (string, error) {
sandboxLogDir := s.Annotations[annotations.SandboxLogDir]
if sandboxLogDir == "" {
return "", errors.New("no sandbox log path annotation")
}
fields := strings.Split(filepath.Base(sandboxLogDir), "_")
switch len(fields) {
case 1: // This is the old CRI logging path
return fields[0], nil
case 3: // This is the new CRI logging path
return fields[2], nil
}
return "", errors.Errorf("unexpected sandbox log path %q", sandboxLogDir)
}
// isVolumeKey checks whether an annotation key is for a volume.
func isVolumeKey(k string) bool {
return strings.HasPrefix(k, volumeKeyPrefix)
}
// volumeSourceKey constructs the annotation key for volume source.
func volumeSourceKey(volume string) string {
return volumeKeyPrefix + volume + ".source"
}
// volumePath searches for the volume path in the kubelet pod directory.
func volumePath(volume, uid string) (string, error) {
// TODO: Support subpath when gvisor supports pod volume bind mount.
volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume)
dirs, err := filepath.Glob(volumeSearchPath)
if err != nil {
return "", err
}
if len(dirs) != 1 {
return "", errors.Errorf("unexpected matched volume list %v", dirs)
}
return dirs[0], nil
}
// isVolumePath checks whether a string is the volume path.
func isVolumePath(volume, path string) (bool, error) {
// TODO: Support subpath when gvisor supports pod volume bind mount.
volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume)
return filepath.Match(volumeSearchPath, path)
}
// UpdateVolumeAnnotations adds necessary OCI annotations for gVisor
// volume optimization.
func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error {
var (
uid string
err error
)
if IsSandbox(s) {
uid, err = podUID(s)
if err != nil {
// Skip if we can't get pod UID, because this doesn't work
// for containerd 1.1.
logrus.WithError(err).Error("Can't get pod uid")
return nil
}
}
var updated bool
for k, v := range s.Annotations {
if !isVolumeKey(k) {
continue
}
if volumeFieldName(k) != "type" {
continue
}
volume := volumeName(k)
if uid != "" {
// This is a sandbox
path, err := volumePath(volume, uid)
if err != nil {
return errors.Wrapf(err, "get volume path for %q", volume)
}
s.Annotations[volumeSourceKey(volume)] = path
updated = true
} else {
// This is a container
for i := range s.Mounts {
// An error is returned for sandbox if source annotation
// is not successfully applied, so it is guaranteed that
// the source annotation for sandbox has already been
// successfully applied at this point.
// The volume name is unique inside a pod, so matching without
// podUID is fine here.
// TODO: Pass podUID down to shim for containers to do
// more accurate matching.
if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes {
// gVisor requires the container mount type to match
// sandbox mount type.
s.Mounts[i].Type = v
updated = true
}
}
}
}
if !updated {
return nil
}
// Update bundle
b, err := json.Marshal(s)
if err != nil {
return err
}
return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666)
}

View File

@ -0,0 +1,309 @@
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"testing"
"github.com/containerd/cri/pkg/annotations"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
func TestUpdateVolumeAnnotations(t *testing.T) {
dir, err := ioutil.TempDir("", "test-update-volume-annotations")
if err != nil {
t.Fatalf("create tempdir: %v", err)
}
defer os.RemoveAll(dir)
kubeletPodsDir = dir
const (
testPodUID = "testuid"
testVolumeName = "testvolume"
testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID
testLegacyLogDirPath = "/var/log/pods/" + testPodUID
)
testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName)
if err := os.MkdirAll(testVolumePath, 0755); err != nil {
t.Fatalf("Create test volume: %v", err)
}
for _, test := range []struct {
desc string
spec *specs.Spec
expected *specs.Spec
expectErr bool
expectUpdate bool
}{
{
desc: "volume annotations for sandbox",
spec: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
"dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath,
},
},
expectUpdate: true,
},
{
desc: "volume annotations for sandbox with legacy log path",
spec: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLegacyLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLegacyLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
"dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath,
},
},
expectUpdate: true,
},
{
desc: "tmpfs: volume annotations for container",
spec: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: testVolumePath,
Options: []string{"ro"},
},
{
Destination: "/random",
Type: "bind",
Source: "/random",
Options: []string{"ro"},
},
},
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeContainer,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "tmpfs",
Source: testVolumePath,
Options: []string{"ro"},
},
{
Destination: "/random",
Type: "bind",
Source: "/random",
Options: []string{"ro"},
},
},
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeContainer,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expectUpdate: true,
},
{
desc: "bind: volume annotations for container",
spec: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: testVolumePath,
Options: []string{"ro"},
},
},
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeContainer,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "container",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "bind",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: testVolumePath,
Options: []string{"ro"},
},
},
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeContainer,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "container",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "bind",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expectUpdate: true,
},
{
desc: "should not return error without pod log directory",
spec: &specs.Spec{
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount." + testVolumeName + ".share": "pod",
"dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs",
"dev.gvisor.spec.mount." + testVolumeName + ".options": "ro",
},
},
},
{
desc: "should return error if volume path does not exist",
spec: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
"dev.gvisor.spec.mount.notexist.share": "pod",
"dev.gvisor.spec.mount.notexist.type": "tmpfs",
"dev.gvisor.spec.mount.notexist.options": "ro",
},
},
expectErr: true,
},
{
desc: "no volume annotations for sandbox",
spec: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
},
},
expected: &specs.Spec{
Annotations: map[string]string{
annotations.SandboxLogDir: testLogDirPath,
annotations.ContainerType: annotations.ContainerTypeSandbox,
},
},
},
{
desc: "no volume annotations for container",
spec: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: "/test",
Options: []string{"ro"},
},
{
Destination: "/random",
Type: "bind",
Source: "/random",
Options: []string{"ro"},
},
},
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeContainer,
},
},
expected: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: "/test",
Options: []string{"ro"},
},
{
Destination: "/random",
Type: "bind",
Source: "/random",
Options: []string{"ro"},
},
},
Annotations: map[string]string{
annotations.ContainerType: annotations.ContainerTypeContainer,
},
},
},
} {
t.Run(test.desc, func(t *testing.T) {
bundle, err := ioutil.TempDir(dir, "test-bundle")
if err != nil {
t.Fatalf("Create test bundle: %v", err)
}
err = UpdateVolumeAnnotations(bundle, test.spec)
if test.expectErr {
if err == nil {
t.Fatal("Expected error, but got nil")
}
return
}
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expected, test.spec) {
t.Fatalf("Expected %+v, got %+v", test.expected, test.spec)
}
if test.expectUpdate {
b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json"))
if err != nil {
t.Fatalf("Read spec from bundle: %v", err)
}
var spec specs.Spec
if err := json.Unmarshal(b, &spec); err != nil {
t.Fatalf("Unmarshal spec: %v", err)
}
if !reflect.DeepEqual(test.expected, &spec) {
t.Fatalf("Expected %+v, got %+v", test.expected, &spec)
}
}
})
}
}

128
pkg/shim/v2/epoll.go Normal file
View File

@ -0,0 +1,128 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build linux
package v2
import (
"context"
"sync"
"github.com/containerd/cgroups"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/runtime"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func newOOMEpoller(publisher events.Publisher) (*epoller, error) {
fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
if err != nil {
return nil, err
}
return &epoller{
fd: fd,
publisher: publisher,
set: make(map[uintptr]*item),
}, nil
}
type epoller struct {
mu sync.Mutex
fd int
publisher events.Publisher
set map[uintptr]*item
}
type item struct {
id string
cg cgroups.Cgroup
}
func (e *epoller) Close() error {
return unix.Close(e.fd)
}
func (e *epoller) run(ctx context.Context) {
var events [128]unix.EpollEvent
for {
select {
case <-ctx.Done():
e.Close()
return
default:
n, err := unix.EpollWait(e.fd, events[:], -1)
if err != nil {
if err == unix.EINTR {
continue
}
logrus.WithError(err).Error("cgroups: epoll wait")
}
for i := 0; i < n; i++ {
e.process(ctx, uintptr(events[i].Fd))
}
}
}
}
func (e *epoller) add(id string, cg cgroups.Cgroup) error {
e.mu.Lock()
defer e.mu.Unlock()
fd, err := cg.OOMEventFD()
if err != nil {
return err
}
e.set[fd] = &item{
id: id,
cg: cg,
}
event := unix.EpollEvent{
Fd: int32(fd),
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
}
return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
}
func (e *epoller) process(ctx context.Context, fd uintptr) {
flush(fd)
e.mu.Lock()
i, ok := e.set[fd]
if !ok {
e.mu.Unlock()
return
}
e.mu.Unlock()
if i.cg.State() == cgroups.Deleted {
e.mu.Lock()
delete(e.set, fd)
e.mu.Unlock()
unix.Close(int(fd))
return
}
if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{
ContainerID: i.id,
}); err != nil {
logrus.WithError(err).Error("publish OOM event")
}
}
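// flush drains the OOM eventfd so that it can signal subsequent events.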
func flush(fd uintptr) error {
var buf [8]byte
_, err := unix.Read(int(fd), buf[:])
return err
}

View File

@ -0,0 +1,33 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package options
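// OptionType is the type URL of the runsc shim runtime options.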
const OptionType = "io.containerd.runsc.v1.options"
// Options is runtime options for io.containerd.runsc.v1.
type Options struct {
// ShimCgroup is the cgroup the shim should be in.
ShimCgroup string `toml:"shim_cgroup"`
// IoUid is the uid of the shim's I/O pipes.
IoUid uint32 `toml:"io_uid"`
// IoGid is the gid of the shim's I/O pipes.
IoGid uint32 `toml:"io_gid"`
// BinaryName is the binary name of the runsc binary.
BinaryName string `toml:"binary_name"`
// Root is the runsc root directory.
Root string `toml:"root"`
// RunscConfig is a key/value map of all runsc flags.
RunscConfig map[string]string `toml:"runsc_config"`
}

821
pkg/shim/v2/service.go Normal file
View File

@ -0,0 +1,821 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v2
import (
"context"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"sync"
"syscall"
"time"
"github.com/BurntSushi/toml"
"github.com/containerd/cgroups"
"github.com/containerd/console"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/linux/runctypes"
rproc "github.com/containerd/containerd/runtime/proc"
"github.com/containerd/containerd/runtime/v2/shim"
taskAPI "github.com/containerd/containerd/runtime/v2/task"
runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1"
"github.com/containerd/typeurl"
ptypes "github.com/gogo/protobuf/types"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
"github.com/google/gvisor-containerd-shim/pkg/v1/proc"
"github.com/google/gvisor-containerd-shim/pkg/v1/utils"
"github.com/google/gvisor-containerd-shim/pkg/v2/options"
)
var (
empty = &ptypes.Empty{}
bufPool = sync.Pool{
New: func() interface{} {
buffer := make([]byte, 32<<10)
return &buffer
},
}
)
var _ = (taskAPI.TaskService)(&service{})
// configFile is the default config file name. For containerd 1.2,
// we assume that a config.toml should exist in the runtime root.
const configFile = "config.toml"
// New returns a new shim service that can be used via GRPC
func New(ctx context.Context, id string, publisher events.Publisher) (shim.Shim, error) {
ep, err := newOOMEpoller(publisher)
if err != nil {
return nil, err
}
ctx, cancel := context.WithCancel(ctx)
go ep.run(ctx)
s := &service{
id: id,
context: ctx,
processes: make(map[string]rproc.Process),
events: make(chan interface{}, 128),
ec: proc.ExitCh,
oomPoller: ep,
cancel: cancel,
}
go s.processExits()
runsc.Monitor = shim.Default
if err := s.initPlatform(); err != nil {
cancel()
return nil, errors.Wrap(err, "failed to initialized platform behavior")
}
go s.forward(publisher)
return s, nil
}
// service is the shim implementation of a remote shim over GRPC
type service struct {
mu sync.Mutex
context context.Context
task rproc.Process
processes map[string]rproc.Process
events chan interface{}
platform rproc.Platform
opts options.Options
ec chan proc.Exit
oomPoller *epoller
id string
bundle string
cancel func()
}
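// newCommand returns a command that re-execs the current binary to serve as
// the shim process, forwarding the containerd namespace, address, and publish
// binary to it.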
func newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
self, err := os.Executable()
if err != nil {
return nil, err
}
cwd, err := os.Getwd()
if err != nil {
return nil, err
}
args := []string{
"-namespace", ns,
"-address", containerdAddress,
"-publish-binary", containerdBinary,
}
cmd := exec.Command(self, args...)
cmd.Dir = cwd
cmd.Env = append(os.Environ(), "GOMAXPROCS=2")
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
return cmd, nil
}
func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress string) (string, error) {
cmd, err := newCommand(ctx, containerdBinary, containerdAddress)
if err != nil {
return "", err
}
address, err := shim.SocketAddress(ctx, id)
if err != nil {
return "", err
}
socket, err := shim.NewSocket(address)
if err != nil {
return "", err
}
defer socket.Close()
f, err := socket.File()
if err != nil {
return "", err
}
defer f.Close()
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
if err := cmd.Start(); err != nil {
return "", err
}
defer func() {
if err != nil {
cmd.Process.Kill()
}
}()
// make sure to wait after start
go cmd.Wait()
if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil {
return "", err
}
if err := shim.WriteAddress("address", address); err != nil {
return "", err
}
if err := shim.SetScore(cmd.Process.Pid); err != nil {
return "", errors.Wrap(err, "failed to set OOM Score on shim")
}
return address, nil
}
func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) {
path, err := os.Getwd()
if err != nil {
return nil, err
}
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
runtime, err := s.readRuntime(path)
if err != nil {
return nil, err
}
r := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil)
if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{
Force: true,
}); err != nil {
logrus.WithError(err).Warn("failed to remove runc container")
}
if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil {
logrus.WithError(err).Warn("failed to cleanup rootfs mount")
}
return &taskAPI.DeleteResponse{
ExitedAt: time.Now(),
ExitStatus: 128 + uint32(unix.SIGKILL),
}, nil
}
func (s *service) readRuntime(path string) (string, error) {
data, err := ioutil.ReadFile(filepath.Join(path, "runtime"))
if err != nil {
return "", err
}
return string(data), nil
}
func (s *service) writeRuntime(path, runtime string) error {
return ioutil.WriteFile(filepath.Join(path, "runtime"), []byte(runtime), 0600)
}
// Create a new initial process and container with the underlying OCI runtime
func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *taskAPI.CreateTaskResponse, err error) {
s.mu.Lock()
defer s.mu.Unlock()
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, errors.Wrap(err, "create namespace")
}
// Read from root for now.
var opts options.Options
if r.Options != nil {
v, err := typeurl.UnmarshalAny(r.Options)
if err != nil {
return nil, err
}
var path string
switch o := v.(type) {
case *runctypes.CreateOptions: // containerd 1.2.x
opts.IoUid = o.IoUid
opts.IoGid = o.IoGid
opts.ShimCgroup = o.ShimCgroup
case *runctypes.RuncOptions: // containerd 1.2.x
root := proc.RunscRoot
if o.RuntimeRoot != "" {
root = o.RuntimeRoot
}
opts.BinaryName = o.Runtime
path = filepath.Join(root, configFile)
if _, err := os.Stat(path); err != nil {
if !os.IsNotExist(err) {
return nil, errors.Wrapf(err, "stat config file %q", path)
}
// A config file in runtime root is not required.
path = ""
}
case *runtimeoptions.Options: // containerd 1.3.x+
if o.ConfigPath == "" {
break
}
if o.TypeUrl != options.OptionType {
return nil, errors.Errorf("unsupported runtimeoptions %q", o.TypeUrl)
}
path = o.ConfigPath
default:
return nil, errors.Errorf("unsupported option type %q", r.Options.TypeUrl)
}
if path != "" {
if _, err = toml.DecodeFile(path, &opts); err != nil {
return nil, errors.Wrapf(err, "decode config file %q", path)
}
}
}
var mounts []proc.Mount
for _, m := range r.Rootfs {
mounts = append(mounts, proc.Mount{
Type: m.Type,
Source: m.Source,
Target: m.Target,
Options: m.Options,
})
}
rootfs := filepath.Join(r.Bundle, "rootfs")
if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) {
return nil, err
}
config := &proc.CreateConfig{
ID: r.ID,
Bundle: r.Bundle,
Runtime: opts.BinaryName,
Rootfs: mounts,
Terminal: r.Terminal,
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Options: r.Options,
}
if err := s.writeRuntime(r.Bundle, opts.BinaryName); err != nil {
return nil, err
}
defer func() {
if err != nil {
if err2 := mount.UnmountAll(rootfs, 0); err2 != nil {
logrus.WithError(err2).Warn("failed to cleanup rootfs mount")
}
}
}()
for _, rm := range mounts {
m := &mount.Mount{
Type: rm.Type,
Source: rm.Source,
Options: rm.Options,
}
if err := m.Mount(rootfs); err != nil {
return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m)
}
}
process, err := newInit(
ctx,
r.Bundle,
filepath.Join(r.Bundle, "work"),
ns,
s.platform,
config,
&opts,
rootfs,
)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
if err := process.Create(ctx, config); err != nil {
return nil, errdefs.ToGRPC(err)
}
// save the main task id and bundle to the shim for additional requests
s.id = r.ID
s.bundle = r.Bundle
// Set up OOM notification on the sandbox's cgroup. This is done on sandbox
// create since the sandbox process will be created here.
pid := process.Pid()
if pid > 0 {
cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid))
if err != nil {
return nil, errors.Wrapf(err, "loading cgroup for %d", pid)
}
if err := s.oomPoller.add(s.id, cg); err != nil {
return nil, errors.Wrapf(err, "add cg to OOM monitor")
}
}
s.task = process
s.opts = opts
return &taskAPI.CreateTaskResponse{
Pid: uint32(process.Pid()),
}, nil
}
// Start a process
func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
if err := p.Start(ctx); err != nil {
return nil, err
}
// TODO: Set the cgroup and oom notifications on restore.
// https://github.com/google/gvisor-containerd-shim/issues/58
return &taskAPI.StartResponse{
Pid: uint32(p.Pid()),
}, nil
}
// Delete the initial process and container
func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := p.Delete(ctx); err != nil {
return nil, err
}
isTask := r.ExecID == ""
if !isTask {
s.mu.Lock()
delete(s.processes, r.ExecID)
s.mu.Unlock()
}
if isTask && s.platform != nil {
s.platform.Close()
}
return &taskAPI.DeleteResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
Pid: uint32(p.Pid()),
}, nil
}
// Exec an additional process inside the container
func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*ptypes.Empty, error) {
s.mu.Lock()
p := s.processes[r.ExecID]
s.mu.Unlock()
if p != nil {
return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID)
}
p = s.task
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
process, err := p.(*proc.Init).Exec(ctx, s.bundle, &proc.ExecConfig{
ID: r.ExecID,
Terminal: r.Terminal,
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Spec: r.Spec,
})
if err != nil {
return nil, errdefs.ToGRPC(err)
}
s.mu.Lock()
s.processes[r.ExecID] = process
s.mu.Unlock()
return empty, nil
}
// ResizePty of a process
func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*ptypes.Empty, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
ws := console.WinSize{
Width: uint16(r.Width),
Height: uint16(r.Height),
}
if err := p.Resize(ws); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// State returns runtime state information for a process
func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
st, err := p.Status(ctx)
if err != nil {
return nil, err
}
status := task.StatusUnknown
switch st {
case "created":
status = task.StatusCreated
case "running":
status = task.StatusRunning
case "stopped":
status = task.StatusStopped
}
sio := p.Stdio()
return &taskAPI.StateResponse{
ID: p.ID(),
Bundle: s.bundle,
Pid: uint32(p.Pid()),
Status: status,
Stdin: sio.Stdin,
Stdout: sio.Stdout,
Stderr: sio.Stderr,
Terminal: sio.Terminal,
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
}, nil
}
// Pause the container
func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Resume the container
func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Kill a process with the provided signal
func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*ptypes.Empty, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
// Pids returns all pids inside the container
func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) {
pids, err := s.getContainerPids(ctx, r.ID)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
var processes []*task.ProcessInfo
for _, pid := range pids {
pInfo := task.ProcessInfo{
Pid: pid,
}
for _, p := range s.processes {
if p.Pid() == int(pid) {
d := &runctypes.ProcessDetails{
ExecID: p.ID(),
}
a, err := typeurl.MarshalAny(d)
if err != nil {
return nil, errors.Wrapf(err, "failed to marshal process %d info", pid)
}
pInfo.Info = a
break
}
}
processes = append(processes, &pInfo)
}
return &taskAPI.PidsResponse{
Processes: processes,
}, nil
}
// CloseIO of a process
func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*ptypes.Empty, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
if stdin := p.Stdin(); stdin != nil {
if err := stdin.Close(); err != nil {
return nil, errors.Wrap(err, "close stdin")
}
}
return empty, nil
}
// Checkpoint the container
func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Connect returns shim information such as the shim's pid
func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) {
var pid int
if s.task != nil {
pid = s.task.Pid()
}
return &taskAPI.ConnectResponse{
ShimPid: uint32(os.Getpid()),
TaskPid: uint32(pid),
}, nil
}
func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*ptypes.Empty, error) {
s.cancel()
os.Exit(0)
return empty, nil
}
func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
path, err := os.Getwd()
if err != nil {
return nil, err
}
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
runtime, err := s.readRuntime(path)
if err != nil {
return nil, err
}
rs := proc.NewRunsc(s.opts.Root, path, ns, runtime, nil)
stats, err := rs.Stats(ctx, s.id)
if err != nil {
return nil, err
}
// gvisor currently (as of 2020-03-03) only returns the total memory
// usage and current PID value[0]. However, we copy the common fields here
// so that future updates will propagate correct information. We're
// using the cgroups.Metrics structure so we're returning the same type
// as runc.
//
// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
data, err := typeurl.MarshalAny(&cgroups.Metrics{
CPU: &cgroups.CPUStat{
Usage: &cgroups.CPUUsage{
Total: stats.Cpu.Usage.Total,
Kernel: stats.Cpu.Usage.Kernel,
User: stats.Cpu.Usage.User,
PerCPU: stats.Cpu.Usage.Percpu,
},
Throttling: &cgroups.Throttle{
Periods: stats.Cpu.Throttling.Periods,
ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods,
ThrottledTime: stats.Cpu.Throttling.ThrottledTime,
},
},
Memory: &cgroups.MemoryStat{
Cache: stats.Memory.Cache,
Usage: &cgroups.MemoryEntry{
Limit: stats.Memory.Usage.Limit,
Usage: stats.Memory.Usage.Usage,
Max: stats.Memory.Usage.Max,
Failcnt: stats.Memory.Usage.Failcnt,
},
Swap: &cgroups.MemoryEntry{
Limit: stats.Memory.Swap.Limit,
Usage: stats.Memory.Swap.Usage,
Max: stats.Memory.Swap.Max,
Failcnt: stats.Memory.Swap.Failcnt,
},
Kernel: &cgroups.MemoryEntry{
Limit: stats.Memory.Kernel.Limit,
Usage: stats.Memory.Kernel.Usage,
Max: stats.Memory.Kernel.Max,
Failcnt: stats.Memory.Kernel.Failcnt,
},
KernelTCP: &cgroups.MemoryEntry{
Limit: stats.Memory.KernelTCP.Limit,
Usage: stats.Memory.KernelTCP.Usage,
Max: stats.Memory.KernelTCP.Max,
Failcnt: stats.Memory.KernelTCP.Failcnt,
},
},
Pids: &cgroups.PidsStat{
Current: stats.Pids.Current,
Limit: stats.Pids.Limit,
},
})
if err != nil {
return nil, err
}
return &taskAPI.StatsResponse{
Stats: data,
}, nil
}
// Update a running container
func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*ptypes.Empty, error) {
return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented)
}
// Wait for a process to exit
func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) {
p, err := s.getProcess(r.ExecID)
if err != nil {
return nil, err
}
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
p.Wait()
return &taskAPI.WaitResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
}, nil
}
func (s *service) processExits() {
for e := range s.ec {
s.checkProcesses(e)
}
}
func (s *service) checkProcesses(e proc.Exit) {
// TODO(random-liu): Add `shouldKillAll` logic if container pid
// namespace is supported.
for _, p := range s.allProcesses() {
if p.ID() == e.ID {
if ip, ok := p.(*proc.Init); ok {
// Ensure all children are killed
if err := ip.KillAll(s.context); err != nil {
log.G(s.context).WithError(err).WithField("id", ip.ID()).
Error("failed to kill init's children")
}
}
p.SetExited(e.Status)
s.events <- &eventstypes.TaskExit{
ContainerID: s.id,
ID: p.ID(),
Pid: uint32(p.Pid()),
ExitStatus: uint32(e.Status),
ExitedAt: p.ExitedAt(),
}
return
}
}
}
func (s *service) allProcesses() (o []rproc.Process) {
s.mu.Lock()
defer s.mu.Unlock()
for _, p := range s.processes {
o = append(o, p)
}
if s.task != nil {
o = append(o, s.task)
}
return o
}
func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
s.mu.Lock()
p := s.task
s.mu.Unlock()
if p == nil {
return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created")
}
ps, err := p.(*proc.Init).Runtime().Ps(ctx, id)
if err != nil {
return nil, err
}
pids := make([]uint32, 0, len(ps))
for _, pid := range ps {
pids = append(pids, uint32(pid))
}
return pids, nil
}
func (s *service) forward(publisher events.Publisher) {
for e := range s.events {
ctx, cancel := context.WithTimeout(s.context, 5*time.Second)
err := publisher.Publish(ctx, getTopic(e), e)
cancel()
if err != nil {
logrus.WithError(err).Error("post event")
}
}
}
func (s *service) getProcess(execID string) (rproc.Process, error) {
s.mu.Lock()
defer s.mu.Unlock()
if execID == "" {
return s.task, nil
}
p := s.processes[execID]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID)
}
return p, nil
}
func getTopic(e interface{}) string {
switch e.(type) {
case *eventstypes.TaskCreate:
return runtime.TaskCreateEventTopic
case *eventstypes.TaskStart:
return runtime.TaskStartEventTopic
case *eventstypes.TaskOOM:
return runtime.TaskOOMEventTopic
case *eventstypes.TaskExit:
return runtime.TaskExitEventTopic
case *eventstypes.TaskDelete:
return runtime.TaskDeleteEventTopic
case *eventstypes.TaskExecAdded:
return runtime.TaskExecAddedEventTopic
case *eventstypes.TaskExecStarted:
return runtime.TaskExecStartedEventTopic
default:
logrus.Warnf("no topic for type %#v", e)
}
return runtime.TaskUnknownTopic
}
func newInit(ctx context.Context, path, workDir, namespace string, platform rproc.Platform, r *proc.CreateConfig, options *options.Options, rootfs string) (*proc.Init, error) {
spec, err := utils.ReadSpec(r.Bundle)
if err != nil {
return nil, errors.Wrap(err, "read oci spec")
}
if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil {
return nil, errors.Wrap(err, "update volume annotations")
}
runsc.FormatLogPath(r.ID, options.RunscConfig)
runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig)
p := proc.New(r.ID, runtime, rproc.Stdio{
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Terminal: r.Terminal,
})
p.Bundle = r.Bundle
p.Platform = platform
p.Rootfs = rootfs
p.WorkDir = workDir
p.IoUID = int(options.IoUid)
p.IoGID = int(options.IoGid)
p.Sandbox = utils.IsSandbox(spec)
p.UserLog = utils.UserLogPath(spec)
p.Monitor = shim.Default
return p, nil
}

View File

@ -0,0 +1,112 @@
// Copyright 2018 The containerd Authors.
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build linux
package v2
import (
"context"
"io"
"sync"
"syscall"
"github.com/containerd/console"
"github.com/containerd/fifo"
"github.com/pkg/errors"
)
type linuxPlatform struct {
epoller *console.Epoller
}
func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) (console.Console, error) {
if p.epoller == nil {
return nil, errors.New("uninitialized epoller")
}
epollConsole, err := p.epoller.Add(console)
if err != nil {
return nil, err
}
if stdin != "" {
in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return nil, err
}
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(epollConsole, in, *p)
}()
}
outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0)
if err != nil {
return nil, err
}
outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0)
if err != nil {
return nil, err
}
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(outw, epollConsole, *p)
epollConsole.Close()
outr.Close()
outw.Close()
wg.Done()
}()
return epollConsole, nil
}
func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error {
if p.epoller == nil {
return errors.New("uninitialized epoller")
}
epollConsole, ok := cons.(*console.EpollConsole)
if !ok {
return errors.Errorf("expected EpollConsole, got %#v", cons)
}
return epollConsole.Shutdown(p.epoller.CloseConsole)
}
func (p *linuxPlatform) Close() error {
return p.epoller.Close()
}
// initialize a single epoll fd to manage our consoles. `initPlatform` should
// only be called once.
func (s *service) initPlatform() error {
if s.platform != nil {
return nil
}
epoller, err := console.NewEpoller()
if err != nil {
return errors.Wrap(err, "failed to initialize epoller")
}
s.platform = &linuxPlatform{
epoller: epoller,
}
go epoller.Wait()
return nil
}

16
shim/README.md Normal file
View File

@ -0,0 +1,16 @@
# gvisor-containerd-shim
gvisor-containerd-shim is a containerd shim. It implements the containerd v1
shim API. It can be used as a drop-in replacement for
[containerd-shim][containerd-shim]
(though containerd-shim must still be installed). It allows the use of both
gVisor (runsc) and normal containers in the same containerd installation by
deferring to the runc shim if the desired runtime engine is not runsc.
- [Untrusted Workload Quick Start (containerd >=1.1)](docs/untrusted-workload-quickstart.md)
- [Runtime Handler/RuntimeClass Quick Start (containerd >=1.2)](docs/runtime-handler-quickstart.md)
- [Runtime Handler/RuntimeClass Quick Start (shim v2) (containerd >=1.2)](docs/runtime-handler-shim-v2-quickstart.md)
- [Configure containerd-shim-runsc-v1 (shim v2) (containerd >= 1.3)](docs/configure-containerd-shim-runsc-v1.md)
- [Configure gvisor-containerd-shim (shim v1) (containerd &lt;= 1.2)](docs/configure-gvisor-containerd-shim.md)
[containerd-shim]: https://github.com/containerd/containerd/tree/master/cmd/containerd-shim

View File

@ -0,0 +1,72 @@
# Configure containerd-shim-runsc-v1 (Shim V2)
This document describes how to configure runtime options for
`containerd-shim-runsc-v1`. It follows on from the instructions in
[Runtime Handler Quick Start (shim v2) (containerd >=1.2)](runtime-handler-shim-v2-quickstart.md)
and requires containerd 1.3 or later.
### Update `/etc/containerd/config.toml` to point to a configuration file for `containerd-shim-runsc-v1`.
`containerd-shim-runsc-v1` supports a few different configuration options based
on the version of containerd that is used. For versions >= 1.3, it supports a
configurable config path in the containerd runtime configuration.
```shell
{ # Step 1: Update runtime options for runsc in containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim_debug = true
[plugins.cri.containerd.runtimes.runsc]
runtime_type = "io.containerd.runsc.v1"
[plugins.cri.containerd.runtimes.runsc.options]
TypeUrl = "io.containerd.runsc.v1.options"
ConfigPath = "/etc/containerd/runsc.toml"
EOF
}
```
### Configure `/etc/containerd/runsc.toml`
The set of options that can be configured can be found in
[options.go](../pkg/v2/options/options.go).
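As a rough sketch, a configuration that sets a couple of the top-level options
together with a `runsc` flag might look like the following (the paths and
values here are illustrative, not defaults):
```shell
cat <<EOF | sudo tee /etc/containerd/runsc.toml
# Illustrative values only; adjust the paths for your installation.
binary_name = "/usr/local/bin/runsc"
root = "/run/containerd/runsc"
[runsc_config]
platform = "ptrace"
EOF
```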
#### Example: Enable the KVM platform
gVisor enables the use of a number of platforms. This example shows how to
configure `containerd-shim-runsc-v1` to use gVisor with the KVM platform.
Find out more about platforms in the
[gVisor documentation](https://gvisor.dev/docs/user_guide/platforms/).
```shell
cat <<EOF | sudo tee /etc/containerd/runsc.toml
[runsc_config]
platform = "kvm"
EOF
```
#### Example: Enable gVisor debug logging
gVisor debug logging can be enabled by setting the `debug` and `debug-log`
flags. The shim will replace "%ID%" with the container ID in the path of the
`debug-log` flag.
Find out more about debugging in the
[gVisor documentation](https://gvisor.dev/docs/user_guide/debugging/).
```shell
cat <<EOF | sudo tee /etc/containerd/runsc.toml
[runsc_config]
debug=true
debug-log=/var/log/%ID%/gvisor.log
EOF
```
## Restart `containerd`
When you are done, restart containerd to pick up the new configuration files.
```shell
sudo systemctl restart containerd
```

View File

@ -0,0 +1,42 @@
# Configure gvisor-containerd-shim (Shim V1)
This document describes how to configure runtime options for `gvisor-containerd-shim`.
The shim configuration is stored in `/etc/containerd/gvisor-containerd-shim.toml`. The configuration file supports two values.
- `runc_shim`: The path to the runc shim. This is used by gvisor-containerd-shim to run normal containers.
- `runsc_config`: A set of key/value pairs that are converted into `runsc` command line flags. You can learn more about which flags are available by running `runsc flags`.
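For example, a minimal configuration that sets both values might look like this
(the paths and flags shown are illustrative):
```shell
cat <<EOF | sudo tee /etc/containerd/gvisor-containerd-shim.toml
# Path to the default runc containerd-shim.
runc_shim = "/usr/local/bin/containerd-shim"
[runsc_config]
platform = "ptrace"
EOF
```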
## Example: Enable the KVM platform
gVisor enables the use of a number of platforms. This configuration enables the
KVM platform.
Find out more about platforms in the
[gVisor documentation](https://gvisor.dev/docs/user_guide/platforms/).
```shell
cat <<EOF | sudo tee /etc/containerd/gvisor-containerd-shim.toml
[runsc_config]
platform = "kvm"
EOF
```
## Example: Enable gVisor debug logging
gVisor debug logging can be enabled by setting the `debug` and `debug-log`
flags. The shim will replace "%ID%" with the container ID in the path of the
`debug-log` flag.
Find out more about debugging in the
[gVisor documentation](https://gvisor.dev/docs/user_guide/debugging/).
```shell
cat <<EOF | sudo tee /etc/containerd/gvisor-containerd-shim.toml
# This is the path to the default runc containerd-shim.
runc_shim = "/usr/local/bin/containerd-shim"
[runsc_config]
debug=true
debug-log=/var/log/%ID%/gvisor.log
EOF
```

View File

@ -0,0 +1,251 @@
# Runtime Handler Quickstart
This document describes how to install and run the `gvisor-containerd-shim`
using the containerd runtime handler support. This requires containerd 1.2 or
later.
## Requirements
- **runsc**: See the [gVisor documentation](https://github.com/google/gvisor) for information on how to install runsc.
- **containerd**: See the [containerd website](https://containerd.io/) for information on how to install containerd.
## Install
### Install gvisor-containerd-shim
1. Download the latest release of the `gvisor-containerd-shim`. See the
[releases page](https://github.com/google/gvisor-containerd-shim/releases).
[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(release\)/ /^}/)
```shell
{ # Step 1(release): Install gvisor-containerd-shim
LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*gvisor-containerd-shim.linux-amd64' | head -1)
wget -O gvisor-containerd-shim ${LATEST_RELEASE}
chmod +x gvisor-containerd-shim
sudo mv gvisor-containerd-shim /usr/local/bin/gvisor-containerd-shim
}
```
2. Create the configuration for the gvisor shim in
`/etc/containerd/gvisor-containerd-shim.toml`:
[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create the gvisor-containerd-shim.toml
cat <<EOF | sudo tee /etc/containerd/gvisor-containerd-shim.toml
# This is the path to the default runc containerd-shim.
runc_shim = "/usr/local/bin/containerd-shim"
EOF
}
```
### Configure containerd
1. Update `/etc/containerd/config.toml`. Be sure to update the path to
`gvisor-containerd-shim` and `runsc` if necessary:
[embedmd]:# (../test/e2e/runtime-handler/install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Create containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim = "/usr/local/bin/gvisor-containerd-shim"
shim_debug = true
[plugins.cri.containerd.runtimes.runsc]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/local/bin/runsc"
runtime_root = "/run/containerd/runsc"
EOF
}
```
2. Restart `containerd`
```shell
sudo systemctl restart containerd
```
## Usage
You can run containers in gVisor via containerd's CRI.
### Install crictl
1. Download and install the crictl binary:
[embedmd]:# (../test/e2e/crictl-install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Download crictl
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz
tar xf crictl-v1.13.0-linux-amd64.tar.gz
sudo mv crictl /usr/local/bin
}
```
2. Write the crictl configuration file
[embedmd]:# (../test/e2e/crictl-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Configure crictl
cat <<EOF | sudo tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
}
```
### Create the nginx Sandbox in gVisor
1. Pull the nginx image
[embedmd]:# (../test/e2e/runtime-handler/usage.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Pull the nginx image
sudo crictl pull nginx
}
```
2. Create the sandbox creation request
[embedmd]:# (../test/e2e/runtime-handler/usage.sh shell /{ # Step 2/ /^EOF\n}/)
```shell
{ # Step 2: Create sandbox.json
cat <<EOF | tee sandbox.json
{
"metadata": {
"name": "nginx-sandbox",
"namespace": "default",
"attempt": 1,
"uid": "hdishd83djaidwnduwk28bcsb"
},
"linux": {
},
"log_directory": "/tmp"
}
EOF
}
```
3. Create the pod in gVisor
[embedmd]:# (../test/e2e/runtime-handler/usage.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Create the sandbox
SANDBOX_ID=$(sudo crictl runp --runtime runsc sandbox.json)
}
```
### Run the nginx Container in the Sandbox
1. Create the nginx container creation request
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 1/ /^EOF\n}/)
```shell
{ # Step 1: Create nginx container config
cat <<EOF | tee container.json
{
"metadata": {
"name": "nginx"
},
"image":{
"image": "nginx"
},
"log_path":"nginx.0.log",
"linux": {
}
}
EOF
}
```
2. Create the nginx container
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create nginx container
CONTAINER_ID=$(sudo crictl create ${SANDBOX_ID} container.json sandbox.json)
}
```
3. Start the nginx container
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Start nginx container
sudo crictl start ${CONTAINER_ID}
}
```
### Validate the container
1. Inspect the created pod
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Inspect the pod
sudo crictl inspectp ${SANDBOX_ID}
}
```
2. Inspect the nginx container
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Inspect the container
sudo crictl inspect ${CONTAINER_ID}
}
```
3. Verify that nginx is running in gVisor
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Check dmesg
sudo crictl exec ${CONTAINER_ID} dmesg | grep -i gvisor
}
```
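When you are done experimenting, you can optionally remove the container and
sandbox with crictl (these commands are not part of the referenced scripts):
```shell
sudo crictl stop ${CONTAINER_ID}
sudo crictl rm ${CONTAINER_ID}
sudo crictl stopp ${SANDBOX_ID}
sudo crictl rmp ${SANDBOX_ID}
```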
### Set up the Kubernetes Runtime Class
1. Install the Runtime Class for gVisor
[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Install a RuntimeClass
cat <<EOF | kubectl apply -f -
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
name: gvisor
handler: runsc
EOF
}
```
2. Create a Pod with the gVisor Runtime Class
[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create a pod
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: nginx-gvisor
spec:
runtimeClassName: gvisor
containers:
- name: nginx
image: nginx
EOF
}
```
3. Verify that the Pod is running
[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Get the pod
kubectl get pod nginx-gvisor -o wide
}
```
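To clean up the example pod afterwards (optional, not part of the referenced
scripts):
```shell
kubectl delete pod nginx-gvisor
```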

View File

@ -0,0 +1,232 @@
# Runtime Handler Quickstart (Shim V2)
This document describes how to install and run `containerd-shim-runsc-v1` using
the containerd runtime handler support. This requires containerd 1.2 or later.
## Requirements
- **runsc**: See the [gVisor documentation](https://github.com/google/gvisor) for information on how to install runsc.
- **containerd**: See the [containerd website](https://containerd.io/) for information on how to install containerd.
## Install
### Install containerd-shim-runsc-v1
1. Build and install `containerd-shim-runsc-v1`.
<!-- TODO: Use a release once we have one available. -->
[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1\(dev\)/ /^}/)
```shell
{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1
make
sudo make install
}
```
### Configure containerd
1. Update `/etc/containerd/config.toml`. Make sure `containerd-shim-runsc-v1` is
in `${PATH}`.
[embedmd]:# (../test/e2e/runtime-handler-shim-v2/install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Create containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim_debug = true
[plugins.cri.containerd.runtimes.runsc]
runtime_type = "io.containerd.runsc.v1"
EOF
}
```
2. Restart `containerd`
```shell
sudo systemctl restart containerd
```
## Usage
You can run containers in gVisor via containerd's CRI.
### Install crictl
1. Download and install the crictl binary:
[embedmd]:# (../test/e2e/crictl-install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Download crictl
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz
tar xf crictl-v1.13.0-linux-amd64.tar.gz
sudo mv crictl /usr/local/bin
}
```
2. Write the crictl configuration file
[embedmd]:# (../test/e2e/crictl-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Configure crictl
cat <<EOF | sudo tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
}
```
### Create the nginx Sandbox in gVisor
1. Pull the nginx image
[embedmd]:# (../test/e2e/runtime-handler/usage.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Pull the nginx image
sudo crictl pull nginx
}
```
2. Create the sandbox creation request
[embedmd]:# (../test/e2e/runtime-handler/usage.sh shell /{ # Step 2/ /^EOF\n}/)
```shell
{ # Step 2: Create sandbox.json
cat <<EOF | tee sandbox.json
{
"metadata": {
"name": "nginx-sandbox",
"namespace": "default",
"attempt": 1,
"uid": "hdishd83djaidwnduwk28bcsb"
},
"linux": {
},
"log_directory": "/tmp"
}
EOF
}
```
3. Create the pod in gVisor
[embedmd]:# (../test/e2e/runtime-handler/usage.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Create the sandbox
SANDBOX_ID=$(sudo crictl runp --runtime runsc sandbox.json)
}
```
### Run the nginx Container in the Sandbox
1. Create the nginx container creation request
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 1/ /^EOF\n}/)
```shell
{ # Step 1: Create nginx container config
cat <<EOF | tee container.json
{
"metadata": {
"name": "nginx"
},
"image":{
"image": "nginx"
},
"log_path":"nginx.0.log",
"linux": {
}
}
EOF
}
```
2. Create the nginx container
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create nginx container
CONTAINER_ID=$(sudo crictl create ${SANDBOX_ID} container.json sandbox.json)
}
```
3. Start the nginx container
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Start nginx container
sudo crictl start ${CONTAINER_ID}
}
```
### Validate the container
1. Inspect the created pod
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Inspect the pod
sudo crictl inspectp ${SANDBOX_ID}
}
```
2. Inspect the nginx container
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Inspect the container
sudo crictl inspect ${CONTAINER_ID}
}
```
3. Verify that nginx is running in gVisor
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Check dmesg
sudo crictl exec ${CONTAINER_ID} dmesg | grep -i gvisor
}
```
### Set up the Kubernetes Runtime Class
1. Install the Runtime Class for gVisor
[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Install a RuntimeClass
cat <<EOF | kubectl apply -f -
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
name: gvisor
handler: runsc
EOF
}
```
2. Create a Pod with the gVisor Runtime Class
[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create a pod
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: nginx-gvisor
spec:
runtimeClassName: gvisor
containers:
- name: nginx
image: nginx
EOF
}
```
3. Verify that the Pod is running
[embedmd]:# (../test/e2e/runtimeclass-install.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Get the pod
kubectl get pod nginx-gvisor -o wide
}
```

View File

@ -0,0 +1,212 @@
# Untrusted Workload Quickstart
This document describes how to install and run the `gvisor-containerd-shim`
using the untrusted workload CRI extension. This requires containerd 1.1 or
later.
*Note: The untrusted workload CRI extension is deprecated by containerd. If you
are using containerd 1.2 or later, please consider using the runtime handler instead.*
## Requirements
- **runsc**: See the [gVisor documentation](https://github.com/google/gvisor) for information on how to install runsc.
- **containerd**: See the [containerd website](https://containerd.io/) for information on how to install containerd.
## Install
### Install gvisor-containerd-shim
1. Download the latest release of the `gvisor-containerd-shim`. See the
[releases page](https://github.com/google/gvisor-containerd-shim/releases).
[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1(release): Install gvisor-containerd-shim
LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*gvisor-containerd-shim.linux-amd64' | head -1)
wget -O gvisor-containerd-shim ${LATEST_RELEASE}
chmod +x gvisor-containerd-shim
sudo mv gvisor-containerd-shim /usr/local/bin/gvisor-containerd-shim
}
```
2. Create the configuration for the gvisor shim in
`/etc/containerd/gvisor-containerd-shim.toml`:
[embedmd]:# (../test/e2e/shim-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create the gvisor-containerd-shim.toml
cat <<EOF | sudo tee /etc/containerd/gvisor-containerd-shim.toml
# This is the path to the default runc containerd-shim.
runc_shim = "/usr/local/bin/containerd-shim"
EOF
}
```
### Configure containerd
1. Update `/etc/containerd/config.toml`. Be sure to update the path to
`gvisor-containerd-shim` and `runsc` if necessary:
[embedmd]:# (../test/e2e/untrusted-workload/install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Create containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim = "/usr/local/bin/gvisor-containerd-shim"
shim_debug = true
[plugins.cri.containerd.untrusted_workload_runtime]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/local/bin/runsc"
runtime_root = "/run/containerd/runsc"
EOF
}
```
2. Restart `containerd`
```shell
sudo systemctl restart containerd
```
## Usage
You can run containers in gVisor via containerd's CRI.
### Install crictl
1. Download and install the crictl binary:
[embedmd]:# (../test/e2e/crictl-install.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Download crictl
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz
tar xf crictl-v1.13.0-linux-amd64.tar.gz
sudo mv crictl /usr/local/bin
}
```
2. Write the crictl configuration file
[embedmd]:# (../test/e2e/crictl-install.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Configure crictl
cat <<EOF | sudo tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
}
```
### Create the nginx Sandbox in gVisor
1. Pull the nginx image
[embedmd]:# (../test/e2e/untrusted-workload/usage.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Pull the nginx image
sudo crictl pull nginx
}
```
2. Create the sandbox creation request
[embedmd]:# (../test/e2e/untrusted-workload/usage.sh shell /{ # Step 2/ /^EOF\n}/)
```shell
{ # Step 2: Create sandbox.json
cat <<EOF | tee sandbox.json
{
"metadata": {
"name": "nginx-sandbox",
"namespace": "default",
"attempt": 1,
"uid": "hdishd83djaidwnduwk28bcsb"
},
"annotations": {
"io.kubernetes.cri.untrusted-workload": "true"
},
"linux": {
},
"log_directory": "/tmp"
}
EOF
}
```
3. Create the pod in gVisor
[embedmd]:# (../test/e2e/untrusted-workload/usage.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Create the sandbox
SANDBOX_ID=$(sudo crictl runp sandbox.json)
}
```
### Run the nginx Container in the Sandbox
1. Create the nginx container creation request
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 1/ /^EOF\n}/)
```shell
{ # Step 1: Create nginx container config
cat <<EOF | tee container.json
{
"metadata": {
"name": "nginx"
},
"image":{
"image": "nginx"
},
"log_path":"nginx.0.log",
"linux": {
}
}
EOF
}
```
2. Create the nginx container
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Create nginx container
CONTAINER_ID=$(sudo crictl create ${SANDBOX_ID} container.json sandbox.json)
}
```
3. Start the nginx container
[embedmd]:# (../test/e2e/run-container.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Start nginx container
sudo crictl start ${CONTAINER_ID}
}
```
### Validate the container
1. Inspect the created pod
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 1/ /^}/)
```shell
{ # Step 1: Inspect the pod
sudo crictl inspectp ${SANDBOX_ID}
}
```
2. Inspect the nginx container
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 2/ /^}/)
```shell
{ # Step 2: Inspect the container
sudo crictl inspect ${CONTAINER_ID}
}
```
3. Verify that nginx is running in gVisor
[embedmd]:# (../test/e2e/validate.sh shell /{ # Step 3/ /^}/)
```shell
{ # Step 3: Check dmesg
sudo crictl exec ${CONTAINER_ID} dmesg | grep -i gvisor
}
```

26
shim/v1/main.go Normal file
View File

@ -0,0 +1,26 @@
// Copyright 2018 The containerd Authors.
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"github.com/containerd/containerd/runtime/v2/shim"
runsc "github.com/google/gvisor-containerd-shim/pkg/v2"
)
func main() {
shim.Run("io.containerd.runsc.v1", runsc.New)
}

40
shim/v2/config.go Normal file
View File

@ -0,0 +1,40 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "github.com/BurntSushi/toml"
// config is the configuration for gvisor containerd shim.
type config struct {
// RuncShim is the shim binary path for the standard containerd-shim for runc.
// When the runtime is `runc`, the gvisor containerd shim will exec the current
// process as the standard containerd-shim. This is a workaround for containerd
// 1.1. In containerd 1.2, containerd will choose different containerd-shims
// based on the runtime.
RuncShim string `toml:"runc_shim"`
// RunscConfig is the configuration for runsc. Each key/value pair is
// converted directly into a runsc command line flag of the form --key=value.
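// For example, a "debug" key with the value "true" is passed to runsc as
// "--debug=true".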
RunscConfig map[string]string `toml:"runsc_config"`
}
// loadConfig loads the gvisor containerd shim config from the given path.
func loadConfig(path string) (*config, error) {
var c config
_, err := toml.DecodeFile(path, &c)
if err != nil {
return &c, err
}
return &c, nil
}

318
shim/v2/main.go Normal file
View File

@ -0,0 +1,318 @@
// Copyright 2018 The containerd Authors.
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bytes"
"context"
"flag"
"fmt"
"net"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
"strings"
"sync"
"syscall"
"time"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/runtime/v1/linux/proc"
containerdshim "github.com/containerd/containerd/runtime/v1/shim"
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
"github.com/containerd/ttrpc"
"github.com/containerd/typeurl"
ptypes "github.com/gogo/protobuf/types"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc"
"github.com/google/gvisor-containerd-shim/pkg/v1/shim"
)
var (
debugFlag bool
namespaceFlag string
socketFlag string
addressFlag string
workdirFlag string
runtimeRootFlag string
containerdBinaryFlag string
shimConfigFlag string
)
// ShimConfigPath is the default shim config file path.
const ShimConfigPath = "/etc/containerd/gvisor-containerd-shim.toml"
func init() {
flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs")
flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim")
flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve")
flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd")
flag.StringVar(&workdirFlag, "workdir", "", "path used to store large temporary data")
// Containerd defaults to runc, unless another runtime is explicitly specified.
// We keep the same default to make the default behavior consistent.
flag.StringVar(&runtimeRootFlag, "runtime-root", proc.RuncRoot, "root directory for the runtime")
// currently, the `containerd publish` utility is embedded in the daemon binary.
// The daemon invokes `containerd-shim -containerd-binary ...` with its own os.Executable() path.
flag.StringVar(&containerdBinaryFlag, "containerd-binary", "containerd", "path to containerd binary (used for `containerd publish`)")
flag.StringVar(&shimConfigFlag, "config", ShimConfigPath, "path to the shim configuration file")
flag.Parse()
}
func main() {
// This is a hack. Exec the current process to run the standard containerd-shim
// if the runtime root is not `runsc`. This is not needed for the shim v2 API.
if filepath.Base(runtimeRootFlag) != "runsc" {
if err := executeRuncShim(); err != nil {
fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err)
os.Exit(1)
}
}
debug.SetGCPercent(40)
go func() {
for range time.Tick(30 * time.Second) {
debug.FreeOSMemory()
}
}()
if debugFlag {
logrus.SetLevel(logrus.DebugLevel)
}
if os.Getenv("GOMAXPROCS") == "" {
// If GOMAXPROCS hasn't been set, we default to a value of 2 to reduce
// the number of Go stacks present in the shim.
runtime.GOMAXPROCS(2)
}
// Run regular shim if needed.
if err := executeShim(); err != nil {
fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err)
os.Exit(1)
}
}
// executeRuncShim execs the current process as a containerd-shim process,
// retaining all flags and environment variables.
func executeRuncShim() error {
c, err := loadConfig(shimConfigFlag)
if err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to load shim config")
}
shimPath := c.RuncShim
if shimPath == "" {
shimPath, err = exec.LookPath("containerd-shim")
if err != nil {
return errors.Wrapf(err, "lookup containerd-shim")
}
}
args := append([]string{shimPath}, os.Args[1:]...)
if err := syscall.Exec(shimPath, args, os.Environ()); err != nil {
return errors.Wrapf(err, "exec containerd-shim %q", shimPath)
}
return nil
}
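// executeShim sets up signal handling, starts the shim ttrpc service on the
// provided socket, and blocks until the service is shut down.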
func executeShim() error {
// Start handling signals as soon as possible so that things are properly
// reaped, even if the runtime exits before we reach the handler.
signals, err := setupSignals()
if err != nil {
return err
}
dump := make(chan os.Signal, 32)
signal.Notify(dump, syscall.SIGUSR1)
path, err := os.Getwd()
if err != nil {
return err
}
server, err := ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser()))
if err != nil {
return errors.Wrap(err, "failed creating server")
}
c, err := loadConfig(shimConfigFlag)
if err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to load shim config")
}
sv, err := shim.NewService(
shim.Config{
Path: path,
Namespace: namespaceFlag,
WorkDir: workdirFlag,
RuntimeRoot: runtimeRootFlag,
RunscConfig: c.RunscConfig,
},
&remoteEventsPublisher{address: addressFlag},
)
if err != nil {
return err
}
logrus.Debug("registering ttrpc server")
shimapi.RegisterShimService(server, sv)
socket := socketFlag
if err := serve(server, socket); err != nil {
return err
}
logger := logrus.WithFields(logrus.Fields{
"pid": os.Getpid(),
"path": path,
"namespace": namespaceFlag,
})
go func() {
for range dump {
dumpStacks(logger)
}
}()
return handleSignals(logger, signals, server, sv)
}
// serve serves the ttrpc API over a unix socket at the provided path.
// This function does not block.
func serve(server *ttrpc.Server, path string) error {
var (
l net.Listener
err error
)
if path == "" {
l, err = net.FileListener(os.NewFile(3, "socket"))
path = "[inherited from parent]"
} else {
if len(path) > 106 {
return errors.Errorf("%q: unix socket path too long (> 106)", path)
}
l, err = net.Listen("unix", "\x00"+path)
}
if err != nil {
return err
}
logrus.WithField("socket", path).Debug("serving api on unix socket")
go func() {
defer l.Close()
if err := server.Serve(context.Background(), l); err != nil &&
!strings.Contains(err.Error(), "use of closed network connection") {
logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure")
}
}()
return nil
}
// setupSignals creates a new signal handler for all signals and sets the shim as a
// sub-reaper so that the container processes are reparented
func setupSignals() (chan os.Signal, error) {
signals := make(chan os.Signal, 32)
signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE)
// Make sure runsc is set up to use the monitor for waiting on processes.
// TODO(random-liu): Move shim/reaper.go to a separate package.
runsc.Monitor = containerdshim.Default
// set the shim as the subreaper for all orphaned processes created by the container
if err := system.SetSubreaper(1); err != nil {
return nil, err
}
return signals, nil
}
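// handleSignals reaps child processes on SIGCHLD and shuts the server down on
// SIGTERM or SIGINT.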
func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error {
var (
termOnce sync.Once
done = make(chan struct{})
)
for {
select {
case <-done:
return nil
case s := <-signals:
switch s {
case unix.SIGCHLD:
if err := containerdshim.Reap(); err != nil {
logger.WithError(err).Error("reap exit status")
}
case unix.SIGTERM, unix.SIGINT:
go termOnce.Do(func() {
ctx := context.TODO()
if err := server.Shutdown(ctx); err != nil {
logger.WithError(err).Error("failed to shutdown server")
}
// Ensure our child, if any, is dead.
sv.Kill(ctx, &shimapi.KillRequest{
Signal: uint32(syscall.SIGKILL),
All: true,
})
sv.Delete(context.Background(), &ptypes.Empty{})
close(done)
})
case unix.SIGPIPE:
}
}
}
}
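// dumpStacks logs a dump of all goroutine stacks, growing the buffer until the
// full dump fits.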
func dumpStacks(logger *logrus.Entry) {
var (
buf []byte
stackSize int
)
bufferLen := 16384
for stackSize == len(buf) {
buf = make([]byte, bufferLen)
stackSize = runtime.Stack(buf, true)
bufferLen *= 2
}
buf = buf[:stackSize]
logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf)
}
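// remoteEventsPublisher forwards shim events to containerd by invoking
// `containerd publish` against the configured address.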
type remoteEventsPublisher struct {
address string
}
func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error {
ns, _ := namespaces.Namespace(ctx)
encoded, err := typeurl.MarshalAny(event)
if err != nil {
return err
}
data, err := encoded.Marshal()
if err != nil {
return err
}
cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns)
cmd.Stdin = bytes.NewReader(data)
c, err := containerdshim.Default.Start(cmd)
if err != nil {
return err
}
status, err := containerdshim.Default.Wait(cmd, c)
if err != nil {
return err
}
if status != 0 {
return errors.New("failed to publish event")
}
return nil
}

44
test/shim/containerd-install.sh Executable file
View File

@ -0,0 +1,44 @@
#!/bin/bash
# A script to install containerd and CNI plugins for e2e testing
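# Assumes CONTAINERD_VERSION is set by the caller, e.g. CONTAINERD_VERSION=1.2.4
# (the version shown is only an example).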
wget -q --https-only \
https://github.com/containerd/containerd/releases/download/v${CONTAINERD_VERSION}/containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz \
https://github.com/containernetworking/plugins/releases/download/v0.7.0/cni-plugins-amd64-v0.7.0.tgz
sudo mkdir -p /etc/containerd /etc/cni/net.d /opt/cni/bin
sudo tar -xvf cni-plugins-amd64-v0.7.0.tgz -C /opt/cni/bin/
sudo tar -xvf containerd-${CONTAINERD_VERSION}.linux-amd64.tar.gz -C /
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
# Set to avoid port overlap on older versions of containerd where default is 10010.
[plugins.cri]
stream_server_port = "10011"
EOF
cat <<EOF | sudo tee /etc/cni/net.d/10-bridge.conf
{
"cniVersion": "0.3.1",
"name": "bridge",
"type": "bridge",
"bridge": "cnio0",
"isGateway": true,
"ipMasq": true,
"ipam": {
"type": "host-local",
"ranges": [
[{"subnet": "10.200.0.0/24"}]
],
"routes": [{"dst": "0.0.0.0/0"}]
}
}
EOF
cat <<EOF | sudo tee /etc/cni/net.d/99-loopback.conf
{
"cniVersion": "0.3.1",
"type": "loopback"
}
EOF
sudo PATH=$PATH containerd -log-level debug &>/tmp/containerd-cri.log &

17
test/shim/crictl-install.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/bash
# A sample script for installing crictl.
set -ex
{ # Step 1: Download crictl
wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.13.0/crictl-v1.13.0-linux-amd64.tar.gz
tar xf crictl-v1.13.0-linux-amd64.tar.gz
sudo mv crictl /usr/local/bin
}
{ # Step 2: Configure crictl
cat <<EOF | sudo tee /etc/crictl.yaml
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
}

30
test/shim/run-container.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/bash
# A sample script to run a container in an existing pod
set -ex
{ # Step 1: Create nginx container config
cat <<EOF | tee container.json
{
"metadata": {
"name": "nginx"
},
"image":{
"image": "nginx"
},
"log_path":"nginx.0.log",
"linux": {
}
}
EOF
}
{ # Step 2: Create nginx container
CONTAINER_ID=$(sudo crictl create ${SANDBOX_ID} container.json sandbox.json)
}
{ # Step 3: Start nginx container
sudo crictl start ${CONTAINER_ID}
}

8
test/shim/runsc-install.sh Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# Sample script to install runsc
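# Assumes RUNSC_VERSION is set by the caller to a published gVisor release tag
# (the exact value is environment-specific).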
wget -q --https-only \
https://storage.googleapis.com/gvisor/releases/${RUNSC_VERSION}/runsc
chmod +x runsc
sudo mv runsc /usr/local/bin/

View File

@ -0,0 +1,21 @@
#!/bin/bash
# A sample script for configuring containerd to use containerd-shim-runsc-v1
# via the containerd runtime handler (shim v2).
set -ex
{ # Step 1: Create containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim_debug = true
[plugins.cri.containerd.runtimes.runsc]
runtime_type = "io.containerd.runsc.v1"
EOF
}
{ # Step 2: Restart containerd
sudo pkill containerd
sudo containerd -log-level debug &> /tmp/containerd-cri.log &
}

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime
# handler. This should work on containerd 1.2+
# This is meant to be run in a VM as it makes a fairly invasive install of
# containerd.
set -ex
# Install containerd
. ./test/e2e/containerd-install.sh
# Install gVisor
. ./test/e2e/runsc-install.sh
# Install gvisor-containerd-shim
. ./test/e2e/shim-install.sh
# Test installation/configuration
. ./test/e2e/runtime-handler-shim-v2/install.sh
# Install crictl
. ./test/e2e/crictl-install.sh
# Test usage (the same with runtime-handler)
. ./test/e2e/runtime-handler/usage.sh
# Run a container in the sandbox
. ./test/e2e/run-container.sh
# Validate the pod and container
. ./test/e2e/validate.sh
. ./test/e2e/runtime-handler-shim-v2/validate.sh

View File

@ -0,0 +1,7 @@
#!/bin/bash
# A sample script to validate the running containerd-shim-runsc-v1.
set -ex
ps aux | grep [c]ontainerd-shim-runsc-v1

View File

@ -0,0 +1,24 @@
#!/bin/bash
# A sample script for installing and configuring the gvisor-containerd-shim to
# use the containerd runtime handler.
set -ex
{ # Step 1: Create containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim = "/usr/local/bin/gvisor-containerd-shim"
shim_debug = true
[plugins.cri.containerd.runtimes.runsc]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/local/bin/runsc"
runtime_root = "/run/containerd/runsc"
EOF
}
{ # Step 2: Restart containerd
sudo pkill containerd
sudo containerd -log-level debug &> /tmp/containerd-cri.log &
}

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Runs end-to-end tests for gvisor-containerd-shim to test the use of runtime
# handler. This should work on containerd 1.2+
# This is meant to be run in a VM as it makes a fairly invasive install of
# containerd.
set -ex
# Install containerd
. ./test/e2e/containerd-install.sh
# Install gVisor
. ./test/e2e/runsc-install.sh
# Install gvisor-containerd-shim
. ./test/e2e/shim-install.sh
# Test installation/configuration
. ./test/e2e/runtime-handler/install.sh
# Install crictl
. ./test/e2e/crictl-install.sh
# Test usage
. ./test/e2e/runtime-handler/usage.sh
# Run a container in the sandbox
. ./test/e2e/run-container.sh
# Validate the pod and container
. ./test/e2e/validate.sh

View File

@ -0,0 +1,30 @@
#!/bin/bash
# A sample script for testing the gvisor-containerd-shim
# using runtime handler.
set -ex
{ # Step 1: Pull the nginx image
sudo crictl pull nginx
}
{ # Step 2: Create sandbox.json
cat <<EOF | tee sandbox.json
{
"metadata": {
"name": "nginx-sandbox",
"namespace": "default",
"attempt": 1,
"uid": "hdishd83djaidwnduwk28bcsb"
},
"linux": {
},
"log_directory": "/tmp"
}
EOF
}
{ # Step 3: Create the sandbox
SANDBOX_ID=$(sudo crictl runp --runtime runsc sandbox.json)
}

View File

@ -0,0 +1,33 @@
#!/bin/bash
# A sample script to test installing a RuntimeClass
set -ex
{ # Step 1: Install a RuntimeClass
cat <<EOF | kubectl apply -f -
apiVersion: node.k8s.io/v1beta1
kind: RuntimeClass
metadata:
name: gvisor
handler: runsc
EOF
}
{ # Step 2: Create a pod
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: nginx-gvisor
spec:
runtimeClassName: gvisor
containers:
- name: nginx
image: nginx
EOF
}
{ # Step 3: Get the pod
kubectl get pod nginx-gvisor -o wide
}

28
test/shim/shim-install.sh Executable file
View File

@ -0,0 +1,28 @@
#!/bin/bash
# A sample script to install gvisor-containerd-shim
set -ex
# Build or download gvisor-containerd-shim, depending on ${INSTALL_LATEST}.
if [ "${INSTALL_LATEST}" == "1" ]; then
{ # Step 1(release): Install gvisor-containerd-shim
LATEST_RELEASE=$(wget -qO - https://api.github.com/repos/google/gvisor-containerd-shim/releases | grep -oP '(?<="browser_download_url": ")https://[^"]*gvisor-containerd-shim.linux-amd64' | head -1)
wget -O gvisor-containerd-shim ${LATEST_RELEASE}
chmod +x gvisor-containerd-shim
sudo mv gvisor-containerd-shim /usr/local/bin/gvisor-containerd-shim
}
else
{ # Step 1(dev): Build and install gvisor-containerd-shim and containerd-shim-runsc-v1
make
sudo make install
}
fi
{ # Step 2: Create the gvisor-containerd-shim.toml
cat <<EOF | sudo tee /etc/containerd/gvisor-containerd-shim.toml
# This is the path to the default runc containerd-shim.
runc_shim = "/usr/local/bin/containerd-shim"
EOF
}

View File

@ -0,0 +1,27 @@
#!/bin/bash
# A sample script for installing and configuring the gvisor-containerd-shim to
# use the untrusted workload extension.
set -ex
{ # Step 1: Create containerd config.toml
cat <<EOF | sudo tee /etc/containerd/config.toml
disabled_plugins = ["restart"]
[plugins.linux]
shim = "/usr/local/bin/gvisor-containerd-shim"
shim_debug = true
# Set to avoid port overlap on older versions of containerd where default is 10010.
[plugins.cri]
stream_server_port = "10011"
[plugins.cri.containerd.untrusted_workload_runtime]
runtime_type = "io.containerd.runtime.v1.linux"
runtime_engine = "/usr/local/bin/runsc"
runtime_root = "/run/containerd/runsc"
EOF
}
{ # Step 2: Restart containerd
sudo pkill containerd
sudo containerd -log-level debug &>/tmp/containerd-cri.log &
}

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Runs end-to-end tests for gvisor-containerd-shim to test using the
# untrusted workload extension. This should work on containerd 1.1+
# This is meant to be run in a VM as it makes a fairly invasive install of
# containerd.
set -ex
# Install containerd
. ./test/e2e/containerd-install.sh
# Install gVisor
. ./test/e2e/runsc-install.sh
# Install gvisor-containerd-shim
. ./test/e2e/shim-install.sh
# Test installation/configuration
. ./test/e2e/untrusted-workload/install.sh
# Install crictl
. ./test/e2e/crictl-install.sh
# Test usage
. ./test/e2e/untrusted-workload/usage.sh
# Run a container in the sandbox
. ./test/e2e/run-container.sh
# Validate the pod and container
. ./test/e2e/validate.sh

View File

@ -0,0 +1,33 @@
#!/bin/bash
# A sample script for testing the gvisor-containerd-shim using the untrusted
# workload extension.
set -ex
{ # Step 1: Pull the nginx image
sudo crictl pull nginx
}
{ # Step 2: Create sandbox.json
cat <<EOF | tee sandbox.json
{
"metadata": {
"name": "nginx-sandbox",
"namespace": "default",
"attempt": 1,
"uid": "hdishd83djaidwnduwk28bcsb"
},
"annotations": {
"io.kubernetes.cri.untrusted-workload": "true"
},
"linux": {
},
"log_directory": "/tmp"
}
EOF
}
{ # Step 3: Create the sandbox
SANDBOX_ID=$(sudo crictl runp sandbox.json)
}

17
test/shim/validate.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/bash
# A sample script to validate a running nginx container.
set -ex
{ # Step 1: Inspect the pod
sudo crictl inspectp ${SANDBOX_ID}
}
{ # Step 2: Inspect the container
sudo crictl inspect ${CONTAINER_ID}
}
{ # Step 3: Check dmesg
sudo crictl exec ${CONTAINER_ID} dmesg | grep -i gvisor
}