2018-04-27 17:37:02 +00:00
|
|
|
// Copyright 2018 Google Inc.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// Package specutils contains utility functions for working with OCI runtime
|
|
|
|
// specs.
|
|
|
|
package specutils
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
2018-05-04 04:08:38 +00:00
|
|
|
"syscall"
|
|
|
|
"time"
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
|
|
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
|
|
|
|
"gvisor.googlesource.com/gvisor/pkg/log"
|
|
|
|
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
|
|
|
|
)
|
|
|
|
|
2018-06-04 18:25:40 +00:00
|
|
|
// ExePath is the path used to locate the runsc binary. It defaults to the
// currently running executable; tests that are not linked into the same
// binary override it.
var ExePath = "/proc/self/exe"
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// LogSpec logs the spec in a human-friendly way.
|
|
|
|
func LogSpec(spec *specs.Spec) {
|
|
|
|
log.Debugf("Spec: %+v", spec)
|
|
|
|
log.Debugf("Spec.Hooks: %+v", spec.Hooks)
|
|
|
|
log.Debugf("Spec.Linux: %+v", spec.Linux)
|
|
|
|
log.Debugf("Spec.Process: %+v", spec.Process)
|
|
|
|
log.Debugf("Spec.Root: %+v", spec.Root)
|
|
|
|
}
|
|
|
|
|
2018-05-15 17:17:19 +00:00
|
|
|
// ValidateSpec validates that the spec is compatible with runsc.
|
|
|
|
func ValidateSpec(spec *specs.Spec) error {
|
2018-06-28 16:56:23 +00:00
|
|
|
// Mandatory fields.
|
2018-05-15 17:17:19 +00:00
|
|
|
if spec.Process == nil {
|
2018-06-28 16:56:23 +00:00
|
|
|
return fmt.Errorf("Spec.Process must be defined: %+v", spec)
|
2018-05-15 17:17:19 +00:00
|
|
|
}
|
2018-06-28 16:56:23 +00:00
|
|
|
if len(spec.Process.Args) == 0 {
|
|
|
|
return fmt.Errorf("Spec.Process.Arg must be defined: %+v", spec.Process)
|
|
|
|
}
|
|
|
|
if spec.Root == nil {
|
|
|
|
return fmt.Errorf("Spec.Root must be defined: %+v", spec)
|
|
|
|
}
|
|
|
|
if len(spec.Root.Path) == 0 {
|
|
|
|
return fmt.Errorf("Spec.Root.Path must be defined: %+v", spec.Root)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unsupported fields.
|
|
|
|
if spec.Solaris != nil {
|
|
|
|
return fmt.Errorf("Spec.Solaris is not supported: %+v", spec)
|
|
|
|
}
|
|
|
|
if spec.Windows != nil {
|
|
|
|
return fmt.Errorf("Spec.Windows is not supported: %+v", spec)
|
|
|
|
}
|
|
|
|
if len(spec.Process.SelinuxLabel) != 0 {
|
2018-05-15 17:17:19 +00:00
|
|
|
return fmt.Errorf("SELinux is not supported: %s", spec.Process.SelinuxLabel)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Docker uses AppArmor by default, so just log that it's being ignored.
|
|
|
|
if spec.Process.ApparmorProfile != "" {
|
|
|
|
log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: Apply seccomp to application inside sandbox.
|
|
|
|
if spec.Linux != nil && spec.Linux.Seccomp != nil {
|
|
|
|
log.Warningf("Seccomp spec is being ignored")
|
|
|
|
}
|
2018-06-20 04:42:21 +00:00
|
|
|
|
2018-06-28 16:56:23 +00:00
|
|
|
// Two annotations are use by containerd to support multi-container pods.
|
2018-06-20 04:42:21 +00:00
|
|
|
// "io.kubernetes.cri.container-type"
|
|
|
|
// "io.kubernetes.cri.sandbox-id"
|
|
|
|
containerType, hasContainerType := spec.Annotations[ContainerdContainerTypeAnnotation]
|
|
|
|
_, hasSandboxID := spec.Annotations[ContainerdSandboxIDAnnotation]
|
|
|
|
switch {
|
|
|
|
// Non-containerd use won't set a container type.
|
|
|
|
case !hasContainerType:
|
|
|
|
case containerType == ContainerdContainerTypeSandbox:
|
|
|
|
// When starting a container in an existing sandbox, the sandbox ID
|
|
|
|
// must be set.
|
|
|
|
case containerType == ContainerdContainerTypeContainer:
|
|
|
|
if !hasSandboxID {
|
|
|
|
return fmt.Errorf("spec has container-type of %s, but no sandbox ID set", containerType)
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("unknown container-type: %s", containerType)
|
|
|
|
}
|
|
|
|
|
2018-05-15 17:17:19 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// ReadSpec reads an OCI runtime spec from the given bundle directory.
|
|
|
|
func ReadSpec(bundleDir string) (*specs.Spec, error) {
|
|
|
|
// The spec file must be in "config.json" inside the bundle directory.
|
|
|
|
specFile := filepath.Join(bundleDir, "config.json")
|
|
|
|
specBytes, err := ioutil.ReadFile(specFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error reading spec from file %q: %v", specFile, err)
|
|
|
|
}
|
|
|
|
var spec specs.Spec
|
|
|
|
if err := json.Unmarshal(specBytes, &spec); err != nil {
|
|
|
|
return nil, fmt.Errorf("error unmarshaling spec from file %q: %v\n %s", specFile, err, string(specBytes))
|
|
|
|
}
|
2018-06-28 16:56:23 +00:00
|
|
|
if err := ValidateSpec(&spec); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
return &spec, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetExecutablePath returns the absolute path to the executable, relative to
|
2018-06-20 04:42:21 +00:00
|
|
|
// the root. It searches the environment PATH for the first file that exists
|
2018-04-27 17:37:02 +00:00
|
|
|
// with the given name.
|
|
|
|
func GetExecutablePath(exec, root string, env []string) (string, error) {
|
|
|
|
exec = filepath.Clean(exec)
|
|
|
|
|
|
|
|
// Don't search PATH if exec is a path to a file (absolute or relative).
|
|
|
|
if strings.IndexByte(exec, '/') >= 0 {
|
|
|
|
return exec, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the PATH from the environment.
|
|
|
|
const prefix = "PATH="
|
|
|
|
var path []string
|
|
|
|
for _, e := range env {
|
|
|
|
if strings.HasPrefix(e, prefix) {
|
|
|
|
path = strings.Split(strings.TrimPrefix(e, prefix), ":")
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search the PATH for a file whose name matches the one we are looking
|
|
|
|
// for.
|
|
|
|
for _, p := range path {
|
|
|
|
abs := filepath.Join(root, p, exec)
|
2018-06-18 20:36:55 +00:00
|
|
|
// Do not follow symlink link because the target is in the container
|
|
|
|
// root filesystem.
|
|
|
|
if _, err := os.Lstat(abs); err == nil {
|
2018-04-27 17:37:02 +00:00
|
|
|
// We found it! Return the path relative to the root.
|
|
|
|
return filepath.Join("/", p, exec), nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Could not find a suitable path, just return the original string.
|
|
|
|
log.Warningf("could not find executable %s in path %s", exec, path)
|
|
|
|
return exec, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Capabilities takes in spec and returns a TaskCapabilities corresponding to
|
|
|
|
// the spec.
|
|
|
|
func Capabilities(specCaps *specs.LinuxCapabilities) (*auth.TaskCapabilities, error) {
|
|
|
|
var caps auth.TaskCapabilities
|
|
|
|
if specCaps != nil {
|
|
|
|
var err error
|
|
|
|
if caps.BoundingCaps, err = capsFromNames(specCaps.Bounding); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if caps.EffectiveCaps, err = capsFromNames(specCaps.Effective); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if caps.InheritableCaps, err = capsFromNames(specCaps.Inheritable); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if caps.PermittedCaps, err = capsFromNames(specCaps.Permitted); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
// TODO: Support ambient capabilities.
|
|
|
|
}
|
|
|
|
return &caps, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var capFromName = map[string]linux.Capability{
|
|
|
|
"CAP_CHOWN": linux.CAP_CHOWN,
|
|
|
|
"CAP_DAC_OVERRIDE": linux.CAP_DAC_OVERRIDE,
|
|
|
|
"CAP_DAC_READ_SEARCH": linux.CAP_DAC_READ_SEARCH,
|
|
|
|
"CAP_FOWNER": linux.CAP_FOWNER,
|
|
|
|
"CAP_FSETID": linux.CAP_FSETID,
|
|
|
|
"CAP_KILL": linux.CAP_KILL,
|
|
|
|
"CAP_SETGID": linux.CAP_SETGID,
|
|
|
|
"CAP_SETUID": linux.CAP_SETUID,
|
|
|
|
"CAP_SETPCAP": linux.CAP_SETPCAP,
|
|
|
|
"CAP_LINUX_IMMUTABLE": linux.CAP_LINUX_IMMUTABLE,
|
|
|
|
"CAP_NET_BIND_SERVICE": linux.CAP_NET_BIND_SERVICE,
|
2018-05-04 16:38:35 +00:00
|
|
|
"CAP_NET_BROADCAST": linux.CAP_NET_BROADCAST,
|
2018-04-27 17:37:02 +00:00
|
|
|
"CAP_NET_ADMIN": linux.CAP_NET_ADMIN,
|
|
|
|
"CAP_NET_RAW": linux.CAP_NET_RAW,
|
|
|
|
"CAP_IPC_LOCK": linux.CAP_IPC_LOCK,
|
|
|
|
"CAP_IPC_OWNER": linux.CAP_IPC_OWNER,
|
|
|
|
"CAP_SYS_MODULE": linux.CAP_SYS_MODULE,
|
|
|
|
"CAP_SYS_RAWIO": linux.CAP_SYS_RAWIO,
|
|
|
|
"CAP_SYS_CHROOT": linux.CAP_SYS_CHROOT,
|
|
|
|
"CAP_SYS_PTRACE": linux.CAP_SYS_PTRACE,
|
|
|
|
"CAP_SYS_PACCT": linux.CAP_SYS_PACCT,
|
|
|
|
"CAP_SYS_ADMIN": linux.CAP_SYS_ADMIN,
|
|
|
|
"CAP_SYS_BOOT": linux.CAP_SYS_BOOT,
|
|
|
|
"CAP_SYS_NICE": linux.CAP_SYS_NICE,
|
|
|
|
"CAP_SYS_RESOURCE": linux.CAP_SYS_RESOURCE,
|
|
|
|
"CAP_SYS_TIME": linux.CAP_SYS_TIME,
|
|
|
|
"CAP_SYS_TTY_CONFIG": linux.CAP_SYS_TTY_CONFIG,
|
|
|
|
"CAP_MKNOD": linux.CAP_MKNOD,
|
|
|
|
"CAP_LEASE": linux.CAP_LEASE,
|
|
|
|
"CAP_AUDIT_WRITE": linux.CAP_AUDIT_WRITE,
|
|
|
|
"CAP_AUDIT_CONTROL": linux.CAP_AUDIT_CONTROL,
|
|
|
|
"CAP_SETFCAP": linux.CAP_SETFCAP,
|
|
|
|
"CAP_MAC_OVERRIDE": linux.CAP_MAC_OVERRIDE,
|
|
|
|
"CAP_MAC_ADMIN": linux.CAP_MAC_ADMIN,
|
|
|
|
"CAP_SYSLOG": linux.CAP_SYSLOG,
|
|
|
|
"CAP_WAKE_ALARM": linux.CAP_WAKE_ALARM,
|
|
|
|
"CAP_BLOCK_SUSPEND": linux.CAP_BLOCK_SUSPEND,
|
2018-05-04 16:38:35 +00:00
|
|
|
"CAP_AUDIT_READ": linux.CAP_AUDIT_READ,
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func capsFromNames(names []string) (auth.CapabilitySet, error) {
|
|
|
|
var caps []linux.Capability
|
|
|
|
for _, n := range names {
|
|
|
|
c, ok := capFromName[n]
|
|
|
|
if !ok {
|
|
|
|
return 0, fmt.Errorf("unknown capability %q", n)
|
|
|
|
}
|
|
|
|
caps = append(caps, c)
|
|
|
|
}
|
|
|
|
return auth.CapabilitySetOfMany(caps), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Is9PMount returns true if the given mount can be mounted as an external gofer.
|
|
|
|
func Is9PMount(m specs.Mount) bool {
|
2018-06-15 20:57:29 +00:00
|
|
|
return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m)
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsSupportedDevMount returns true if the mount is a supported /dev mount.
|
|
|
|
// Only mount that does not conflict with runsc default /dev mount is
|
|
|
|
// supported.
|
|
|
|
func IsSupportedDevMount(m specs.Mount) bool {
|
|
|
|
// These are devices exist inside sentry. See pkg/sentry/fs/dev/dev.go
|
|
|
|
var existingDevices = []string{
|
|
|
|
"/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr",
|
|
|
|
"/dev/null", "/dev/zero", "/dev/full", "/dev/random",
|
|
|
|
"/dev/urandom", "/dev/shm", "/dev/pts", "/dev/ptmx",
|
|
|
|
}
|
|
|
|
dst := filepath.Clean(m.Destination)
|
|
|
|
if dst == "/dev" {
|
|
|
|
// OCI spec uses many different mounts for the things inside of '/dev'. We
|
|
|
|
// have a single mount at '/dev' that is always mounted, regardless of
|
|
|
|
// whether it was asked for, as the spec says we SHOULD.
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
for _, dev := range existingDevices {
|
|
|
|
if dst == dev || strings.HasPrefix(dst, dev+"/") {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// BinPath returns the real path to self, resolving symbolink links. This is done
|
|
|
|
// to make the process name appears as 'runsc', instead of 'exe'.
|
|
|
|
func BinPath() (string, error) {
|
2018-06-04 18:25:40 +00:00
|
|
|
binPath, err := filepath.EvalSymlinks(ExePath)
|
2018-04-27 17:37:02 +00:00
|
|
|
if err != nil {
|
2018-06-04 18:25:40 +00:00
|
|
|
return "", fmt.Errorf(`error resolving %q symlink: %v`, ExePath, err)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
return binPath, nil
|
|
|
|
}
|
2018-05-04 04:08:38 +00:00
|
|
|
|
2018-06-20 04:42:21 +00:00
|
|
|
const (
	// ContainerdContainerTypeAnnotation is the OCI annotation key that
	// containerd sets to say whether the container being created gets its
	// own sandbox or joins an existing one.
	ContainerdContainerTypeAnnotation = "io.kubernetes.cri.container-type"

	// ContainerdSandboxIDAnnotation is the OCI annotation key naming the
	// sandbox a container should be created in when it is not the first
	// container in that sandbox.
	ContainerdSandboxIDAnnotation = "io.kubernetes.cri.sandbox-id"

	// ContainerdContainerTypeContainer is the container-type value meaning
	// the container should be created in an existing sandbox.
	ContainerdContainerTypeContainer = "container"

	// ContainerdContainerTypeSandbox is the container-type value meaning the
	// container should be created in a new sandbox.
	ContainerdContainerTypeSandbox = "sandbox"
)
|
|
|
|
|
|
|
|
// ShouldCreateSandbox returns true if the spec indicates that a new sandbox
|
|
|
|
// should be created for the container. If false, the container should be
|
|
|
|
// started in an existing sandbox.
|
|
|
|
func ShouldCreateSandbox(spec *specs.Spec) bool {
|
|
|
|
t, ok := spec.Annotations[ContainerdContainerTypeAnnotation]
|
|
|
|
return !ok || t == ContainerdContainerTypeSandbox
|
|
|
|
}
|
|
|
|
|
|
|
|
// SandboxID returns the ID of the sandbox to join and whether an ID was found
|
|
|
|
// in the spec.
|
|
|
|
func SandboxID(spec *specs.Spec) (string, bool) {
|
|
|
|
id, ok := spec.Annotations[ContainerdSandboxIDAnnotation]
|
|
|
|
return id, ok
|
|
|
|
}
|
|
|
|
|
2018-05-04 04:08:38 +00:00
|
|
|
// WaitForReady waits for a process to become ready. The process is ready when
// the 'ready' function returns true. It continues to wait, polling with an
// exponential backoff capped at one second, while 'ready' returns false.
//
// It returns nil once 'ready' reports true, the error from 'ready' if it
// fails, and an error if the process identified by pid stops (or cannot be
// waited on) or if timeout elapses first. A non-positive timeout returns the
// timeout error without calling 'ready'.
func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) error {
	const maxBackoff = 1 * time.Second

	deadline := time.Now().Add(timeout)
	backoff := 1 * time.Millisecond
	for time.Now().Before(deadline) {
		isReady, err := ready()
		if err != nil {
			return err
		}
		if isReady {
			return nil
		}

		// Check if the process is still running. WNOHANG makes this a poll:
		// Wait4 returns 0 while the child is still alive.
		var ws syscall.WaitStatus
		child, err := syscall.Wait4(pid, &ws, syscall.WNOHANG, nil)
		if err != nil {
			return fmt.Errorf("process (%d) is not running, err: %v", pid, err)
		}
		if child == pid {
			// The child was reaped; say how it ended instead of printing a
			// nil error.
			if ws.Signaled() {
				return fmt.Errorf("process (%d) is not running, killed by signal: %v", pid, ws.Signal())
			}
			return fmt.Errorf("process (%d) is not running, exit status: %d", pid, ws.ExitStatus())
		}

		// Process continues to run, backoff and retry. Never sleep past the
		// deadline, so the call does not overshoot the requested timeout.
		sleep := backoff
		if remaining := time.Until(deadline); remaining < sleep {
			sleep = remaining
		}
		time.Sleep(sleep)

		backoff *= 2
		if backoff > maxBackoff {
			backoff = maxBackoff
		}
	}
	return fmt.Errorf("timed out waiting for process (%d)", pid)
}
|