gvisor/pkg/sentry/socket/control/control.go

406 lines
12 KiB
Go
Raw Normal View History

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package control provides internal representations of socket control
// messages.
package control
import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/binary"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
"gvisor.googlesource.com/gvisor/pkg/tcpip/transport/unix"
)
// maxInt is the largest value representable by the platform's int type: all
// bits of a uint set, shifted right once to clear the sign bit position.
const maxInt = int(^uint(0) >> 1)
// SCMCredentials represents a SCM_CREDENTIALS socket control message.
//
// It extends unix.CredentialsControlMessage with a method that reports the
// sender's identity as seen by a particular task.
type SCMCredentials interface {
unix.CredentialsControlMessage
// Credentials returns properly namespaced values for the sender's pid, uid
// and gid. t is the task from whose point of view the values are
// expressed.
Credentials(t *kernel.Task) (kernel.ThreadID, auth.UID, auth.GID)
}
// SCMRights represents a SCM_RIGHTS socket control message.
//
// It extends unix.RightsControlMessage with access to the files carried by
// the message.
type SCMRights interface {
unix.RightsControlMessage
// Files returns up to max RightsFiles.
//
// The RightsFiles implementation in this file removes the returned files
// from the message, so the caller becomes responsible for them.
Files(ctx context.Context, max int) RightsFiles
}
// RightsFiles represents a SCM_RIGHTS socket control message. A reference is
// maintained for each fs.File and is released either when an FD is created or
// when the Release method is called.
//
// The SCMRights methods are defined on *RightsFiles, so a pointer to a
// RightsFiles value is what satisfies the interface.
type RightsFiles []*fs.File
// NewSCMRights builds a SCM_RIGHTS socket control message representation from
// local sentry FDs by looking each one up in t's FD map.
//
// If any FD does not resolve to a file, every file collected so far is
// released and EBADF is returned.
func NewSCMRights(t *kernel.Task, fds []int32) (SCMRights, error) {
	collected := make(RightsFiles, 0, len(fds))
	for i := range fds {
		// Only the file itself is needed; the second result is discarded.
		f, _ := t.FDMap().GetDescriptor(kdefs.FD(fds[i]))
		if f != nil {
			collected = append(collected, f)
			continue
		}
		collected.Release()
		return nil, syserror.EBADF
	}
	return &collected, nil
}
// Files implements SCMRights.Files.
//
// It detaches the first min(max, len(*fs)) files from the receiver and
// returns them; the receiver keeps only the remainder.
func (fs *RightsFiles) Files(ctx context.Context, max int) RightsFiles {
	count := len(*fs)
	if max < count {
		count = max
	}
	taken, remaining := (*fs)[:count], (*fs)[count:]
	*fs = remaining
	return taken
}
// Clone implements unix.RightsControlMessage.Clone.
//
// The returned message holds its own slice of the same files, with an extra
// reference taken on each of them.
func (fs *RightsFiles) Clone() unix.RightsControlMessage {
	dup := append(RightsFiles(nil), *fs...)
	for i := range dup {
		dup[i].IncRef()
	}
	return &dup
}
// Release implements unix.RightsControlMessage.Release.
//
// It drops one reference on every file still held and then empties the
// receiver, so a repeated call has nothing left to release.
func (fs *RightsFiles) Release() {
	held := *fs
	for i := range held {
		held[i].DecRef()
	}
	*fs = nil
}
// rightsFDs gets up to the specified maximum number of FDs.
//
// It takes up to max files from rights, installs each one in t's FD map
// (searching from FD 0, with cloexec as the descriptor flag) and returns the
// FD numbers that were created. The reference held for every file taken from
// rights is dropped here, whether or not an FD was created for it.
//
// Installation stops at the first failure; the FDs created before the failure
// are still returned.
func rightsFDs(t *kernel.Task, rights SCMRights, cloexec bool, max int) []int32 {
	files := rights.Files(t, max)
	fds := make([]int32, 0, len(files))
	for i := 0; i < max && len(files) > 0; i++ {
		fd, err := t.FDMap().NewFDFrom(0, files[0], kernel.FDFlags{cloexec}, t.ThreadGroup().Limits())
		// The message's reference on this file is no longer needed,
		// regardless of whether the FD was installed.
		files[0].DecRef()
		files = files[1:]
		if err != nil {
			t.Warningf("Error inserting FD: %v", err)
			// This is what Linux does: stop at the first failure.
			//
			// The files that were not attempted were already handed to
			// us by Files (RightsFiles.Files detaches them from the
			// message), so nobody else will release them. Drop their
			// references here to avoid leaking them.
			files.Release()
			break
		}
		fds = append(fds, int32(fd))
	}
	return fds
}
// PackRights packs as many FDs as will fit into the unused capacity of buf.
//
// The number of FDs is bounded by the free capacity left after a control
// message header, at 4 bytes per FD. When not even one FD fits, buf is
// returned unchanged and rights is left untouched.
func PackRights(t *kernel.Task, rights SCMRights, cloexec bool, buf []byte) []byte {
	room := cap(buf) - len(buf) - linux.SizeOfControlMessageHeader
	fit := room / 4
	if fit <= 0 {
		// Linux does not return any FDs if none fit.
		return buf
	}

	installed := rightsFDs(t, rights, cloexec, fit)
	return putCmsg(buf, linux.SCM_RIGHTS, t.Arch().Width(), installed)
}
// scmCredentials represents an SCM_CREDENTIALS socket control message.
//
// Values of this type are compared field-by-field in Equals and built with
// positional literals elsewhere in this file, so the field order matters.
type scmCredentials struct {
// t is the task the message was created for; Credentials reports its ID
// as seen from the receiving task's PID namespace.
t *kernel.Task
// kuid is the sender's KUID; Credentials maps it into the receiving
// task's user namespace.
kuid auth.KUID
// kgid is the sender's KGID; Credentials maps it into the receiving
// task's user namespace.
kgid auth.KGID
}
// NewSCMCredentials creates a new SCM_CREDENTIALS socket control message
// representation from the credentials cred supplied by task t.
//
// The UID and GID are validated through t's credentials (UseUID / UseGID);
// any error from those checks is returned as is. A PID other than t's own
// thread group ID is only accepted when t has CAP_SYS_ADMIN in the user
// namespace of its PID namespace; otherwise EPERM is returned.
func NewSCMCredentials(t *kernel.Task, cred linux.ControlMessageCredentials) (SCMCredentials, error) {
	taskCreds := t.Credentials()

	kuid, err := taskCreds.UseUID(auth.UID(cred.UID))
	if err != nil {
		return nil, err
	}

	kgid, err := taskCreds.UseGID(auth.GID(cred.GID))
	if err != nil {
		return nil, err
	}

	if kernel.ThreadID(cred.PID) != t.ThreadGroup().ID() {
		// Claiming someone else's PID requires privilege.
		if !t.HasCapabilityIn(linux.CAP_SYS_ADMIN, t.PIDNamespace().UserNamespace()) {
			return nil, syserror.EPERM
		}
	}

	return &scmCredentials{t: t, kuid: kuid, kgid: kgid}, nil
}
// Equals implements unix.CredentialsControlMessage.Equals.
//
// Two messages are equal only when oc is a non-nil *scmCredentials whose
// task, KUID and KGID all match the receiver's.
func (c *scmCredentials) Equals(oc unix.CredentialsControlMessage) bool {
	other, ok := oc.(*scmCredentials)
	if !ok || other == nil {
		return false
	}
	return *c == *other
}
// putUint64 writes n, in usermem.ByteOrder, into the 8 bytes of capacity that
// follow len(buf) and returns buf extended to cover them. The caller must
// ensure that capacity is available.
func putUint64(buf []byte, n uint64) []byte {
	end := len(buf) + 8
	usermem.ByteOrder.PutUint64(buf[len(buf):end], n)
	return buf[:end]
}
// putUint32 writes n, in usermem.ByteOrder, into the 4 bytes of capacity that
// follow len(buf) and returns buf extended to cover them. The caller must
// ensure that capacity is available.
func putUint32(buf []byte, n uint32) []byte {
	end := len(buf) + 4
	usermem.ByteOrder.PutUint32(buf[len(buf):end], n)
	return buf[:end]
}
// putCmsg writes a control message header and as much data as will fit into
// the unused capacity of a buffer.
//
// The header consists of a 64-bit length, the SOL_SOCKET level and msgType.
// Each element of data is written as a 32-bit value. The result is padded to
// align where capacity allows. If not even the header fits, buf is returned
// unchanged.
func putCmsg(buf []byte, msgType uint32, align uint, data []int32) []byte {
	// We can't write to space that doesn't exist, so if we are going to align
	// the available space, we must align down.
	//
	// align must be >= 4 and each data int32 is 4 bytes. The length of the
	// header is already aligned, so if we align to the width of the data there
	// are two cases:
	// 1. The aligned length is less than the length of the header. The
	// unaligned length was also less than the length of the header, so we
	// can't write anything.
	// 2. The aligned length is greater than or equal to the length of the
	// header. We can write the header plus zero or more datas. We can't write
	// a partial int32, so the length of the message will be
	// min(aligned length, header + datas).
	avail := AlignDown(cap(buf)-len(buf), 4)
	if avail < linux.SizeOfControlMessageHeader {
		return buf
	}

	msgLen := 4*len(data) + linux.SizeOfControlMessageHeader
	if avail < msgLen {
		msgLen = avail
	}

	buf = putUint64(buf, uint64(msgLen))
	buf = putUint32(buf, linux.SOL_SOCKET)
	buf = putUint32(buf, msgType)

	// Stop as soon as another 4-byte value would no longer fit.
	for i := 0; i < len(data) && len(buf)+4 <= cap(buf); i++ {
		buf = putUint32(buf, uint32(data[i]))
	}
	return alignSlice(buf, align)
}
// putCmsgStruct writes a control message header followed by the marshalled
// form of data into the unused capacity of buf.
//
// If the header does not fit, buf is returned unchanged. If the header fits
// but the marshalled data does not, only the header is returned, with its
// length field still set to the header size and no alignment padding.
// Otherwise the length field is updated to cover header plus data and the
// result is padded to align where capacity allows.
func putCmsgStruct(buf []byte, msgType uint32, align uint, data interface{}) []byte {
if cap(buf)-len(buf) < linux.SizeOfControlMessageHeader {
return buf
}
// ob keeps the original length so the message start can be found again.
ob := buf
// Provisional length: just the header. It is corrected below once the size
// of the marshalled data is known.
buf = putUint64(buf, uint64(linux.SizeOfControlMessageHeader))
buf = putUint32(buf, linux.SOL_SOCKET)
buf = putUint32(buf, msgType)
// hdrBuf is the buffer with only the header appended.
hdrBuf := buf
buf = binary.Marshal(buf, usermem.ByteOrder, data)
// Check if we went over.
// NOTE(review): this relies on binary.Marshal returning a slice with a
// different capacity when the data did not fit in the existing one
// (presumably because it appends and reallocates) - confirm.
if cap(buf) != cap(ob) {
return hdrBuf
}
// Fix up length.
// putUint64 writes at ob's end, which is where the length field starts; ob
// shares its backing array with buf, so this overwrites the provisional
// length in place. The returned slice is intentionally discarded.
putUint64(ob, uint64(len(buf)-len(ob)))
return alignSlice(buf, align)
}
// Credentials implements SCMCredentials.Credentials.
//
// The sender's task ID is looked up in t's PID namespace, and the stored KUID
// and KGID are mapped into t's user namespace, falling back to the overflow
// ID when there is no mapping.
func (c *scmCredentials) Credentials(t *kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) {
	// "When a process's user and group IDs are passed over a UNIX domain
	// socket to a process in a different user namespace (see the description
	// of SCM_CREDENTIALS in unix(7)), they are translated into the
	// corresponding values as per the receiving process's user and group ID
	// mappings." - user_namespaces(7)
	return t.PIDNamespace().IDOfTask(c.t),
		c.kuid.In(t.UserNamespace()).OrOverflow(),
		c.kgid.In(t.UserNamespace()).OrOverflow()
}
// PackCredentials packs the credentials in the control message (or default
// credentials if none) into a buffer.
//
// The message carries pid, uid and gid, in that order, as 32-bit values. When
// creds is nil the defaults are pid 0 and the "nobody" UID and GID.
func PackCredentials(t *kernel.Task, creds SCMCredentials, buf []byte) []byte {
	align := t.Arch().Width()

	// Start from the defaults and override them when credentials exist.
	var (
		pid kernel.ThreadID
		uid = auth.UID(auth.NobodyKUID)
		gid = auth.GID(auth.NobodyKGID)
	)
	if creds != nil {
		pid, uid, gid = creds.Credentials(t)
	}

	payload := []int32{int32(pid), int32(uid), int32(gid)}
	return putCmsg(buf, linux.SCM_CREDENTIALS, align, payload)
}
// AlignUp rounds a length up to the nearest multiple of align. align must be
// a power of 2.
func AlignUp(length int, align uint) int {
	mask := int(align) - 1
	return (length + mask) &^ mask
}
// AlignDown rounds a length down to the nearest multiple of align. align must
// be a power of 2.
func AlignDown(length int, align uint) int {
	mask := int(align) - 1
	return length &^ mask
}
// alignSlice extends a slice's length (up to the capacity) to align it.
//
// If the aligned length would exceed the capacity, buf is returned as is.
func alignSlice(buf []byte, align uint) []byte {
	if padded := AlignUp(len(buf), align); padded <= cap(buf) {
		return buf[:padded]
	}
	// Linux allows unaligned data if there isn't room for alignment.
	// Since there isn't room for alignment, there isn't room for any
	// additional messages either.
	return buf
}
// PackTimestamp packs a SO_TIMESTAMP socket control message.
//
// timestamp is converted with linux.NsecToTimeval and the result is written
// into the unused capacity of buf, aligned to the task's architecture width.
func PackTimestamp(t *kernel.Task, timestamp int64, buf []byte) []byte {
	width := t.Arch().Width()
	tv := linux.NsecToTimeval(timestamp)
	return putCmsgStruct(buf, linux.SO_TIMESTAMP, width, tv)
}
// Parse parses a raw socket control message into portable objects.
//
// buf is walked as a sequence of control messages. Only SOL_SOCKET messages
// of type SCM_RIGHTS or SCM_CREDENTIALS are accepted; a truncated header, an
// inconsistent length, any other level or type, or more than SCM_MAX_FD FDs in
// total yields EINVAL. FDs from several SCM_RIGHTS messages accumulate, while
// a later SCM_CREDENTIALS message replaces an earlier one.
//
// When buf carries no credentials, default credentials are derived from
// socketOrEndpoint via makeCreds. Errors from NewSCMCredentials and
// NewSCMRights are returned unchanged.
func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (unix.ControlMessages, error) {
	var (
		fdNums   linux.ControlMessageRights
		rawCreds linux.ControlMessageCredentials
		gotCreds bool
	)

	for off := 0; off < len(buf); {
		// A header must fit in what is left of the buffer.
		if off+linux.SizeOfControlMessageHeader > len(buf) {
			return unix.ControlMessages{}, syserror.EINVAL
		}

		var hdr linux.ControlMessageHeader
		binary.Unmarshal(buf[off:off+linux.SizeOfControlMessageHeader], usermem.ByteOrder, &hdr)

		// The declared length must cover the header, stay within the
		// buffer, and the message must be a socket-level one.
		if hdr.Length < uint64(linux.SizeOfControlMessageHeader) ||
			hdr.Length > uint64(len(buf)-off) ||
			hdr.Level != linux.SOL_SOCKET {
			return unix.ControlMessages{}, syserror.EINVAL
		}

		off += linux.SizeOfControlMessageHeader
		payload := int(hdr.Length) - linux.SizeOfControlMessageHeader

		// The use of t.Arch().Width() is analogous to Linux's use of
		// sizeof(long) in CMSG_ALIGN.
		width := t.Arch().Width()

		switch hdr.Type {
		case linux.SCM_CREDENTIALS:
			if payload < linux.SizeOfControlMessageCredentials {
				return unix.ControlMessages{}, syserror.EINVAL
			}
			binary.Unmarshal(buf[off:off+linux.SizeOfControlMessageCredentials], usermem.ByteOrder, &rawCreds)
			gotCreds = true

		case linux.SCM_RIGHTS:
			// Ignore any trailing bytes that do not make up a whole FD.
			fdBytes := AlignDown(payload, linux.SizeOfControlMessageRight)
			if len(fdNums)+fdBytes/linux.SizeOfControlMessageRight > linux.SCM_MAX_FD {
				return unix.ControlMessages{}, syserror.EINVAL
			}
			for raw := buf[off : off+fdBytes]; len(raw) > 0; raw = raw[linux.SizeOfControlMessageRight:] {
				fdNums = append(fdNums, int32(usermem.ByteOrder.Uint32(raw[:linux.SizeOfControlMessageRight])))
			}

		default:
			// Unknown message type.
			return unix.ControlMessages{}, syserror.EINVAL
		}

		// Skip the payload plus its alignment padding.
		off += AlignUp(payload, width)
	}

	var credentials SCMCredentials
	if !gotCreds {
		credentials = makeCreds(t, socketOrEndpoint)
	} else {
		parsed, err := NewSCMCredentials(t, rawCreds)
		if err != nil {
			return unix.ControlMessages{}, err
		}
		credentials = parsed
	}

	var rights SCMRights
	if len(fdNums) > 0 {
		parsed, err := NewSCMRights(t, fdNums)
		if err != nil {
			return unix.ControlMessages{}, err
		}
		rights = parsed
	}

	if credentials == nil && rights == nil {
		return unix.ControlMessages{}, nil
	}
	return unix.ControlMessages{Credentials: credentials, Rights: rights}, nil
}
// makeCreds returns default credentials for t, built from its effective KUID
// and KGID, when socketOrEndpoint is a unix.Credentialer that reports
// Passcred or ConnectedPasscred. In every other case, including a nil t or a
// nil socketOrEndpoint, it returns nil.
func makeCreds(t *kernel.Task, socketOrEndpoint interface{}) SCMCredentials {
	if t == nil || socketOrEndpoint == nil {
		return nil
	}

	cr, ok := socketOrEndpoint.(unix.Credentialer)
	if !ok {
		return nil
	}
	if !cr.Passcred() && !cr.ConnectedPasscred() {
		return nil
	}

	taskCreds := t.Credentials()
	return &scmCredentials{t: t, kuid: taskCreds.EffectiveKUID, kgid: taskCreds.EffectiveKGID}
}
// New creates default control messages if needed.
//
// The result carries rights as given, plus whatever default credentials
// makeCreds produces for t and socketOrEndpoint (possibly none).
func New(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRights) unix.ControlMessages {
	var cms unix.ControlMessages
	cms.Credentials = makeCreds(t, socketOrEndpoint)
	cms.Rights = rights
	return cms
}