gvisor/pkg/sentry/syscalls/linux/sys_socket.go

1139 lines
30 KiB
Go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package linux
import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
// LINT.IfChange
// minListenBacklog is the minimum reasonable backlog for listening sockets.
const minListenBacklog = 8
// maxListenBacklog is the maximum allowed backlog for listening sockets.
const maxListenBacklog = 1024
// maxAddrLen is the maximum socket address length we're willing to accept.
const maxAddrLen = 200
// maxOptLen is the maximum sockopt parameter length we're willing to accept.
const maxOptLen = 1024 * 8
// maxControlLen is the maximum length of the msghdr.msg_control buffer we're
// willing to accept. Note that this limit is smaller than Linux, which allows
// buffers upto INT_MAX.
const maxControlLen = 10 * 1024 * 1024
// nameLenOffset is the offset from the start of the MessageHeader64 struct to
// the NameLen field.
const nameLenOffset = 8
// controlLenOffset is the offset form the start of the MessageHeader64 struct
// to the ControlLen field.
const controlLenOffset = 40
// flagsOffset is the offset form the start of the MessageHeader64 struct
// to the Flags field.
const flagsOffset = 48
const sizeOfInt32 = 4
// messageHeader64Len is the length of a MessageHeader64 struct.
var messageHeader64Len = uint64(binary.Size(MessageHeader64{}))
// multipleMessageHeader64Len is the length of a multipeMessageHeader64 struct.
var multipleMessageHeader64Len = uint64(binary.Size(multipleMessageHeader64{}))
// baseRecvFlags are the flags that are accepted across recvmsg(2),
// recvmmsg(2), and recvfrom(2).
const baseRecvFlags = linux.MSG_OOB | linux.MSG_DONTROUTE | linux.MSG_DONTWAIT | linux.MSG_NOSIGNAL | linux.MSG_WAITALL | linux.MSG_TRUNC | linux.MSG_CTRUNC
// MessageHeader64 is the 64-bit representation of the msghdr struct used in
// the recvmsg and sendmsg syscalls.
type MessageHeader64 struct {
// Name is the optional pointer to a network address buffer.
Name uint64
// NameLen is the length of the buffer pointed to by Name.
NameLen uint32
_ uint32
// Iov is a pointer to an array of io vectors that describe the memory
// locations involved in the io operation.
Iov uint64
// IovLen is the length of the array pointed to by Iov.
IovLen uint64
// Control is the optional pointer to ancillary control data.
Control uint64
// ControlLen is the length of the data pointed to by Control.
ControlLen uint64
// Flags on the sent/received message.
Flags int32
_ int32
}
// multipleMessageHeader64 is the 64-bit representation of the mmsghdr struct used in
// the recvmmsg and sendmmsg syscalls.
type multipleMessageHeader64 struct {
msgHdr MessageHeader64
msgLen uint32
_ int32
}
// CopyInMessageHeader64 copies a message header from user to kernel memory.
func CopyInMessageHeader64(t *kernel.Task, addr usermem.Addr, msg *MessageHeader64) error {
b := t.CopyScratchBuffer(52)
if _, err := t.CopyInBytes(addr, b); err != nil {
return err
}
msg.Name = usermem.ByteOrder.Uint64(b[0:])
msg.NameLen = usermem.ByteOrder.Uint32(b[8:])
msg.Iov = usermem.ByteOrder.Uint64(b[16:])
msg.IovLen = usermem.ByteOrder.Uint64(b[24:])
msg.Control = usermem.ByteOrder.Uint64(b[32:])
msg.ControlLen = usermem.ByteOrder.Uint64(b[40:])
msg.Flags = int32(usermem.ByteOrder.Uint32(b[48:]))
return nil
}
// CaptureAddress allocates memory for and copies a socket address structure
// from the untrusted address space range.
func CaptureAddress(t *kernel.Task, addr usermem.Addr, addrlen uint32) ([]byte, error) {
if addrlen > maxAddrLen {
return nil, syserror.EINVAL
}
addrBuf := make([]byte, addrlen)
if _, err := t.CopyInBytes(addr, addrBuf); err != nil {
return nil, err
}
return addrBuf, nil
}
// writeAddress writes a sockaddr structure and its length to an output buffer
// in the unstrusted address space range. If the address is bigger than the
// buffer, it is truncated.
func writeAddress(t *kernel.Task, addr interface{}, addrLen uint32, addrPtr usermem.Addr, addrLenPtr usermem.Addr) error {
// Get the buffer length.
var bufLen uint32
if _, err := t.CopyIn(addrLenPtr, &bufLen); err != nil {
return err
}
if int32(bufLen) < 0 {
return syserror.EINVAL
}
// Write the length unconditionally.
if _, err := t.CopyOut(addrLenPtr, addrLen); err != nil {
return err
}
if addr == nil {
return nil
}
if bufLen > addrLen {
bufLen = addrLen
}
// Copy as much of the address as will fit in the buffer.
encodedAddr := binary.Marshal(nil, usermem.ByteOrder, addr)
if bufLen > uint32(len(encodedAddr)) {
bufLen = uint32(len(encodedAddr))
}
_, err := t.CopyOutBytes(addrPtr, encodedAddr[:int(bufLen)])
return err
}
// Socket implements the linux syscall socket(2).
func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
domain := int(args[0].Int())
stype := args[1].Int()
protocol := int(args[2].Int())
// Check and initialize the flags.
if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
return 0, nil, syserror.EINVAL
}
// Create the new socket.
s, e := socket.New(t, domain, linux.SockType(stype&0xf), protocol)
if e != nil {
return 0, nil, e.ToError()
}
s.SetFlags(fs.SettableFileFlags{
NonBlocking: stype&linux.SOCK_NONBLOCK != 0,
})
defer s.DecRef()
fd, err := t.NewFDFrom(0, s, kernel.FDFlags{
CloseOnExec: stype&linux.SOCK_CLOEXEC != 0,
})
if err != nil {
return 0, nil, err
}
return uintptr(fd), nil, nil
}
// SocketPair implements the linux syscall socketpair(2).
func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
domain := int(args[0].Int())
stype := args[1].Int()
protocol := int(args[2].Int())
socks := args[3].Pointer()
// Check and initialize the flags.
if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
return 0, nil, syserror.EINVAL
}
fileFlags := fs.SettableFileFlags{
NonBlocking: stype&linux.SOCK_NONBLOCK != 0,
}
// Create the socket pair.
s1, s2, e := socket.Pair(t, domain, linux.SockType(stype&0xf), protocol)
if e != nil {
return 0, nil, e.ToError()
}
s1.SetFlags(fileFlags)
s2.SetFlags(fileFlags)
defer s1.DecRef()
defer s2.DecRef()
// Create the FDs for the sockets.
fds, err := t.NewFDs(0, []*fs.File{s1, s2}, kernel.FDFlags{
CloseOnExec: stype&linux.SOCK_CLOEXEC != 0,
})
if err != nil {
return 0, nil, err
}
// Copy the file descriptors out.
if _, err := t.CopyOut(socks, fds); err != nil {
for _, fd := range fds {
if file, _ := t.FDTable().Remove(fd); file != nil {
file.DecRef()
}
}
return 0, nil, err
}
return 0, nil, nil
}
// Connect implements the linux syscall connect(2).
func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Uint()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Capture address and call syscall implementation.
a, err := CaptureAddress(t, addr, addrlen)
if err != nil {
return 0, nil, err
}
blocking := !file.Flags().NonBlocking
return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), kernel.ERESTARTSYS)
}
// accept is the implementation of the accept syscall. It is called by accept
// and accept4 syscall handlers.
func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) {
// Check that no unsupported flags are passed in.
if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 {
return 0, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, syserror.ENOTSOCK
}
// Call the syscall implementation for this socket, then copy the
// output address if one is specified.
blocking := !file.Flags().NonBlocking
peerRequested := addrLen != 0
nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking)
if e != nil {
return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
}
if peerRequested {
// NOTE(magi): Linux does not give you an error if it can't
// write the data back out so neither do we.
if err := writeAddress(t, peer, peerLen, addr, addrLen); err == syserror.EINVAL {
return 0, err
}
}
return uintptr(nfd), nil
}
// Accept4 implements the linux syscall accept4(2).
func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
flags := int(args[3].Int())
n, err := accept(t, fd, addr, addrlen, flags)
return n, nil, err
}
// Accept implements the linux syscall accept(2).
func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
n, err := accept(t, fd, addr, addrlen, 0)
return n, nil, err
}
// Bind implements the linux syscall bind(2).
func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Uint()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Capture address and call syscall implementation.
a, err := CaptureAddress(t, addr, addrlen)
if err != nil {
return 0, nil, err
}
return 0, nil, s.Bind(t, a).ToError()
}
// Listen implements the linux syscall listen(2).
func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
backlog := args[1].Int()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Per Linux, the backlog is silently capped to reasonable values.
if backlog <= 0 {
backlog = minListenBacklog
}
if backlog > maxListenBacklog {
backlog = maxListenBacklog
}
return 0, nil, s.Listen(t, int(backlog)).ToError()
}
// Shutdown implements the linux syscall shutdown(2).
func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
how := args[1].Int()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Validate how, then call syscall implementation.
switch how {
case linux.SHUT_RD, linux.SHUT_WR, linux.SHUT_RDWR:
default:
return 0, nil, syserror.EINVAL
}
return 0, nil, s.Shutdown(t, int(how)).ToError()
}
// GetSockOpt implements the linux syscall getsockopt(2).
func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
level := args[1].Int()
name := args[2].Int()
optValAddr := args[3].Pointer()
optLenAddr := args[4].Pointer()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Read the length. Reject negative values.
optLen := int32(0)
if _, err := t.CopyIn(optLenAddr, &optLen); err != nil {
return 0, nil, err
}
if optLen < 0 {
return 0, nil, syserror.EINVAL
}
// Call syscall implementation then copy both value and value len out.
v, e := getSockOpt(t, s, int(level), int(name), optValAddr, int(optLen))
if e != nil {
return 0, nil, e.ToError()
}
vLen := int32(binary.Size(v))
if _, err := t.CopyOut(optLenAddr, vLen); err != nil {
return 0, nil, err
}
if v != nil {
if _, err := t.CopyOut(optValAddr, v); err != nil {
return 0, nil, err
}
}
return 0, nil, nil
}
// getSockOpt tries to handle common socket options, or dispatches to a specific
// socket implementation.
func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr usermem.Addr, len int) (interface{}, *syserr.Error) {
if level == linux.SOL_SOCKET {
switch name {
case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL:
if len < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
}
}
switch name {
case linux.SO_TYPE:
_, skType, _ := s.Type()
return int32(skType), nil
case linux.SO_DOMAIN:
family, _, _ := s.Type()
return int32(family), nil
case linux.SO_PROTOCOL:
_, _, protocol := s.Type()
return int32(protocol), nil
}
}
return s.GetSockOpt(t, level, name, optValAddr, len)
}
// SetSockOpt implements the linux syscall setsockopt(2).
//
// Note that unlike Linux, enabling SO_PASSCRED does not autobind the socket.
func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
level := args[1].Int()
name := args[2].Int()
optValAddr := args[3].Pointer()
optLen := args[4].Int()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
if optLen < 0 {
return 0, nil, syserror.EINVAL
}
if optLen > maxOptLen {
return 0, nil, syserror.EINVAL
}
buf := t.CopyScratchBuffer(int(optLen))
if _, err := t.CopyIn(optValAddr, &buf); err != nil {
return 0, nil, err
}
// Call syscall implementation.
if err := s.SetSockOpt(t, int(level), int(name), buf); err != nil {
return 0, nil, err.ToError()
}
return 0, nil, nil
}
// GetSockName implements the linux syscall getsockname(2).
func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Get the socket name and copy it to the caller.
v, vl, err := s.GetSockName(t)
if err != nil {
return 0, nil, err.ToError()
}
return 0, nil, writeAddress(t, v, vl, addr, addrlen)
}
// GetPeerName implements the linux syscall getpeername(2).
func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
addr := args[1].Pointer()
addrlen := args[2].Pointer()
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Get the socket peer name and copy it to the caller.
v, vl, err := s.GetPeerName(t)
if err != nil {
return 0, nil, err.ToError()
}
return 0, nil, writeAddress(t, v, vl, addr, addrlen)
}
// RecvMsg implements the linux syscall recvmsg(2).
func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
msgPtr := args[1].Pointer()
flags := args[2].Int()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Reject flags that we don't handle yet.
if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 {
return 0, nil, syserror.EINVAL
}
if file.Flags().NonBlocking {
flags |= linux.MSG_DONTWAIT
}
var haveDeadline bool
var deadline ktime.Time
if dl := s.RecvTimeout(); dl > 0 {
deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
haveDeadline = true
} else if dl < 0 {
flags |= linux.MSG_DONTWAIT
}
n, err := recvSingleMsg(t, s, msgPtr, flags, haveDeadline, deadline)
return n, nil, err
}
// RecvMMsg implements the linux syscall recvmmsg(2).
func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
msgPtr := args[1].Pointer()
vlen := args[2].Uint()
flags := args[3].Int()
toPtr := args[4].Pointer()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
return 0, nil, syserror.EINVAL
}
// Reject flags that we don't handle yet.
if flags & ^(baseRecvFlags|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 {
return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
if file.Flags().NonBlocking {
flags |= linux.MSG_DONTWAIT
}
var haveDeadline bool
var deadline ktime.Time
if toPtr != 0 {
ts, err := copyTimespecIn(t, toPtr)
if err != nil {
return 0, nil, err
}
if !ts.Valid() {
return 0, nil, syserror.EINVAL
}
deadline = t.Kernel().MonotonicClock().Now().Add(ts.ToDuration())
haveDeadline = true
}
if !haveDeadline {
if dl := s.RecvTimeout(); dl > 0 {
deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
haveDeadline = true
} else if dl < 0 {
flags |= linux.MSG_DONTWAIT
}
}
var count uint32
var err error
for i := uint64(0); i < uint64(vlen); i++ {
mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len)
if !ok {
return 0, nil, syserror.EFAULT
}
var n uintptr
if n, err = recvSingleMsg(t, s, mp, flags, haveDeadline, deadline); err != nil {
break
}
// Copy the received length to the caller.
lp, ok := mp.AddLength(messageHeader64Len)
if !ok {
return 0, nil, syserror.EFAULT
}
if _, err = t.CopyOut(lp, uint32(n)); err != nil {
break
}
count++
}
if count == 0 {
return 0, nil, err
}
return uintptr(count), nil, nil
}
func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) {
// Capture the message header and io vectors.
var msg MessageHeader64
if err := CopyInMessageHeader64(t, msgPtr, &msg); err != nil {
return 0, err
}
if msg.IovLen > linux.UIO_MAXIOV {
return 0, syserror.EMSGSIZE
}
dst, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, err
}
// FIXME(b/63594852): Pretend we have an empty error queue.
if flags&linux.MSG_ERRQUEUE != 0 {
return 0, syserror.EAGAIN
}
// Fast path when no control message nor name buffers are provided.
if msg.ControlLen == 0 && msg.NameLen == 0 {
n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0)
if err != nil {
return 0, syserror.ConvertIntr(err.ToError(), kernel.ERESTARTSYS)
}
if !cms.Unix.Empty() {
mflags |= linux.MSG_CTRUNC
cms.Release()
}
if int(msg.Flags) != mflags {
// Copy out the flags to the caller.
if _, err := t.CopyOut(msgPtr+flagsOffset, int32(mflags)); err != nil {
return 0, err
}
}
return uintptr(n), nil
}
if msg.ControlLen > maxControlLen {
return 0, syserror.ENOBUFS
}
n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen)
if e != nil {
return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
}
defer cms.Release()
controlData := make([]byte, 0, msg.ControlLen)
controlData = control.PackControlMessages(t, cms, controlData)
if cr, ok := s.(transport.Credentialer); ok && cr.Passcred() {
creds, _ := cms.Unix.Credentials.(control.SCMCredentials)
controlData, mflags = control.PackCredentials(t, creds, controlData, mflags)
}
if cms.Unix.Rights != nil {
controlData, mflags = control.PackRights(t, cms.Unix.Rights.(control.SCMRights), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags)
}
// Copy the address to the caller.
if msg.NameLen != 0 {
if err := writeAddress(t, sender, senderLen, usermem.Addr(msg.Name), usermem.Addr(msgPtr+nameLenOffset)); err != nil {
return 0, err
}
}
// Copy the control data to the caller.
if _, err := t.CopyOut(msgPtr+controlLenOffset, uint64(len(controlData))); err != nil {
return 0, err
}
if len(controlData) > 0 {
if _, err := t.CopyOut(usermem.Addr(msg.Control), controlData); err != nil {
return 0, err
}
}
// Copy out the flags to the caller.
if _, err := t.CopyOut(msgPtr+flagsOffset, int32(mflags)); err != nil {
return 0, err
}
return uintptr(n), nil
}
// recvFrom is the implementation of the recvfrom syscall. It is called by
// recvfrom and recv syscall handlers.
func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) {
if int(bufLen) < 0 {
return 0, syserror.EINVAL
}
// Reject flags that we don't handle yet.
if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CONFIRM) != 0 {
return 0, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, syserror.ENOTSOCK
}
if file.Flags().NonBlocking {
flags |= linux.MSG_DONTWAIT
}
dst, err := t.SingleIOSequence(bufPtr, int(bufLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, err
}
var haveDeadline bool
var deadline ktime.Time
if dl := s.RecvTimeout(); dl > 0 {
deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
haveDeadline = true
} else if dl < 0 {
flags |= linux.MSG_DONTWAIT
}
n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0)
cm.Release()
if e != nil {
return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS)
}
// Copy the address to the caller.
if nameLenPtr != 0 {
if err := writeAddress(t, sender, senderLen, namePtr, nameLenPtr); err != nil {
return 0, err
}
}
return uintptr(n), nil
}
// RecvFrom implements the linux syscall recvfrom(2).
func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
bufPtr := args[1].Pointer()
bufLen := args[2].Uint64()
flags := args[3].Int()
namePtr := args[4].Pointer()
nameLenPtr := args[5].Pointer()
n, err := recvFrom(t, fd, bufPtr, bufLen, flags, namePtr, nameLenPtr)
return n, nil, err
}
// SendMsg implements the linux syscall sendmsg(2).
func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
msgPtr := args[1].Pointer()
flags := args[2].Int()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Reject flags that we don't handle yet.
if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 {
return 0, nil, syserror.EINVAL
}
if file.Flags().NonBlocking {
flags |= linux.MSG_DONTWAIT
}
n, err := sendSingleMsg(t, s, file, msgPtr, flags)
return n, nil, err
}
// SendMMsg implements the linux syscall sendmmsg(2).
func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
msgPtr := args[1].Pointer()
vlen := args[2].Uint()
flags := args[3].Int()
if t.Arch().Width() != 8 {
// We only handle 64-bit for now.
return 0, nil, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, nil, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, nil, syserror.ENOTSOCK
}
// Reject flags that we don't handle yet.
if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 {
return 0, nil, syserror.EINVAL
}
if file.Flags().NonBlocking {
flags |= linux.MSG_DONTWAIT
}
var count uint32
var err error
for i := uint64(0); i < uint64(vlen); i++ {
mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len)
if !ok {
return 0, nil, syserror.EFAULT
}
var n uintptr
if n, err = sendSingleMsg(t, s, file, mp, flags); err != nil {
break
}
// Copy the received length to the caller.
lp, ok := mp.AddLength(messageHeader64Len)
if !ok {
return 0, nil, syserror.EFAULT
}
if _, err = t.CopyOut(lp, uint32(n)); err != nil {
break
}
count++
}
if count == 0 {
return 0, nil, err
}
return uintptr(count), nil, nil
}
func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr usermem.Addr, flags int32) (uintptr, error) {
// Capture the message header.
var msg MessageHeader64
if err := CopyInMessageHeader64(t, msgPtr, &msg); err != nil {
return 0, err
}
var controlData []byte
if msg.ControlLen > 0 {
// Put an upper bound to prevent large allocations.
if msg.ControlLen > maxControlLen {
return 0, syserror.ENOBUFS
}
controlData = make([]byte, msg.ControlLen)
if _, err := t.CopyIn(usermem.Addr(msg.Control), &controlData); err != nil {
return 0, err
}
}
// Read the destination address if one is specified.
var to []byte
if msg.NameLen != 0 {
var err error
to, err = CaptureAddress(t, usermem.Addr(msg.Name), msg.NameLen)
if err != nil {
return 0, err
}
}
// Read data then call the sendmsg implementation.
if msg.IovLen > linux.UIO_MAXIOV {
return 0, syserror.EMSGSIZE
}
src, err := t.IovecsIOSequence(usermem.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, err
}
controlMessages, err := control.Parse(t, s, controlData)
if err != nil {
return 0, err
}
var haveDeadline bool
var deadline ktime.Time
if dl := s.SendTimeout(); dl > 0 {
deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
haveDeadline = true
} else if dl < 0 {
flags |= linux.MSG_DONTWAIT
}
// Call the syscall implementation.
n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages)
err = handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file)
if err != nil {
controlMessages.Release()
}
return uintptr(n), err
}
// sendTo is the implementation of the sendto syscall. It is called by sendto
// and send syscall handlers.
func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) {
bl := int(bufLen)
if bl < 0 {
return 0, syserror.EINVAL
}
// Get socket from the file descriptor.
file := t.GetFile(fd)
if file == nil {
return 0, syserror.EBADF
}
defer file.DecRef()
// Extract the socket.
s, ok := file.FileOperations.(socket.Socket)
if !ok {
return 0, syserror.ENOTSOCK
}
if file.Flags().NonBlocking {
flags |= linux.MSG_DONTWAIT
}
// Read the destination address if one is specified.
var to []byte
var err error
if namePtr != 0 {
to, err = CaptureAddress(t, namePtr, nameLen)
if err != nil {
return 0, err
}
}
src, err := t.SingleIOSequence(bufPtr, bl, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, err
}
var haveDeadline bool
var deadline ktime.Time
if dl := s.SendTimeout(); dl > 0 {
deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
haveDeadline = true
} else if dl < 0 {
flags |= linux.MSG_DONTWAIT
}
// Call the syscall implementation.
n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)})
return uintptr(n), handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendto", file)
}
// SendTo implements the linux syscall sendto(2).
func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
bufPtr := args[1].Pointer()
bufLen := args[2].Uint64()
flags := args[3].Int()
namePtr := args[4].Pointer()
nameLen := args[5].Uint()
n, err := sendTo(t, fd, bufPtr, bufLen, flags, namePtr, nameLen)
return n, nil, err
}
// LINT.ThenChange(./vfs2/socket.go)