// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package socket provides the interfaces that need to be provided by socket
// implementations and providers, as well as per family demultiplexing of socket
// creation.
|
|
|
|
package socket
|
|
|
|
|
|
|
|
import (
|
2020-12-02 19:31:38 +00:00
|
|
|
"bytes"
|
2018-04-27 17:37:02 +00:00
|
|
|
"fmt"
|
|
|
|
"sync/atomic"
|
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
"golang.org/x/sys/unix"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/context"
|
2021-03-29 20:28:32 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/hostarch"
|
2020-09-12 00:40:57 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/marshal"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/device"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel"
|
|
|
|
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
2020-04-01 14:56:05 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/syserr"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
2020-12-02 19:31:38 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/usermem"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
2018-05-02 05:11:07 +00:00
|
|
|
// ControlMessages represents the union of unix control messages and tcpip
|
|
|
|
// control messages.
|
|
|
|
type ControlMessages struct {
|
2018-10-17 18:36:32 +00:00
|
|
|
Unix transport.ControlMessages
|
2020-12-11 18:32:03 +00:00
|
|
|
IP IPControlMessages
|
|
|
|
}
|
|
|
|
|
|
|
|
// packetInfoToLinux converts IPPacketInfo from tcpip format to Linux format.
|
|
|
|
func packetInfoToLinux(packetInfo tcpip.IPPacketInfo) linux.ControlMessageIPPacketInfo {
|
|
|
|
var p linux.ControlMessageIPPacketInfo
|
|
|
|
p.NIC = int32(packetInfo.NIC)
|
|
|
|
copy(p.LocalAddr[:], []byte(packetInfo.LocalAddr))
|
|
|
|
copy(p.DestinationAddr[:], []byte(packetInfo.DestinationAddr))
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
|
2020-12-17 16:45:38 +00:00
|
|
|
// errOriginToLinux maps tcpip socket origin to Linux socket origin constants.
|
|
|
|
func errOriginToLinux(origin tcpip.SockErrOrigin) uint8 {
|
|
|
|
switch origin {
|
|
|
|
case tcpip.SockExtErrorOriginNone:
|
|
|
|
return linux.SO_EE_ORIGIN_NONE
|
|
|
|
case tcpip.SockExtErrorOriginLocal:
|
|
|
|
return linux.SO_EE_ORIGIN_LOCAL
|
|
|
|
case tcpip.SockExtErrorOriginICMP:
|
|
|
|
return linux.SO_EE_ORIGIN_ICMP
|
|
|
|
case tcpip.SockExtErrorOriginICMP6:
|
|
|
|
return linux.SO_EE_ORIGIN_ICMP6
|
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("unknown socket origin: %d", origin))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// sockErrCmsgToLinux converts SockError control message from tcpip format to
|
|
|
|
// Linux format.
|
|
|
|
func sockErrCmsgToLinux(sockErr *tcpip.SockError) linux.SockErrCMsg {
|
|
|
|
if sockErr == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
ee := linux.SockExtendedErr{
|
2021-05-12 00:21:24 +00:00
|
|
|
Errno: uint32(syserr.TranslateNetstackError(sockErr.Err).ToLinux()),
|
2021-02-01 20:01:49 +00:00
|
|
|
Origin: errOriginToLinux(sockErr.Cause.Origin()),
|
|
|
|
Type: sockErr.Cause.Type(),
|
|
|
|
Code: sockErr.Cause.Code(),
|
|
|
|
Info: sockErr.Cause.Info(),
|
2020-12-17 16:45:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
switch sockErr.NetProto {
|
|
|
|
case header.IPv4ProtocolNumber:
|
|
|
|
errMsg := &linux.SockErrCMsgIPv4{SockExtendedErr: ee}
|
|
|
|
if len(sockErr.Offender.Addr) > 0 {
|
|
|
|
addr, _ := ConvertAddress(linux.AF_INET, sockErr.Offender)
|
|
|
|
errMsg.Offender = *addr.(*linux.SockAddrInet)
|
|
|
|
}
|
|
|
|
return errMsg
|
|
|
|
case header.IPv6ProtocolNumber:
|
|
|
|
errMsg := &linux.SockErrCMsgIPv6{SockExtendedErr: ee}
|
|
|
|
if len(sockErr.Offender.Addr) > 0 {
|
|
|
|
addr, _ := ConvertAddress(linux.AF_INET6, sockErr.Offender)
|
|
|
|
errMsg.Offender = *addr.(*linux.SockAddrInet6)
|
|
|
|
}
|
|
|
|
return errMsg
|
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("invalid net proto for creating SockErrCMsg: %d", sockErr.NetProto))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-11 18:32:03 +00:00
|
|
|
// NewIPControlMessages converts the tcpip ControlMessgaes (which does not
|
|
|
|
// have Linux specific format) to Linux format.
|
|
|
|
func NewIPControlMessages(family int, cmgs tcpip.ControlMessages) IPControlMessages {
|
|
|
|
var orgDstAddr linux.SockAddr
|
|
|
|
if cmgs.HasOriginalDstAddress {
|
|
|
|
orgDstAddr, _ = ConvertAddress(family, cmgs.OriginalDstAddress)
|
|
|
|
}
|
|
|
|
return IPControlMessages{
|
|
|
|
HasTimestamp: cmgs.HasTimestamp,
|
|
|
|
Timestamp: cmgs.Timestamp,
|
|
|
|
HasInq: cmgs.HasInq,
|
|
|
|
Inq: cmgs.Inq,
|
|
|
|
HasTOS: cmgs.HasTOS,
|
|
|
|
TOS: cmgs.TOS,
|
|
|
|
HasTClass: cmgs.HasTClass,
|
|
|
|
TClass: cmgs.TClass,
|
|
|
|
HasIPPacketInfo: cmgs.HasIPPacketInfo,
|
|
|
|
PacketInfo: packetInfoToLinux(cmgs.PacketInfo),
|
|
|
|
OriginalDstAddress: orgDstAddr,
|
2020-12-17 16:45:38 +00:00
|
|
|
SockErr: sockErrCmsgToLinux(cmgs.SockErr),
|
2020-12-11 18:32:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// IPControlMessages contains socket control messages for IP sockets.
|
|
|
|
// This can contain Linux specific structures unlike tcpip.ControlMessages.
|
|
|
|
//
|
|
|
|
// +stateify savable
|
|
|
|
type IPControlMessages struct {
|
|
|
|
// HasTimestamp indicates whether Timestamp is valid/set.
|
|
|
|
HasTimestamp bool
|
|
|
|
|
|
|
|
// Timestamp is the time (in ns) that the last packet used to create
|
|
|
|
// the read data was received.
|
|
|
|
Timestamp int64
|
|
|
|
|
|
|
|
// HasInq indicates whether Inq is valid/set.
|
|
|
|
HasInq bool
|
|
|
|
|
|
|
|
// Inq is the number of bytes ready to be received.
|
|
|
|
Inq int32
|
|
|
|
|
|
|
|
// HasTOS indicates whether Tos is valid/set.
|
|
|
|
HasTOS bool
|
|
|
|
|
|
|
|
// TOS is the IPv4 type of service of the associated packet.
|
|
|
|
TOS uint8
|
|
|
|
|
|
|
|
// HasTClass indicates whether TClass is valid/set.
|
|
|
|
HasTClass bool
|
|
|
|
|
|
|
|
// TClass is the IPv6 traffic class of the associated packet.
|
|
|
|
TClass uint32
|
|
|
|
|
|
|
|
// HasIPPacketInfo indicates whether PacketInfo is set.
|
|
|
|
HasIPPacketInfo bool
|
|
|
|
|
|
|
|
// PacketInfo holds interface and address data on an incoming packet.
|
|
|
|
PacketInfo linux.ControlMessageIPPacketInfo
|
|
|
|
|
|
|
|
// OriginalDestinationAddress holds the original destination address
|
|
|
|
// and port of the incoming packet.
|
|
|
|
OriginalDstAddress linux.SockAddr
|
2020-12-17 16:45:38 +00:00
|
|
|
|
|
|
|
// SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE).
|
|
|
|
SockErr linux.SockErrCMsg
|
2018-05-02 05:11:07 +00:00
|
|
|
}
|
|
|
|
|
2019-12-10 18:56:51 +00:00
|
|
|
// Release releases Unix domain socket credentials and rights.
|
2020-08-03 20:33:47 +00:00
|
|
|
func (c *ControlMessages) Release(ctx context.Context) {
|
|
|
|
c.Unix.Release(ctx)
|
2019-12-10 18:56:51 +00:00
|
|
|
}
|
|
|
|
|
2020-04-01 14:56:05 +00:00
|
|
|
// Socket is an interface combining fs.FileOperations and SocketOps,
|
|
|
|
// representing a VFS1 socket file.
|
2018-04-27 17:37:02 +00:00
|
|
|
type Socket interface {
|
|
|
|
fs.FileOperations
|
2020-04-01 14:56:05 +00:00
|
|
|
SocketOps
|
|
|
|
}
|
|
|
|
|
|
|
|
// SocketVFS2 is an interface combining vfs.FileDescription and SocketOps,
|
|
|
|
// representing a VFS2 socket file.
|
|
|
|
type SocketVFS2 interface {
|
|
|
|
vfs.FileDescriptionImpl
|
|
|
|
SocketOps
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2020-04-01 14:56:05 +00:00
|
|
|
// SocketOps is the interface containing socket syscalls used by the syscall
|
|
|
|
// layer to redirect them to the appropriate implementation.
|
|
|
|
//
|
|
|
|
// It is implemented by both Socket and SocketVFS2.
|
|
|
|
type SocketOps interface {
|
2021-03-03 18:23:55 +00:00
|
|
|
// Connect implements the connect(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error
|
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// Accept implements the accept4(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
// Returns fd, real peer address length and error. Real peer address
|
|
|
|
// length is only set if len(peer) > 0.
|
2019-08-08 23:49:18 +00:00
|
|
|
Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// Bind implements the bind(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
Bind(t *kernel.Task, sockaddr []byte) *syserr.Error
|
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// Listen implements the listen(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
Listen(t *kernel.Task, backlog int) *syserr.Error
|
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// Shutdown implements the shutdown(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
Shutdown(t *kernel.Task, how int) *syserr.Error
|
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// GetSockOpt implements the getsockopt(2) linux unix.
|
2021-03-29 20:28:32 +00:00
|
|
|
GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// SetSockOpt implements the setsockopt(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error
|
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// GetSockName implements the getsockname(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// addrLen is the address length to be returned to the application, not
|
|
|
|
// necessarily the actual length of the address.
|
2019-08-08 23:49:18 +00:00
|
|
|
GetSockName(t *kernel.Task) (addr linux.SockAddr, addrLen uint32, err *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// GetPeerName implements the getpeername(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// addrLen is the address length to be returned to the application, not
|
|
|
|
// necessarily the actual length of the address.
|
2019-08-08 23:49:18 +00:00
|
|
|
GetPeerName(t *kernel.Task) (addr linux.SockAddr, addrLen uint32, err *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// RecvMsg implements the recvmsg(2) linux unix.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// senderAddrLen is the address length to be returned to the application,
|
|
|
|
// not necessarily the actual length of the address.
|
2018-12-13 21:19:39 +00:00
|
|
|
//
|
2019-04-30 04:20:05 +00:00
|
|
|
// flags control how RecvMsg should be completed. msgFlags indicate how
|
|
|
|
// the RecvMsg call was completed. Note that control message truncation
|
|
|
|
// may still be required even if the MSG_CTRUNC bit is not set in
|
|
|
|
// msgFlags. In that case, the caller should set MSG_CTRUNC appropriately.
|
|
|
|
//
|
2018-12-13 21:19:39 +00:00
|
|
|
// If err != nil, the recv was not successful.
|
2019-08-08 23:49:18 +00:00
|
|
|
RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages ControlMessages, err *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2021-03-03 18:23:55 +00:00
|
|
|
// SendMsg implements the sendmsg(2) linux unix. SendMsg does not take
|
2018-04-27 17:37:02 +00:00
|
|
|
// ownership of the ControlMessage on error.
|
2018-12-06 19:40:39 +00:00
|
|
|
//
|
|
|
|
// If n > 0, err will either be nil or an error from t.Block.
|
2018-12-15 00:12:51 +00:00
|
|
|
SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages ControlMessages) (n int, err *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// SetRecvTimeout sets the timeout (in ns) for recv operations. Zero means
|
2018-12-15 00:12:51 +00:00
|
|
|
// no timeout, and negative means DONTWAIT.
|
2018-04-27 17:37:02 +00:00
|
|
|
SetRecvTimeout(nanoseconds int64)
|
|
|
|
|
|
|
|
// RecvTimeout gets the current timeout (in ns) for recv operations. Zero
|
2018-12-15 00:12:51 +00:00
|
|
|
// means no timeout, and negative means DONTWAIT.
|
2018-04-27 17:37:02 +00:00
|
|
|
RecvTimeout() int64
|
2018-12-15 00:12:51 +00:00
|
|
|
|
|
|
|
// SetSendTimeout sets the timeout (in ns) for send operations. Zero means
|
|
|
|
// no timeout, and negative means DONTWAIT.
|
|
|
|
SetSendTimeout(nanoseconds int64)
|
|
|
|
|
|
|
|
// SendTimeout gets the current timeout (in ns) for send operations. Zero
|
|
|
|
// means no timeout, and negative means DONTWAIT.
|
|
|
|
SendTimeout() int64
|
2019-06-06 22:03:44 +00:00
|
|
|
|
|
|
|
// State returns the current state of the socket, as represented by Linux in
|
|
|
|
// procfs. The returned state value is protocol-specific.
|
|
|
|
State() uint32
|
2019-06-10 22:16:42 +00:00
|
|
|
|
|
|
|
// Type returns the family, socket type and protocol of the socket.
|
|
|
|
Type() (family int, skType linux.SockType, protocol int)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Provider is the interface implemented by providers of sockets for specific
|
|
|
|
// address families (e.g., AF_INET).
|
|
|
|
type Provider interface {
|
|
|
|
// Socket creates a new socket.
|
|
|
|
//
|
|
|
|
// If a nil Socket _and_ a nil error is returned, it means that the
|
|
|
|
// protocol is not supported. A non-nil error should only be returned
|
|
|
|
// if the protocol is supported, but an error occurs during creation.
|
2019-06-06 23:59:21 +00:00
|
|
|
Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// Pair creates a pair of connected sockets.
|
|
|
|
//
|
|
|
|
// See Socket for error information.
|
2019-06-06 23:59:21 +00:00
|
|
|
Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// families holds a map of all known address families and their providers.
|
|
|
|
var families = make(map[int][]Provider)
|
|
|
|
|
|
|
|
// RegisterProvider registers the provider of a given address family so that
|
|
|
|
// sockets of that type can be created via socket() and/or socketpair()
|
|
|
|
// syscalls.
|
2020-04-01 14:56:05 +00:00
|
|
|
//
|
|
|
|
// This should only be called during the initialization of the address family.
|
2018-04-27 17:37:02 +00:00
|
|
|
func RegisterProvider(family int, provider Provider) {
|
|
|
|
families[family] = append(families[family], provider)
|
|
|
|
}
|
|
|
|
|
|
|
|
// New creates a new socket with the given family, type and protocol.
|
2019-06-06 23:59:21 +00:00
|
|
|
func New(t *kernel.Task, family int, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
for _, p := range families[family] {
|
|
|
|
s, err := p.Socket(t, stype, protocol)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if s != nil {
|
2019-06-10 22:16:42 +00:00
|
|
|
t.Kernel().RecordSocket(s)
|
2018-04-27 17:37:02 +00:00
|
|
|
return s, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, syserr.ErrAddressFamilyNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pair creates a new connected socket pair with the given family, type and
|
|
|
|
// protocol.
|
2019-06-06 23:59:21 +00:00
|
|
|
func Pair(t *kernel.Task, family int, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
providers, ok := families[family]
|
|
|
|
if !ok {
|
|
|
|
return nil, nil, syserr.ErrAddressFamilyNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, p := range providers {
|
2019-02-07 22:43:18 +00:00
|
|
|
s1, s2, err := p.Pair(t, stype, protocol)
|
2018-04-27 17:37:02 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
2019-02-07 22:43:18 +00:00
|
|
|
if s1 != nil && s2 != nil {
|
|
|
|
k := t.Kernel()
|
2019-06-10 22:16:42 +00:00
|
|
|
k.RecordSocket(s1)
|
|
|
|
k.RecordSocket(s2)
|
2019-02-07 22:43:18 +00:00
|
|
|
return s1, s2, nil
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil, syserr.ErrSocketNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewDirent returns a sockfs fs.Dirent that resides on device d.
|
|
|
|
func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent {
|
|
|
|
ino := d.NextIno()
|
2019-01-15 04:33:29 +00:00
|
|
|
iops := &fsutil.SimpleFileInode{
|
|
|
|
InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{
|
|
|
|
User: fs.PermMask{Read: true, Write: true},
|
|
|
|
}, linux.SOCKFS_MAGIC),
|
|
|
|
}
|
2019-06-14 01:39:43 +00:00
|
|
|
inode := fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{
|
2018-04-27 17:37:02 +00:00
|
|
|
Type: fs.Socket,
|
|
|
|
DeviceID: d.DeviceID(),
|
|
|
|
InodeID: ino,
|
2021-03-29 20:28:32 +00:00
|
|
|
BlockSize: hostarch.PageSize,
|
2018-04-27 17:37:02 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// Dirent name matches net/socket.c:sockfs_dname.
|
2019-06-14 01:39:43 +00:00
|
|
|
return fs.NewDirent(ctx, inode, fmt.Sprintf("socket:[%d]", ino))
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
2020-04-01 14:56:05 +00:00
|
|
|
// ProviderVFS2 is the vfs2 interface implemented by providers of sockets for
|
|
|
|
// specific address families (e.g., AF_INET).
|
|
|
|
type ProviderVFS2 interface {
|
|
|
|
// Socket creates a new socket.
|
|
|
|
//
|
|
|
|
// If a nil Socket _and_ a nil error is returned, it means that the
|
|
|
|
// protocol is not supported. A non-nil error should only be returned
|
|
|
|
// if the protocol is supported, but an error occurs during creation.
|
|
|
|
Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error)
|
|
|
|
|
|
|
|
// Pair creates a pair of connected sockets.
|
|
|
|
//
|
|
|
|
// See Socket for error information.
|
|
|
|
Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// familiesVFS2 holds a map of all known address families and their providers.
|
|
|
|
var familiesVFS2 = make(map[int][]ProviderVFS2)
|
|
|
|
|
|
|
|
// RegisterProviderVFS2 registers the provider of a given address family so that
|
|
|
|
// sockets of that type can be created via socket() and/or socketpair()
|
|
|
|
// syscalls.
|
|
|
|
//
|
|
|
|
// This should only be called during the initialization of the address family.
|
|
|
|
func RegisterProviderVFS2(family int, provider ProviderVFS2) {
|
|
|
|
familiesVFS2[family] = append(familiesVFS2[family], provider)
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewVFS2 creates a new socket with the given family, type and protocol.
|
|
|
|
func NewVFS2(t *kernel.Task, family int, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) {
|
|
|
|
for _, p := range familiesVFS2[family] {
|
|
|
|
s, err := p.Socket(t, stype, protocol)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if s != nil {
|
2020-04-05 04:01:42 +00:00
|
|
|
t.Kernel().RecordSocketVFS2(s)
|
2020-04-01 14:56:05 +00:00
|
|
|
return s, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, syserr.ErrAddressFamilyNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// PairVFS2 creates a new connected socket pair with the given family, type and
|
|
|
|
// protocol.
|
|
|
|
func PairVFS2(t *kernel.Task, family int, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) {
|
|
|
|
providers, ok := familiesVFS2[family]
|
|
|
|
if !ok {
|
|
|
|
return nil, nil, syserr.ErrAddressFamilyNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, p := range providers {
|
|
|
|
s1, s2, err := p.Pair(t, stype, protocol)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
if s1 != nil && s2 != nil {
|
2020-04-05 04:01:42 +00:00
|
|
|
k := t.Kernel()
|
|
|
|
k.RecordSocketVFS2(s1)
|
|
|
|
k.RecordSocketVFS2(s2)
|
2020-04-01 14:56:05 +00:00
|
|
|
return s1, s2, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil, syserr.ErrSocketNotSupported
|
|
|
|
}
|
|
|
|
|
2018-12-15 00:12:51 +00:00
|
|
|
// SendReceiveTimeout stores timeouts for send and receive calls.
//
// It is meant to be embedded into Socket implementations to help satisfy the
// interface.
//
// Care must be taken when copying SendReceiveTimeout as it contains atomic
// variables.
//
// +stateify savable
type SendReceiveTimeout struct {
	// send is length of the send timeout in nanoseconds.
	//
	// send must be accessed atomically.
	send int64

	// recv is length of the receive timeout in nanoseconds.
	//
	// recv must be accessed atomically.
	recv int64
}

// SetRecvTimeout implements Socket.SetRecvTimeout.
//
// The value is stored atomically and is not validated.
func (to *SendReceiveTimeout) SetRecvTimeout(nanoseconds int64) {
	atomic.StoreInt64(&to.recv, nanoseconds)
}

// RecvTimeout implements Socket.RecvTimeout.
//
// It atomically loads the last value stored by SetRecvTimeout, or zero if
// none was stored.
func (to *SendReceiveTimeout) RecvTimeout() int64 {
	return atomic.LoadInt64(&to.recv)
}

// SetSendTimeout implements Socket.SetSendTimeout.
//
// The value is stored atomically and is not validated.
func (to *SendReceiveTimeout) SetSendTimeout(nanoseconds int64) {
	atomic.StoreInt64(&to.send, nanoseconds)
}

// SendTimeout implements Socket.SendTimeout.
//
// It atomically loads the last value stored by SetSendTimeout, or zero if
// none was stored.
func (to *SendReceiveTimeout) SendTimeout() int64 {
	return atomic.LoadInt64(&to.send)
}
|
2018-11-19 23:25:00 +00:00
|
|
|
|
|
|
|
// GetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid.
|
|
|
|
// It contains names that are valid for GetSockOpt when level is SOL_SOCKET.
|
|
|
|
func GetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) {
|
|
|
|
switch name {
|
|
|
|
case linux.SO_ACCEPTCONN,
|
|
|
|
linux.SO_BPF_EXTENSIONS,
|
|
|
|
linux.SO_COOKIE,
|
|
|
|
linux.SO_DOMAIN,
|
|
|
|
linux.SO_ERROR,
|
|
|
|
linux.SO_GET_FILTER,
|
|
|
|
linux.SO_INCOMING_NAPI_ID,
|
|
|
|
linux.SO_MEMINFO,
|
|
|
|
linux.SO_PEERCRED,
|
|
|
|
linux.SO_PEERGROUPS,
|
|
|
|
linux.SO_PEERNAME,
|
|
|
|
linux.SO_PEERSEC,
|
|
|
|
linux.SO_PROTOCOL,
|
|
|
|
linux.SO_SNDLOWAT,
|
|
|
|
linux.SO_TYPE:
|
|
|
|
|
|
|
|
t.Kernel().EmitUnimplementedEvent(t)
|
|
|
|
|
|
|
|
default:
|
|
|
|
emitUnimplementedEvent(t, name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid.
|
|
|
|
// It contains names that are valid for SetSockOpt when level is SOL_SOCKET.
|
|
|
|
func SetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) {
|
|
|
|
switch name {
|
|
|
|
case linux.SO_ATTACH_BPF,
|
|
|
|
linux.SO_ATTACH_FILTER,
|
|
|
|
linux.SO_ATTACH_REUSEPORT_CBPF,
|
|
|
|
linux.SO_ATTACH_REUSEPORT_EBPF,
|
|
|
|
linux.SO_CNX_ADVICE,
|
|
|
|
linux.SO_DETACH_FILTER,
|
|
|
|
linux.SO_RCVBUFFORCE,
|
|
|
|
linux.SO_SNDBUFFORCE:
|
|
|
|
|
|
|
|
t.Kernel().EmitUnimplementedEvent(t)
|
|
|
|
|
|
|
|
default:
|
|
|
|
emitUnimplementedEvent(t, name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// emitUnimplementedEvent emits unimplemented event if name is valid. It
|
|
|
|
// contains names that are common between Get and SetSocketOpt when level is
|
|
|
|
// SOL_SOCKET.
|
|
|
|
func emitUnimplementedEvent(t *kernel.Task, name int) {
|
|
|
|
switch name {
|
|
|
|
case linux.SO_BINDTODEVICE,
|
|
|
|
linux.SO_BROADCAST,
|
|
|
|
linux.SO_BSDCOMPAT,
|
|
|
|
linux.SO_BUSY_POLL,
|
|
|
|
linux.SO_DEBUG,
|
|
|
|
linux.SO_DONTROUTE,
|
|
|
|
linux.SO_INCOMING_CPU,
|
|
|
|
linux.SO_KEEPALIVE,
|
|
|
|
linux.SO_LINGER,
|
|
|
|
linux.SO_LOCK_FILTER,
|
|
|
|
linux.SO_MARK,
|
|
|
|
linux.SO_MAX_PACING_RATE,
|
|
|
|
linux.SO_NOFCS,
|
|
|
|
linux.SO_OOBINLINE,
|
|
|
|
linux.SO_PASSCRED,
|
|
|
|
linux.SO_PASSSEC,
|
|
|
|
linux.SO_PEEK_OFF,
|
|
|
|
linux.SO_PRIORITY,
|
|
|
|
linux.SO_RCVBUF,
|
|
|
|
linux.SO_RCVLOWAT,
|
|
|
|
linux.SO_RCVTIMEO,
|
|
|
|
linux.SO_REUSEADDR,
|
|
|
|
linux.SO_REUSEPORT,
|
|
|
|
linux.SO_RXQ_OVFL,
|
|
|
|
linux.SO_SELECT_ERR_QUEUE,
|
|
|
|
linux.SO_SNDBUF,
|
|
|
|
linux.SO_SNDTIMEO,
|
|
|
|
linux.SO_TIMESTAMP,
|
|
|
|
linux.SO_TIMESTAMPING,
|
|
|
|
linux.SO_TIMESTAMPNS,
|
|
|
|
linux.SO_TXTIME,
|
|
|
|
linux.SO_WIFI_STATUS,
|
|
|
|
linux.SO_ZEROCOPY:
|
|
|
|
|
|
|
|
t.Kernel().EmitUnimplementedEvent(t)
|
|
|
|
}
|
|
|
|
}
|
2019-08-08 23:49:18 +00:00
|
|
|
|
|
|
|
// UnmarshalSockAddr unmarshals memory representing a struct sockaddr to one of
|
|
|
|
// the ABI socket address types.
|
|
|
|
//
|
|
|
|
// Precondition: data must be long enough to represent a socket address of the
|
|
|
|
// given family.
|
|
|
|
func UnmarshalSockAddr(family int, data []byte) linux.SockAddr {
|
|
|
|
switch family {
|
2021-03-03 18:23:55 +00:00
|
|
|
case unix.AF_INET:
|
2019-08-08 23:49:18 +00:00
|
|
|
var addr linux.SockAddrInet
|
2021-04-27 23:17:03 +00:00
|
|
|
addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
|
2019-08-08 23:49:18 +00:00
|
|
|
return &addr
|
2021-03-03 18:23:55 +00:00
|
|
|
case unix.AF_INET6:
|
2019-08-08 23:49:18 +00:00
|
|
|
var addr linux.SockAddrInet6
|
2021-04-27 23:17:03 +00:00
|
|
|
addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
|
2019-08-08 23:49:18 +00:00
|
|
|
return &addr
|
2021-03-03 18:23:55 +00:00
|
|
|
case unix.AF_UNIX:
|
2019-08-08 23:49:18 +00:00
|
|
|
var addr linux.SockAddrUnix
|
2021-04-27 23:17:03 +00:00
|
|
|
addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
|
2019-08-08 23:49:18 +00:00
|
|
|
return &addr
|
2021-03-03 18:23:55 +00:00
|
|
|
case unix.AF_NETLINK:
|
2019-08-08 23:49:18 +00:00
|
|
|
var addr linux.SockAddrNetlink
|
2021-04-27 23:17:03 +00:00
|
|
|
addr.UnmarshalUnsafe(data[:addr.SizeBytes()])
|
2019-08-08 23:49:18 +00:00
|
|
|
return &addr
|
|
|
|
default:
|
|
|
|
panic(fmt.Sprintf("Unsupported socket family %v", family))
|
|
|
|
}
|
|
|
|
}
|
2020-12-02 19:31:38 +00:00
|
|
|
|
|
|
|
var sockAddrLinkSize = (&linux.SockAddrLink{}).SizeBytes()
|
|
|
|
var sockAddrInetSize = (&linux.SockAddrInet{}).SizeBytes()
|
|
|
|
var sockAddrInet6Size = (&linux.SockAddrInet6{}).SizeBytes()
|
|
|
|
|
|
|
|
// Ntohs converts a 16-bit number from network byte order to host byte order. It
// assumes that the host is little endian.
//
// The conversion swaps the two bytes of v, so applying it twice yields the
// original value.
func Ntohs(v uint16) uint16 {
	return v<<8 | v>>8
}
|
|
|
|
|
|
|
|
// Htons converts a 16-bit number from host byte order to network byte order. It
|
|
|
|
// assumes that the host is little endian.
|
|
|
|
func Htons(v uint16) uint16 {
|
|
|
|
return Ntohs(v)
|
|
|
|
}
|
|
|
|
|
|
|
|
// isLinkLocal determines if the given IPv6 address is link-local. This is the
|
|
|
|
// case when it has the fe80::/10 prefix. This check is used to determine when
|
|
|
|
// the NICID is relevant for a given IPv6 address.
|
|
|
|
func isLinkLocal(addr tcpip.Address) bool {
|
|
|
|
return len(addr) >= 2 && addr[0] == 0xfe && addr[1]&0xc0 == 0x80
|
|
|
|
}
|
|
|
|
|
|
|
|
// ConvertAddress converts the given address to a native format.
|
|
|
|
func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) {
|
|
|
|
switch family {
|
|
|
|
case linux.AF_UNIX:
|
|
|
|
var out linux.SockAddrUnix
|
|
|
|
out.Family = linux.AF_UNIX
|
|
|
|
l := len([]byte(addr.Addr))
|
|
|
|
for i := 0; i < l; i++ {
|
|
|
|
out.Path[i] = int8(addr.Addr[i])
|
|
|
|
}
|
|
|
|
|
|
|
|
// Linux returns the used length of the address struct (including the
|
|
|
|
// null terminator) for filesystem paths. The Family field is 2 bytes.
|
|
|
|
// It is sometimes allowed to exclude the null terminator if the
|
|
|
|
// address length is the max. Abstract and empty paths always return
|
|
|
|
// the full exact length.
|
|
|
|
if l == 0 || out.Path[0] == 0 || l == len(out.Path) {
|
|
|
|
return &out, uint32(2 + l)
|
|
|
|
}
|
|
|
|
return &out, uint32(3 + l)
|
|
|
|
|
|
|
|
case linux.AF_INET:
|
|
|
|
var out linux.SockAddrInet
|
|
|
|
copy(out.Addr[:], addr.Addr)
|
|
|
|
out.Family = linux.AF_INET
|
|
|
|
out.Port = Htons(addr.Port)
|
|
|
|
return &out, uint32(sockAddrInetSize)
|
|
|
|
|
|
|
|
case linux.AF_INET6:
|
|
|
|
var out linux.SockAddrInet6
|
|
|
|
if len(addr.Addr) == header.IPv4AddressSize {
|
|
|
|
// Copy address in v4-mapped format.
|
|
|
|
copy(out.Addr[12:], addr.Addr)
|
|
|
|
out.Addr[10] = 0xff
|
|
|
|
out.Addr[11] = 0xff
|
|
|
|
} else {
|
|
|
|
copy(out.Addr[:], addr.Addr)
|
|
|
|
}
|
|
|
|
out.Family = linux.AF_INET6
|
|
|
|
out.Port = Htons(addr.Port)
|
|
|
|
if isLinkLocal(addr.Addr) {
|
|
|
|
out.Scope_id = uint32(addr.NIC)
|
|
|
|
}
|
|
|
|
return &out, uint32(sockAddrInet6Size)
|
|
|
|
|
|
|
|
case linux.AF_PACKET:
|
|
|
|
// TODO(gvisor.dev/issue/173): Return protocol too.
|
|
|
|
var out linux.SockAddrLink
|
|
|
|
out.Family = linux.AF_PACKET
|
|
|
|
out.InterfaceIndex = int32(addr.NIC)
|
|
|
|
out.HardwareAddrLen = header.EthernetAddressSize
|
|
|
|
copy(out.HardwareAddr[:], addr.Addr)
|
|
|
|
return &out, uint32(sockAddrLinkSize)
|
|
|
|
|
|
|
|
default:
|
|
|
|
return nil, 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// BytesToIPAddress converts an IPv4 or IPv6 address from the user to the
|
|
|
|
// netstack representation taking any addresses into account.
|
|
|
|
func BytesToIPAddress(addr []byte) tcpip.Address {
|
|
|
|
if bytes.Equal(addr, make([]byte, 4)) || bytes.Equal(addr, make([]byte, 16)) {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
return tcpip.Address(addr)
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddressAndFamily reads an sockaddr struct from the given address and
|
|
|
|
// converts it to the FullAddress format. It supports AF_UNIX, AF_INET,
|
|
|
|
// AF_INET6, and AF_PACKET addresses.
|
|
|
|
//
|
|
|
|
// AddressAndFamily returns an address and its family.
|
|
|
|
func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
|
|
|
|
// Make sure we have at least 2 bytes for the address family.
|
|
|
|
if len(addr) < 2 {
|
|
|
|
return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the rest of the fields based on the address family.
|
2021-03-29 20:28:32 +00:00
|
|
|
switch family := hostarch.ByteOrder.Uint16(addr); family {
|
2020-12-02 19:31:38 +00:00
|
|
|
case linux.AF_UNIX:
|
|
|
|
path := addr[2:]
|
|
|
|
if len(path) > linux.UnixPathMax {
|
|
|
|
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
// Drop the terminating NUL (if one exists) and everything after
|
|
|
|
// it for filesystem (non-abstract) addresses.
|
|
|
|
if len(path) > 0 && path[0] != 0 {
|
|
|
|
if n := bytes.IndexByte(path[1:], 0); n >= 0 {
|
|
|
|
path = path[:n+1]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return tcpip.FullAddress{
|
|
|
|
Addr: tcpip.Address(path),
|
|
|
|
}, family, nil
|
|
|
|
|
|
|
|
case linux.AF_INET:
|
|
|
|
var a linux.SockAddrInet
|
|
|
|
if len(addr) < sockAddrInetSize {
|
|
|
|
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
|
|
|
}
|
2021-04-27 23:17:03 +00:00
|
|
|
a.UnmarshalUnsafe(addr[:sockAddrInetSize])
|
2020-12-02 19:31:38 +00:00
|
|
|
|
|
|
|
out := tcpip.FullAddress{
|
|
|
|
Addr: BytesToIPAddress(a.Addr[:]),
|
|
|
|
Port: Ntohs(a.Port),
|
|
|
|
}
|
|
|
|
return out, family, nil
|
|
|
|
|
|
|
|
case linux.AF_INET6:
|
|
|
|
var a linux.SockAddrInet6
|
|
|
|
if len(addr) < sockAddrInet6Size {
|
|
|
|
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
|
|
|
}
|
2021-04-27 23:17:03 +00:00
|
|
|
a.UnmarshalUnsafe(addr[:sockAddrInet6Size])
|
2020-12-02 19:31:38 +00:00
|
|
|
|
|
|
|
out := tcpip.FullAddress{
|
|
|
|
Addr: BytesToIPAddress(a.Addr[:]),
|
|
|
|
Port: Ntohs(a.Port),
|
|
|
|
}
|
|
|
|
if isLinkLocal(out.Addr) {
|
|
|
|
out.NIC = tcpip.NICID(a.Scope_id)
|
|
|
|
}
|
|
|
|
return out, family, nil
|
|
|
|
|
|
|
|
case linux.AF_PACKET:
|
|
|
|
var a linux.SockAddrLink
|
|
|
|
if len(addr) < sockAddrLinkSize {
|
|
|
|
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
|
|
|
}
|
2021-04-27 23:17:03 +00:00
|
|
|
a.UnmarshalUnsafe(addr[:sockAddrLinkSize])
|
2020-12-02 19:31:38 +00:00
|
|
|
if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize {
|
|
|
|
return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(gvisor.dev/issue/173): Return protocol too.
|
|
|
|
return tcpip.FullAddress{
|
|
|
|
NIC: tcpip.NICID(a.InterfaceIndex),
|
|
|
|
Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]),
|
|
|
|
}, family, nil
|
|
|
|
|
|
|
|
case linux.AF_UNSPEC:
|
|
|
|
return tcpip.FullAddress{}, family, nil
|
|
|
|
|
|
|
|
default:
|
|
|
|
return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported
|
|
|
|
}
|
|
|
|
}
|