2019-04-29 21:25:05 +00:00
|
|
|
// Copyright 2018 The gVisor Authors.
|
2018-04-27 17:37:02 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// Package netlink provides core functionality for netlink sockets.
|
|
|
|
package netlink
|
|
|
|
|
|
|
|
import (
|
2018-08-09 05:38:41 +00:00
|
|
|
"math"
|
2018-04-27 17:37:02 +00:00
|
|
|
"sync"
|
|
|
|
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/abi/linux"
|
|
|
|
"gvisor.dev/gvisor/pkg/binary"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/arch"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/context"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/device"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel"
|
2019-11-01 19:42:04 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
2019-06-13 23:49:09 +00:00
|
|
|
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
|
2019-10-10 23:54:30 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/safemem"
|
2019-06-13 23:49:09 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/sentry/socket"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/usermem"
|
|
|
|
"gvisor.dev/gvisor/pkg/syserr"
|
|
|
|
"gvisor.dev/gvisor/pkg/syserror"
|
|
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
|
|
"gvisor.dev/gvisor/pkg/waiter"
|
2018-04-27 17:37:02 +00:00
|
|
|
)
|
|
|
|
|
2018-08-09 05:38:41 +00:00
|
|
|
// sizeOfInt32 is the size in bytes of an int32, used to validate the length
// of integer-valued socket option buffers.
const sizeOfInt32 = int(4)
|
|
|
|
|
|
|
|
// Bounds and default for the advertised send buffer size.
const (
	// minSendBufferSize is the smallest size of a send buffer.
	minSendBufferSize = 4 << 10 // 4096 bytes.

	// defaultSendBufferSize is the default size for the send buffer.
	defaultSendBufferSize = 16 << 10 // 16384 bytes.

	// maxSendBufferSize is the largest size a send buffer can grow to.
	maxSendBufferSize = 4 << 20 // 4MB.
)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2019-11-04 18:06:00 +00:00
|
|
|
var errNoFilter = syserr.New("no filter attached", linux.ENOENT)
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// netlinkSocketDevice is the netlink socket virtual device.
|
|
|
|
var netlinkSocketDevice = device.NewAnonDevice()
|
|
|
|
|
|
|
|
// Socket is the base socket type for netlink sockets.
|
|
|
|
//
|
|
|
|
// This implementation only supports userspace sending and receiving messages
|
|
|
|
// to/from the kernel.
|
|
|
|
//
|
2019-11-01 19:42:04 +00:00
|
|
|
// Socket implements socket.Socket and transport.Credentialer.
|
2018-08-02 17:41:44 +00:00
|
|
|
//
|
|
|
|
// +stateify savable
|
2018-04-27 17:37:02 +00:00
|
|
|
type Socket struct {
|
2019-04-11 07:41:42 +00:00
|
|
|
fsutil.FilePipeSeek `state:"nosave"`
|
|
|
|
fsutil.FileNotDirReaddir `state:"nosave"`
|
|
|
|
fsutil.FileNoFsync `state:"nosave"`
|
|
|
|
fsutil.FileNoMMap `state:"nosave"`
|
2019-05-21 22:17:05 +00:00
|
|
|
fsutil.FileNoSplice `state:"nosave"`
|
|
|
|
fsutil.FileNoopFlush `state:"nosave"`
|
2019-04-11 07:41:42 +00:00
|
|
|
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
|
2018-12-15 00:12:51 +00:00
|
|
|
socket.SendReceiveTimeout
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// ports provides netlink port allocation.
|
|
|
|
ports *port.Manager
|
|
|
|
|
|
|
|
// protocol is the netlink protocol implementation.
|
|
|
|
protocol Protocol
|
|
|
|
|
2019-06-10 22:16:42 +00:00
|
|
|
// skType is the socket type. This is either SOCK_DGRAM or SOCK_RAW for
|
|
|
|
// netlink sockets.
|
|
|
|
skType linux.SockType
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// ep is a datagram unix endpoint used to buffer messages sent from the
|
|
|
|
// kernel to userspace. RecvMsg reads messages from this endpoint.
|
2018-10-17 18:36:32 +00:00
|
|
|
ep transport.Endpoint
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// connection is the kernel's connection to ep, used to write messages
|
|
|
|
// sent to userspace.
|
2018-10-17 18:36:32 +00:00
|
|
|
connection transport.ConnectedEndpoint
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// mu protects the fields below.
|
|
|
|
mu sync.Mutex `state:"nosave"`
|
|
|
|
|
|
|
|
// bound indicates that portid is valid.
|
|
|
|
bound bool
|
|
|
|
|
|
|
|
// portID is the port ID allocated for this socket.
|
|
|
|
portID int32
|
|
|
|
|
|
|
|
// sendBufferSize is the send buffer "size". We don't actually have a
|
|
|
|
// fixed buffer but only consume this many bytes.
|
2018-08-09 05:38:41 +00:00
|
|
|
sendBufferSize uint32
|
2019-11-01 19:42:04 +00:00
|
|
|
|
|
|
|
// passcred indicates if this socket wants SCM credentials.
|
|
|
|
passcred bool
|
2019-11-04 18:06:00 +00:00
|
|
|
|
|
|
|
// filter indicates that this socket has a BPF filter "installed".
|
|
|
|
//
|
|
|
|
// TODO(gvisor.dev/issue/1119): We don't actually support filtering,
|
|
|
|
// this is just bookkeeping for tracking add/remove.
|
|
|
|
filter bool
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var _ socket.Socket = (*Socket)(nil)
|
2019-11-01 19:42:04 +00:00
|
|
|
var _ transport.Credentialer = (*Socket)(nil)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// NewSocket creates a new Socket.
|
2019-06-10 22:16:42 +00:00
|
|
|
func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socket, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
// Datagram endpoint used to buffer kernel -> user messages.
|
2019-06-14 01:39:43 +00:00
|
|
|
ep := transport.NewConnectionless(t)
|
2018-04-27 17:37:02 +00:00
|
|
|
|
|
|
|
// Bind the endpoint for good measure so we can connect to it. The
|
|
|
|
// bound address will never be exposed.
|
2018-10-24 18:04:11 +00:00
|
|
|
if err := ep.Bind(tcpip.FullAddress{Addr: "dummy"}, nil); err != nil {
|
2018-04-27 17:37:02 +00:00
|
|
|
ep.Close()
|
2018-10-24 18:04:11 +00:00
|
|
|
return nil, err
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create a connection from which the kernel can write messages.
|
2019-06-14 01:39:43 +00:00
|
|
|
connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t)
|
2018-10-24 18:04:11 +00:00
|
|
|
if err != nil {
|
2018-04-27 17:37:02 +00:00
|
|
|
ep.Close()
|
2018-10-24 18:04:11 +00:00
|
|
|
return nil, err
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return &Socket{
|
|
|
|
ports: t.Kernel().NetlinkPorts(),
|
|
|
|
protocol: protocol,
|
2019-06-10 22:16:42 +00:00
|
|
|
skType: skType,
|
2018-04-27 17:37:02 +00:00
|
|
|
ep: ep,
|
|
|
|
connection: connection,
|
|
|
|
sendBufferSize: defaultSendBufferSize,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release implements fs.FileOperations.Release.
|
|
|
|
func (s *Socket) Release() {
|
|
|
|
s.connection.Release()
|
|
|
|
s.ep.Close()
|
|
|
|
|
|
|
|
if s.bound {
|
|
|
|
s.ports.Release(s.protocol.Protocol(), s.portID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Readiness implements waiter.Waitable.Readiness.
|
|
|
|
func (s *Socket) Readiness(mask waiter.EventMask) waiter.EventMask {
|
|
|
|
// ep holds messages to be read and thus handles EventIn readiness.
|
|
|
|
ready := s.ep.Readiness(mask)
|
|
|
|
|
|
|
|
if mask&waiter.EventOut == waiter.EventOut {
|
|
|
|
// sendMsg handles messages synchronously and is thus always
|
|
|
|
// ready for writing.
|
|
|
|
ready |= waiter.EventOut
|
|
|
|
}
|
|
|
|
|
|
|
|
return ready
|
|
|
|
}
|
|
|
|
|
|
|
|
// EventRegister implements waiter.Waitable.EventRegister.
|
|
|
|
func (s *Socket) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
|
|
|
|
s.ep.EventRegister(e, mask)
|
|
|
|
// Writable readiness never changes, so no registration is needed.
|
|
|
|
}
|
|
|
|
|
|
|
|
// EventUnregister implements waiter.Waitable.EventUnregister.
|
|
|
|
func (s *Socket) EventUnregister(e *waiter.Entry) {
|
|
|
|
s.ep.EventUnregister(e)
|
|
|
|
}
|
|
|
|
|
2019-11-01 19:42:04 +00:00
|
|
|
// Passcred implements transport.Credentialer.Passcred.
|
|
|
|
func (s *Socket) Passcred() bool {
|
|
|
|
s.mu.Lock()
|
|
|
|
passcred := s.passcred
|
|
|
|
s.mu.Unlock()
|
|
|
|
return passcred
|
|
|
|
}
|
|
|
|
|
|
|
|
// ConnectedPasscred implements transport.Credentialer.ConnectedPasscred.
|
|
|
|
func (s *Socket) ConnectedPasscred() bool {
|
|
|
|
// This socket is connected to the kernel, which doesn't need creds.
|
|
|
|
//
|
|
|
|
// This is arbitrary, as ConnectedPasscred on this type has no callers.
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// Ioctl implements fs.FileOperations.Ioctl.
|
2019-06-28 00:21:34 +00:00
|
|
|
func (*Socket) Ioctl(context.Context, *fs.File, usermem.IO, arch.SyscallArguments) (uintptr, error) {
|
2019-04-29 21:03:04 +00:00
|
|
|
// TODO(b/68878065): no ioctls supported.
|
2018-04-27 17:37:02 +00:00
|
|
|
return 0, syserror.ENOTTY
|
|
|
|
}
|
|
|
|
|
|
|
|
// ExtractSockAddr extracts the SockAddrNetlink from b.
|
|
|
|
func ExtractSockAddr(b []byte) (*linux.SockAddrNetlink, *syserr.Error) {
|
|
|
|
if len(b) < linux.SockAddrNetlinkSize {
|
|
|
|
return nil, syserr.ErrBadAddress
|
|
|
|
}
|
|
|
|
|
|
|
|
var sa linux.SockAddrNetlink
|
|
|
|
binary.Unmarshal(b[:linux.SockAddrNetlinkSize], usermem.ByteOrder, &sa)
|
|
|
|
|
|
|
|
if sa.Family != linux.AF_NETLINK {
|
|
|
|
return nil, syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
|
|
|
|
return &sa, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// bindPort binds this socket to a port, preferring 'port' if it is available.
|
|
|
|
//
|
|
|
|
// port of 0 defaults to the ThreadGroup ID.
|
|
|
|
//
|
|
|
|
// Preconditions: mu is held.
|
|
|
|
func (s *Socket) bindPort(t *kernel.Task, port int32) *syserr.Error {
|
|
|
|
if s.bound {
|
|
|
|
// Re-binding is only allowed if the port doesn't change.
|
|
|
|
if port != s.portID {
|
|
|
|
return syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if port == 0 {
|
|
|
|
port = int32(t.ThreadGroup().ID())
|
|
|
|
}
|
|
|
|
port, ok := s.ports.Allocate(s.protocol.Protocol(), port)
|
|
|
|
if !ok {
|
|
|
|
return syserr.ErrBusy
|
|
|
|
}
|
|
|
|
|
|
|
|
s.portID = port
|
|
|
|
s.bound = true
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bind implements socket.Socket.Bind.
|
|
|
|
func (s *Socket) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
|
|
|
|
a, err := ExtractSockAddr(sockaddr)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// No support for multicast groups yet.
|
|
|
|
if a.Groups != 0 {
|
|
|
|
return syserr.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
|
|
|
return s.bindPort(t, int32(a.PortID))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Connect implements socket.Socket.Connect.
|
|
|
|
func (s *Socket) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
|
|
|
|
a, err := ExtractSockAddr(sockaddr)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// No support for multicast groups yet.
|
|
|
|
if a.Groups != 0 {
|
|
|
|
return syserr.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
|
|
|
if a.PortID == 0 {
|
|
|
|
// Netlink sockets default to connected to the kernel, but
|
|
|
|
// connecting anyways automatically binds if not already bound.
|
|
|
|
if !s.bound {
|
|
|
|
// Pass port 0 to get an auto-selected port ID.
|
|
|
|
return s.bindPort(t, 0)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// We don't support non-kernel destination ports. Linux returns EPERM
|
|
|
|
// if applications attempt to do this without NL_CFG_F_NONROOT_SEND, so
|
|
|
|
// we emulate that.
|
|
|
|
return syserr.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
|
|
|
// Accept implements socket.Socket.Accept.
|
2019-08-08 23:49:18 +00:00
|
|
|
func (s *Socket) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
// Netlink sockets never support accept.
|
|
|
|
return 0, nil, 0, syserr.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// Listen implements socket.Socket.Listen.
|
|
|
|
func (s *Socket) Listen(t *kernel.Task, backlog int) *syserr.Error {
|
|
|
|
// Netlink sockets never support listen.
|
|
|
|
return syserr.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shutdown implements socket.Socket.Shutdown.
|
|
|
|
func (s *Socket) Shutdown(t *kernel.Task, how int) *syserr.Error {
|
|
|
|
// Netlink sockets never support shutdown.
|
|
|
|
return syserr.ErrNotSupported
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetSockOpt implements socket.Socket.GetSockOpt.
|
2019-08-02 23:25:34 +00:00
|
|
|
func (s *Socket) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
|
2018-08-09 05:38:41 +00:00
|
|
|
switch level {
|
|
|
|
case linux.SOL_SOCKET:
|
|
|
|
switch name {
|
|
|
|
case linux.SO_SNDBUF:
|
|
|
|
if outLen < sizeOfInt32 {
|
|
|
|
return nil, syserr.ErrInvalidArgument
|
|
|
|
}
|
2019-03-19 17:37:46 +00:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
2018-08-09 05:38:41 +00:00
|
|
|
return int32(s.sendBufferSize), nil
|
|
|
|
|
|
|
|
case linux.SO_RCVBUF:
|
|
|
|
if outLen < sizeOfInt32 {
|
|
|
|
return nil, syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
// We don't have limit on receiving size.
|
2018-12-04 01:02:28 +00:00
|
|
|
return int32(math.MaxInt32), nil
|
2018-11-19 23:25:00 +00:00
|
|
|
|
2019-11-01 19:42:04 +00:00
|
|
|
case linux.SO_PASSCRED:
|
|
|
|
if outLen < sizeOfInt32 {
|
|
|
|
return nil, syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
var passcred int32
|
|
|
|
if s.Passcred() {
|
|
|
|
passcred = 1
|
|
|
|
}
|
|
|
|
return passcred, nil
|
|
|
|
|
2018-11-19 23:25:00 +00:00
|
|
|
default:
|
|
|
|
socket.GetSockOptEmitUnimplementedEvent(t, name)
|
|
|
|
}
|
2019-11-01 19:42:04 +00:00
|
|
|
|
2018-11-19 23:25:00 +00:00
|
|
|
case linux.SOL_NETLINK:
|
|
|
|
switch name {
|
|
|
|
case linux.NETLINK_BROADCAST_ERROR,
|
|
|
|
linux.NETLINK_CAP_ACK,
|
|
|
|
linux.NETLINK_DUMP_STRICT_CHK,
|
|
|
|
linux.NETLINK_EXT_ACK,
|
|
|
|
linux.NETLINK_LIST_MEMBERSHIPS,
|
|
|
|
linux.NETLINK_NO_ENOBUFS,
|
|
|
|
linux.NETLINK_PKTINFO:
|
|
|
|
|
|
|
|
t.Kernel().EmitUnimplementedEvent(t)
|
2018-08-09 05:38:41 +00:00
|
|
|
}
|
|
|
|
}
|
2019-04-29 21:03:04 +00:00
|
|
|
// TODO(b/68878065): other sockopts are not supported.
|
2018-04-27 17:37:02 +00:00
|
|
|
return nil, syserr.ErrProtocolNotAvailable
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetSockOpt implements socket.Socket.SetSockOpt.
|
|
|
|
func (s *Socket) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error {
|
2018-08-09 05:38:41 +00:00
|
|
|
switch level {
|
|
|
|
case linux.SOL_SOCKET:
|
|
|
|
switch name {
|
|
|
|
case linux.SO_SNDBUF:
|
|
|
|
if len(opt) < sizeOfInt32 {
|
|
|
|
return syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
size := usermem.ByteOrder.Uint32(opt)
|
|
|
|
if size < minSendBufferSize {
|
|
|
|
size = minSendBufferSize
|
|
|
|
} else if size > maxSendBufferSize {
|
|
|
|
size = maxSendBufferSize
|
|
|
|
}
|
2019-03-19 17:37:46 +00:00
|
|
|
s.mu.Lock()
|
2018-08-09 05:38:41 +00:00
|
|
|
s.sendBufferSize = size
|
2019-03-19 17:37:46 +00:00
|
|
|
s.mu.Unlock()
|
2018-08-09 05:38:41 +00:00
|
|
|
return nil
|
2019-11-01 19:42:04 +00:00
|
|
|
|
2018-08-09 05:38:41 +00:00
|
|
|
case linux.SO_RCVBUF:
|
|
|
|
if len(opt) < sizeOfInt32 {
|
|
|
|
return syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
// We don't have limit on receiving size. So just accept anything as
|
|
|
|
// valid for compatibility.
|
|
|
|
return nil
|
2019-11-01 19:42:04 +00:00
|
|
|
|
|
|
|
case linux.SO_PASSCRED:
|
|
|
|
if len(opt) < sizeOfInt32 {
|
|
|
|
return syserr.ErrInvalidArgument
|
|
|
|
}
|
|
|
|
passcred := usermem.ByteOrder.Uint32(opt)
|
|
|
|
|
|
|
|
s.mu.Lock()
|
|
|
|
s.passcred = passcred != 0
|
|
|
|
s.mu.Unlock()
|
|
|
|
return nil
|
|
|
|
|
2019-11-04 18:06:00 +00:00
|
|
|
case linux.SO_ATTACH_FILTER:
|
|
|
|
// TODO(gvisor.dev/issue/1119): We don't actually
|
|
|
|
// support filtering. If this socket can't ever send
|
|
|
|
// messages, then there is nothing to filter and we can
|
|
|
|
// advertise support. Otherwise, be conservative and
|
|
|
|
// return an error.
|
|
|
|
if s.protocol.CanSend() {
|
|
|
|
socket.SetSockOptEmitUnimplementedEvent(t, name)
|
|
|
|
return syserr.ErrProtocolNotAvailable
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mu.Lock()
|
|
|
|
s.filter = true
|
|
|
|
s.mu.Unlock()
|
|
|
|
return nil
|
|
|
|
|
|
|
|
case linux.SO_DETACH_FILTER:
|
|
|
|
// TODO(gvisor.dev/issue/1119): See above.
|
|
|
|
if s.protocol.CanSend() {
|
|
|
|
socket.SetSockOptEmitUnimplementedEvent(t, name)
|
|
|
|
return syserr.ErrProtocolNotAvailable
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mu.Lock()
|
|
|
|
filter := s.filter
|
|
|
|
s.filter = false
|
|
|
|
s.mu.Unlock()
|
|
|
|
|
|
|
|
if !filter {
|
|
|
|
return errNoFilter
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
2018-11-19 23:25:00 +00:00
|
|
|
default:
|
|
|
|
socket.SetSockOptEmitUnimplementedEvent(t, name)
|
2018-08-09 05:38:41 +00:00
|
|
|
}
|
2018-11-19 23:25:00 +00:00
|
|
|
|
|
|
|
case linux.SOL_NETLINK:
|
|
|
|
switch name {
|
|
|
|
case linux.NETLINK_ADD_MEMBERSHIP,
|
|
|
|
linux.NETLINK_BROADCAST_ERROR,
|
|
|
|
linux.NETLINK_CAP_ACK,
|
|
|
|
linux.NETLINK_DROP_MEMBERSHIP,
|
|
|
|
linux.NETLINK_DUMP_STRICT_CHK,
|
|
|
|
linux.NETLINK_EXT_ACK,
|
|
|
|
linux.NETLINK_LISTEN_ALL_NSID,
|
|
|
|
linux.NETLINK_NO_ENOBUFS,
|
|
|
|
linux.NETLINK_PKTINFO:
|
|
|
|
|
|
|
|
t.Kernel().EmitUnimplementedEvent(t)
|
|
|
|
}
|
|
|
|
|
2018-08-09 05:38:41 +00:00
|
|
|
}
|
2019-04-29 21:03:04 +00:00
|
|
|
// TODO(b/68878065): other sockopts are not supported.
|
2018-04-27 17:37:02 +00:00
|
|
|
return syserr.ErrProtocolNotAvailable
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetSockName implements socket.Socket.GetSockName.
|
2019-08-08 23:49:18 +00:00
|
|
|
func (s *Socket) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
2019-08-08 23:49:18 +00:00
|
|
|
sa := &linux.SockAddrNetlink{
|
2018-04-27 17:37:02 +00:00
|
|
|
Family: linux.AF_NETLINK,
|
|
|
|
PortID: uint32(s.portID),
|
|
|
|
}
|
|
|
|
return sa, uint32(binary.Size(sa)), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetPeerName implements socket.Socket.GetPeerName.
|
2019-08-08 23:49:18 +00:00
|
|
|
func (s *Socket) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
|
|
|
|
sa := &linux.SockAddrNetlink{
|
2018-04-27 17:37:02 +00:00
|
|
|
Family: linux.AF_NETLINK,
|
2019-04-29 21:03:04 +00:00
|
|
|
// TODO(b/68878065): Support non-kernel peers. For now the peer
|
2018-04-27 17:37:02 +00:00
|
|
|
// must be the kernel.
|
|
|
|
PortID: 0,
|
|
|
|
}
|
|
|
|
return sa, uint32(binary.Size(sa)), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// RecvMsg implements socket.Socket.RecvMsg.
|
2019-08-08 23:49:18 +00:00
|
|
|
func (s *Socket) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
|
|
|
|
from := &linux.SockAddrNetlink{
|
2018-04-27 17:37:02 +00:00
|
|
|
Family: linux.AF_NETLINK,
|
|
|
|
PortID: 0,
|
|
|
|
}
|
|
|
|
fromLen := uint32(binary.Size(from))
|
|
|
|
|
|
|
|
trunc := flags&linux.MSG_TRUNC != 0
|
|
|
|
|
2018-10-17 18:36:32 +00:00
|
|
|
r := unix.EndpointReader{
|
2018-04-27 17:37:02 +00:00
|
|
|
Endpoint: s.ep,
|
|
|
|
Peek: flags&linux.MSG_PEEK != 0,
|
|
|
|
}
|
|
|
|
|
2019-10-10 23:54:30 +00:00
|
|
|
// If MSG_TRUNC is set with a zero byte destination then we still need
|
|
|
|
// to read the message and discard it, or in the case where MSG_PEEK is
|
|
|
|
// set, leave it be. In both cases the full message length must be
|
|
|
|
// returned. However, the memory manager for the destination will not read
|
|
|
|
// the endpoint if the destination is zero length.
|
|
|
|
//
|
|
|
|
// In order for the endpoint to be read when the destination size is zero,
|
|
|
|
// we must cause a read of the endpoint by using a separate fake zero
|
|
|
|
// length block sequence and calling the EndpointReader directly.
|
|
|
|
if trunc && dst.Addrs.NumBytes() == 0 {
|
|
|
|
// Perform a read to a zero byte block sequence. We can ignore the
|
|
|
|
// original destination since it was zero bytes. The length returned by
|
|
|
|
// ReadToBlocks is ignored and we return the full message length to comply
|
|
|
|
// with MSG_TRUNC.
|
|
|
|
_, err := r.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(make([]byte, 0))))
|
|
|
|
return int(r.MsgSize), linux.MSG_TRUNC, from, fromLen, socket.ControlMessages{}, syserr.FromError(err)
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock || flags&linux.MSG_DONTWAIT != 0 {
|
2019-04-19 23:15:37 +00:00
|
|
|
var mflags int
|
|
|
|
if n < int64(r.MsgSize) {
|
|
|
|
mflags |= linux.MSG_TRUNC
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
if trunc {
|
|
|
|
n = int64(r.MsgSize)
|
|
|
|
}
|
2019-04-19 23:15:37 +00:00
|
|
|
return int(n), mflags, from, fromLen, socket.ControlMessages{}, syserr.FromError(err)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// We'll have to block. Register for notification and keep trying to
|
|
|
|
// receive all the data.
|
|
|
|
e, ch := waiter.NewChannelEntry(nil)
|
|
|
|
s.EventRegister(&e, waiter.EventIn)
|
|
|
|
defer s.EventUnregister(&e)
|
|
|
|
|
|
|
|
for {
|
|
|
|
if n, err := dst.CopyOutFrom(t, &r); err != syserror.ErrWouldBlock {
|
2019-04-19 23:15:37 +00:00
|
|
|
var mflags int
|
|
|
|
if n < int64(r.MsgSize) {
|
|
|
|
mflags |= linux.MSG_TRUNC
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
if trunc {
|
|
|
|
n = int64(r.MsgSize)
|
|
|
|
}
|
2019-04-19 23:15:37 +00:00
|
|
|
return int(n), mflags, from, fromLen, socket.ControlMessages{}, syserr.FromError(err)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
|
|
|
|
if err == syserror.ETIMEDOUT {
|
2019-04-19 23:15:37 +00:00
|
|
|
return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
2019-04-19 23:15:37 +00:00
|
|
|
return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err)
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read implements fs.FileOperations.Read.
|
|
|
|
func (s *Socket) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) {
|
|
|
|
if dst.NumBytes() == 0 {
|
|
|
|
return 0, nil
|
|
|
|
}
|
2018-10-17 18:36:32 +00:00
|
|
|
return dst.CopyOutFrom(ctx, &unix.EndpointReader{
|
2018-04-27 17:37:02 +00:00
|
|
|
Endpoint: s.ep,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2019-11-01 19:42:04 +00:00
|
|
|
// kernelSCM implements control.SCMCredentials. The credentials it carries
// stand for the kernel itself rather than for a Task.
//
// +stateify savable
type kernelSCM struct{}
|
|
|
|
|
|
|
|
// Equals implements transport.CredentialsControlMessage.Equals.
|
|
|
|
func (kernelSCM) Equals(oc transport.CredentialsControlMessage) bool {
|
|
|
|
_, ok := oc.(kernelSCM)
|
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
|
|
|
// Credentials implements control.SCMCredentials.Credentials.
|
|
|
|
func (kernelSCM) Credentials(*kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) {
|
|
|
|
return 0, auth.RootUID, auth.RootGID
|
|
|
|
}
|
|
|
|
|
|
|
|
// kernelCreds is the concrete version of kernelSCM used in all creds.
|
|
|
|
var kernelCreds = &kernelSCM{}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// sendResponse sends the response messages in ms back to userspace.
|
|
|
|
func (s *Socket) sendResponse(ctx context.Context, ms *MessageSet) *syserr.Error {
|
|
|
|
// Linux combines multiple netlink messages into a single datagram.
|
|
|
|
bufs := make([][]byte, 0, len(ms.Messages))
|
|
|
|
for _, m := range ms.Messages {
|
|
|
|
bufs = append(bufs, m.Finalize())
|
|
|
|
}
|
|
|
|
|
2019-11-01 19:42:04 +00:00
|
|
|
// All messages are from the kernel.
|
|
|
|
cms := transport.ControlMessages{
|
|
|
|
Credentials: kernelCreds,
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
if len(bufs) > 0 {
|
|
|
|
// RecvMsg never receives the address, so we don't need to send
|
|
|
|
// one.
|
2019-11-01 19:42:04 +00:00
|
|
|
_, notify, err := s.connection.Send(bufs, cms, tcpip.FullAddress{})
|
2018-04-27 17:37:02 +00:00
|
|
|
// If the buffer is full, we simply drop messages, just like
|
|
|
|
// Linux.
|
2018-10-24 18:04:11 +00:00
|
|
|
if err != nil && err != syserr.ErrWouldBlock {
|
|
|
|
return err
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
if notify {
|
|
|
|
s.connection.SendNotify()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// N.B. multi-part messages should still send NLMSG_DONE even if
|
|
|
|
// MessageSet contains no messages.
|
|
|
|
//
|
|
|
|
// N.B. NLMSG_DONE is always sent in a different datagram. See
|
|
|
|
// net/netlink/af_netlink.c:netlink_dump.
|
|
|
|
if ms.Multi {
|
|
|
|
m := NewMessage(linux.NetlinkMessageHeader{
|
|
|
|
Type: linux.NLMSG_DONE,
|
|
|
|
Flags: linux.NLM_F_MULTI,
|
|
|
|
Seq: ms.Seq,
|
|
|
|
PortID: uint32(ms.PortID),
|
|
|
|
})
|
|
|
|
|
2019-10-10 23:54:30 +00:00
|
|
|
// Add the dump_done_errno payload.
|
|
|
|
m.Put(int64(0))
|
|
|
|
|
2019-11-01 19:42:04 +00:00
|
|
|
_, notify, err := s.connection.Send([][]byte{m.Finalize()}, cms, tcpip.FullAddress{})
|
2018-10-24 18:04:11 +00:00
|
|
|
if err != nil && err != syserr.ErrWouldBlock {
|
|
|
|
return err
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
if notify {
|
|
|
|
s.connection.SendNotify()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-08-10 05:33:40 +00:00
|
|
|
func (s *Socket) dumpErrorMesage(ctx context.Context, hdr linux.NetlinkMessageHeader, ms *MessageSet, err *syserr.Error) *syserr.Error {
|
|
|
|
m := ms.AddMessage(linux.NetlinkMessageHeader{
|
|
|
|
Type: linux.NLMSG_ERROR,
|
|
|
|
})
|
|
|
|
|
|
|
|
m.Put(linux.NetlinkErrorMessage{
|
|
|
|
Error: int32(-err.ToLinux().Number()),
|
|
|
|
Header: hdr,
|
|
|
|
})
|
|
|
|
return nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-04-27 17:37:02 +00:00
|
|
|
// processMessages handles each message in buf, passing it to the protocol
|
|
|
|
// handler for final handling.
|
|
|
|
func (s *Socket) processMessages(ctx context.Context, buf []byte) *syserr.Error {
|
|
|
|
for len(buf) > 0 {
|
|
|
|
if len(buf) < linux.NetlinkMessageHeaderSize {
|
|
|
|
// Linux ignores messages that are too short. See
|
|
|
|
// net/netlink/af_netlink.c:netlink_rcv_skb.
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
var hdr linux.NetlinkMessageHeader
|
|
|
|
binary.Unmarshal(buf[:linux.NetlinkMessageHeaderSize], usermem.ByteOrder, &hdr)
|
|
|
|
|
|
|
|
if hdr.Length < linux.NetlinkMessageHeaderSize || uint64(hdr.Length) > uint64(len(buf)) {
|
|
|
|
// Linux ignores malformed messages. See
|
|
|
|
// net/netlink/af_netlink.c:netlink_rcv_skb.
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
// Data from this message.
|
|
|
|
data := buf[linux.NetlinkMessageHeaderSize:hdr.Length]
|
|
|
|
|
|
|
|
// Advance to the next message.
|
|
|
|
next := alignUp(int(hdr.Length), linux.NLMSG_ALIGNTO)
|
|
|
|
if next >= len(buf)-1 {
|
|
|
|
next = len(buf) - 1
|
|
|
|
}
|
|
|
|
buf = buf[next:]
|
|
|
|
|
|
|
|
// Ignore control messages.
|
|
|
|
if hdr.Type < linux.NLMSG_MIN_TYPE {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-08-10 05:33:40 +00:00
|
|
|
ms := NewMessageSet(s.portID, hdr.Seq)
|
|
|
|
var err *syserr.Error
|
2019-04-29 21:03:04 +00:00
|
|
|
// TODO(b/68877377): ACKs not supported yet.
|
2018-04-27 17:37:02 +00:00
|
|
|
if hdr.Flags&linux.NLM_F_ACK == linux.NLM_F_ACK {
|
2019-08-10 05:33:40 +00:00
|
|
|
err = syserr.ErrNotSupported
|
|
|
|
} else {
|
2018-04-27 17:37:02 +00:00
|
|
|
|
2019-08-10 05:33:40 +00:00
|
|
|
err = s.protocol.ProcessMessage(ctx, hdr, data, ms)
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
ms = NewMessageSet(s.portID, hdr.Seq)
|
|
|
|
if err := s.dumpErrorMesage(ctx, hdr, ms, err); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-04-27 17:37:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := s.sendResponse(ctx, ms); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// sendMsg is the core of message send, used for SendMsg and Write.
|
2018-05-02 05:11:07 +00:00
|
|
|
func (s *Socket) sendMsg(ctx context.Context, src usermem.IOSequence, to []byte, flags int, controlMessages socket.ControlMessages) (int, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
dstPort := int32(0)
|
|
|
|
|
|
|
|
if len(to) != 0 {
|
|
|
|
a, err := ExtractSockAddr(to)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// No support for multicast groups yet.
|
|
|
|
if a.Groups != 0 {
|
|
|
|
return 0, syserr.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
|
|
|
dstPort = int32(a.PortID)
|
|
|
|
}
|
|
|
|
|
|
|
|
if dstPort != 0 {
|
|
|
|
// Non-kernel destinations not supported yet. Treat as if
|
|
|
|
// NL_CFG_F_NONROOT_SEND is not set.
|
|
|
|
return 0, syserr.ErrPermissionDenied
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mu.Lock()
|
|
|
|
defer s.mu.Unlock()
|
|
|
|
|
|
|
|
// For simplicity, and consistency with Linux, we copy in the entire
|
|
|
|
// message up front.
|
2018-08-09 05:38:41 +00:00
|
|
|
if src.NumBytes() > int64(s.sendBufferSize) {
|
2018-04-27 17:37:02 +00:00
|
|
|
return 0, syserr.ErrMessageTooLong
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := make([]byte, src.NumBytes())
|
|
|
|
n, err := src.CopyIn(ctx, buf)
|
|
|
|
if err != nil {
|
|
|
|
// Don't partially consume messages.
|
|
|
|
return 0, syserr.FromError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := s.processMessages(ctx, buf); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SendMsg implements socket.Socket.SendMsg.
|
2018-12-15 00:12:51 +00:00
|
|
|
func (s *Socket) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
|
2018-04-27 17:37:02 +00:00
|
|
|
return s.sendMsg(t, src, to, flags, controlMessages)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write implements fs.FileOperations.Write.
|
|
|
|
func (s *Socket) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
|
2018-05-02 05:11:07 +00:00
|
|
|
n, err := s.sendMsg(ctx, src, nil, 0, socket.ControlMessages{})
|
2018-04-27 17:37:02 +00:00
|
|
|
return int64(n), err.ToError()
|
|
|
|
}
|
2019-06-06 22:03:44 +00:00
|
|
|
|
|
|
|
// State implements socket.Socket.State.
|
|
|
|
func (s *Socket) State() uint32 {
|
|
|
|
return s.ep.State()
|
|
|
|
}
|
2019-06-10 22:16:42 +00:00
|
|
|
|
|
|
|
// Type implements socket.Socket.Type.
|
|
|
|
func (s *Socket) Type() (family int, skType linux.SockType, protocol int) {
|
|
|
|
return linux.AF_NETLINK, s.skType, s.protocol.Protocol()
|
|
|
|
}
|