gvisor/pkg/sentry/socket/unix/unix_vfs2.go

375 lines
11 KiB
Go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unix
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch"
fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
// SocketVFS2 implements socket.SocketVFS2 (and by extension,
// vfs.FileDescriptionImpl) for Unix sockets.
type SocketVFS2 struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
vfs.DentryMetadataFileDescriptionImpl
vfs.LockFD
socketOpsCommon
}
var _ = socket.SocketVFS2(&SocketVFS2{})
// NewSockfsFile creates a new socket file in the global sockfs mount and
// returns a corresponding file description.
func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
mnt := t.Kernel().SocketMount()
d := sockfs.NewDentry(t.Credentials(), mnt)
fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{})
if err != nil {
return nil, syserr.FromError(err)
}
return fd, nil
}
// NewFileDescription creates and returns a socket file description
// corresponding to the given mount and dentry.
func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
// You can create AF_UNIX, SOCK_RAW sockets. They're the same as
// SOCK_DGRAM and don't require CAP_NET_RAW.
if stype == linux.SOCK_RAW {
stype = linux.SOCK_DGRAM
}
sock := &SocketVFS2{
socketOpsCommon: socketOpsCommon{
ep: ep,
stype: stype,
},
}
sock.LockFD.Init(locks)
vfsfd := &sock.vfsfd
if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{
DenyPRead: true,
DenyPWrite: true,
UseDentryMetadata: true,
}); err != nil {
return nil, err
}
return vfsfd, nil
}
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// a transport.Endpoint.
func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen)
}
// blockingAccept implements a blocking version of accept(2), that is, if no
// connections are ready to be accept, it will block until one becomes ready.
func (s *SocketVFS2) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (transport.Endpoint, *syserr.Error) {
// Register for notifications.
e, ch := waiter.NewChannelEntry(nil)
s.socketOpsCommon.EventRegister(&e, waiter.EventIn)
defer s.socketOpsCommon.EventUnregister(&e)
// Try to accept the connection; if it fails, then wait until we get a
// notification.
for {
if ep, err := s.ep.Accept(peerAddr); err != syserr.ErrWouldBlock {
return ep, err
}
if err := t.Block(ch); err != nil {
return nil, syserr.FromError(err)
}
}
}
// Accept implements the linux syscall accept(2) for sockets backed by
// a transport.Endpoint.
func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
var peerAddr *tcpip.FullAddress
if peerRequested {
peerAddr = &tcpip.FullAddress{}
}
ep, err := s.ep.Accept(peerAddr)
if err != nil {
if err != syserr.ErrWouldBlock || !blocking {
return 0, nil, 0, err
}
var err *syserr.Error
ep, err = s.blockingAccept(t, peerAddr)
if err != nil {
return 0, nil, 0, err
}
}
ns, err := NewSockfsFile(t, ep, s.stype)
if err != nil {
return 0, nil, 0, err
}
defer ns.DecRef(t)
if flags&linux.SOCK_NONBLOCK != 0 {
ns.SetStatusFlags(t, t.Credentials(), linux.SOCK_NONBLOCK)
}
var addr linux.SockAddr
var addrLen uint32
if peerAddr != nil {
addr, addrLen = netstack.ConvertAddress(linux.AF_UNIX, *peerAddr)
}
fd, e := t.NewFDFromVFS2(0, ns, kernel.FDFlags{
CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
})
if e != nil {
return 0, nil, 0, syserr.FromError(e)
}
t.Kernel().RecordSocketVFS2(ns)
return fd, addr, addrLen, nil
}
// Bind implements the linux syscall bind(2) for unix sockets.
func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
p, e := extractPath(sockaddr)
if e != nil {
return e
}
bep, ok := s.ep.(transport.BoundEndpoint)
if !ok {
// This socket can't be bound.
return syserr.ErrInvalidArgument
}
return s.ep.Bind(tcpip.FullAddress{Addr: tcpip.Address(p)}, func() *syserr.Error {
// Is it abstract?
if p[0] == 0 {
if t.IsNetworkNamespaced() {
return syserr.ErrInvalidEndpointState
}
asn := t.AbstractSockets()
name := p[1:]
if err := asn.Bind(t, name, bep, s); err != nil {
// syserr.ErrPortInUse corresponds to EADDRINUSE.
return syserr.ErrPortInUse
}
s.abstractName = name
s.abstractNamespace = asn
} else {
path := fspath.Parse(p)
root := t.FSContext().RootDirectoryVFS2()
defer root.DecRef(t)
start := root
relPath := !path.Absolute
if relPath {
start = t.FSContext().WorkingDirectoryVFS2()
defer start.DecRef(t)
}
pop := vfs.PathOperation{
Root: root,
Start: start,
Path: path,
}
stat, err := s.vfsfd.Stat(t, vfs.StatOptions{Mask: linux.STATX_MODE})
if err != nil {
return syserr.FromError(err)
}
err = t.Kernel().VFS().MknodAt(t, t.Credentials(), &pop, &vfs.MknodOptions{
// File permissions correspond to net/unix/af_unix.c:unix_bind.
Mode: linux.FileMode(linux.S_IFSOCK | uint(stat.Mode)&^t.FSContext().Umask()),
Endpoint: bep,
})
if err == syserror.EEXIST {
return syserr.ErrAddressInUse
}
return syserr.FromError(err)
}
return nil
})
}
// Ioctl implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
return netstack.Ioctl(ctx, s.ep, uio, args)
}
// PRead implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
return 0, syserror.ESPIPE
}
// Read implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
// All flags other than RWF_NOWAIT should be ignored.
// TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT.
if opts.Flags != 0 {
return 0, syserror.EOPNOTSUPP
}
if dst.NumBytes() == 0 {
return 0, nil
}
return dst.CopyOutFrom(ctx, &EndpointReader{
Ctx: ctx,
Endpoint: s.ep,
NumRights: 0,
Peek: false,
From: nil,
})
}
// PWrite implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
return 0, syserror.ESPIPE
}
// Write implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// All flags other than RWF_NOWAIT should be ignored.
// TODO(gvisor.dev/issue/2601): Support RWF_NOWAIT.
if opts.Flags != 0 {
return 0, syserror.EOPNOTSUPP
}
t := kernel.TaskFromContext(ctx)
ctrl := control.New(t, s.ep, nil)
if src.NumBytes() == 0 {
nInt, err := s.ep.SendMsg(ctx, [][]byte{}, ctrl, nil)
return int64(nInt), err.ToError()
}
return src.CopyInTo(ctx, &EndpointWriter{
Ctx: ctx,
Endpoint: s.ep,
Control: ctrl,
To: nil,
})
}
// Readiness implements waiter.Waitable.Readiness.
func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask {
return s.socketOpsCommon.Readiness(mask)
}
// EventRegister implements waiter.Waitable.EventRegister.
func (s *SocketVFS2) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
s.socketOpsCommon.EventRegister(e, mask)
}
// EventUnregister implements waiter.Waitable.EventUnregister.
func (s *SocketVFS2) EventUnregister(e *waiter.Entry) {
s.socketOpsCommon.EventUnregister(e)
}
// SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
// a transport.Endpoint.
func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
return netstack.SetSockOpt(t, s, s.ep, level, name, optVal)
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
func (s *SocketVFS2) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
return s.Locks().LockPOSIX(ctx, &s.vfsfd, uid, t, start, length, whence, block)
}
// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
func (s *SocketVFS2) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
return s.Locks().UnlockPOSIX(ctx, &s.vfsfd, uid, start, length, whence)
}
// providerVFS2 is a unix domain socket provider for VFS2.
type providerVFS2 struct{}
func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) {
// Check arguments.
if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ {
return nil, syserr.ErrProtocolNotSupported
}
// Create the endpoint and socket.
var ep transport.Endpoint
switch stype {
case linux.SOCK_DGRAM, linux.SOCK_RAW:
ep = transport.NewConnectionless(t)
case linux.SOCK_SEQPACKET, linux.SOCK_STREAM:
ep = transport.NewConnectioned(t, stype, t.Kernel())
default:
return nil, syserr.ErrInvalidArgument
}
f, err := NewSockfsFile(t, ep, stype)
if err != nil {
ep.Close(t)
return nil, err
}
return f, nil
}
// Pair creates a new pair of AF_UNIX connected sockets.
func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) {
// Check arguments.
if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ {
return nil, nil, syserr.ErrProtocolNotSupported
}
switch stype {
case linux.SOCK_STREAM, linux.SOCK_DGRAM, linux.SOCK_SEQPACKET, linux.SOCK_RAW:
// Ok
default:
return nil, nil, syserr.ErrInvalidArgument
}
// Create the endpoints and sockets.
ep1, ep2 := transport.NewPair(t, stype, t.Kernel())
s1, err := NewSockfsFile(t, ep1, stype)
if err != nil {
ep1.Close(t)
ep2.Close(t)
return nil, nil, err
}
s2, err := NewSockfsFile(t, ep2, stype)
if err != nil {
s1.DecRef(t)
ep2.Close(t)
return nil, nil, err
}
return s1, s2, nil
}