Add FileDescriptionImpl for Unix sockets.
This change involves several steps:
- Refactor the VFS1 unix socket implementation to share methods between VFS1 and VFS2 where possible. Re-implement the rest.
- Override the default PRead, Read, PWrite, Write, Ioctl, Release methods in FileDescriptionDefaultImpl.
- Add functions to create and initialize a new Dentry/Inode and FileDescription for a Unix socket file.

Updates #1476

PiperOrigin-RevId: 304689796
This commit is contained in:
parent
a94309628e
commit
5818663ebe
|
@ -7,6 +7,7 @@ go_library(
|
|||
srcs = ["sockfs.go"],
|
||||
visibility = ["//pkg/sentry:internal"],
|
||||
deps = [
|
||||
"//pkg/abi/linux",
|
||||
"//pkg/context",
|
||||
"//pkg/sentry/fsimpl/kernfs",
|
||||
"//pkg/sentry/kernel/auth",
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
package sockfs
|
||||
|
||||
import (
|
||||
"gvisor.dev/gvisor/pkg/abi/linux"
|
||||
"gvisor.dev/gvisor/pkg/context"
|
||||
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
|
||||
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
|
||||
|
@ -60,6 +61,10 @@ type filesystem struct {
|
|||
}
|
||||
|
||||
// inode implements kernfs.Inode.
|
||||
//
|
||||
// TODO(gvisor.dev/issue/1476): Add device numbers to this inode (which are
|
||||
// not included in InodeAttrs) to store the numbers of the appropriate
|
||||
// socket device. Override InodeAttrs.Stat() accordingly.
|
||||
type inode struct {
|
||||
kernfs.InodeNotDirectory
|
||||
kernfs.InodeNotSymlink
|
||||
|
@ -71,3 +76,27 @@ type inode struct {
|
|||
func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
|
||||
return nil, syserror.ENXIO
|
||||
}
|
||||
|
||||
// InitSocket initializes a socket FileDescription, with a corresponding
|
||||
// Dentry in mnt.
|
||||
//
|
||||
// fd should be the FileDescription associated with socketImpl, i.e. its first
|
||||
// field. mnt should be the global socket mount, Kernel.socketMount.
|
||||
func InitSocket(socketImpl vfs.FileDescriptionImpl, fd *vfs.FileDescription, mnt *vfs.Mount, creds *auth.Credentials) error {
|
||||
fsimpl := mnt.Filesystem().Impl()
|
||||
fs := fsimpl.(*kernfs.Filesystem)
|
||||
|
||||
// File mode matches net/socket.c:sock_alloc.
|
||||
filemode := linux.FileMode(linux.S_IFSOCK | 0600)
|
||||
i := &inode{}
|
||||
i.Init(creds, fs.NextIno(), filemode)
|
||||
|
||||
d := &kernfs.Dentry{}
|
||||
d.Init(i)
|
||||
|
||||
opts := &vfs.FileDescriptionOptions{UseDentryMetadata: true}
|
||||
if err := fd.Init(socketImpl, linux.O_RDWR, mnt, d.VFSDentry(), opts); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -169,6 +169,7 @@ go_library(
|
|||
"//pkg/sentry/fs/lock",
|
||||
"//pkg/sentry/fs/timerfd",
|
||||
"//pkg/sentry/fsbridge",
|
||||
"//pkg/sentry/fsimpl/kernfs",
|
||||
"//pkg/sentry/fsimpl/sockfs",
|
||||
"//pkg/sentry/hostcpu",
|
||||
"//pkg/sentry/inet",
|
||||
|
|
|
@ -940,7 +940,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
|
|||
|
||||
// GetSockOpt can be used to implement the linux syscall getsockopt(2) for
|
||||
// sockets backed by a commonEndpoint.
|
||||
func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (interface{}, *syserr.Error) {
|
||||
func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (interface{}, *syserr.Error) {
|
||||
switch level {
|
||||
case linux.SOL_SOCKET:
|
||||
return getSockOptSocket(t, s, ep, family, skType, name, outLen)
|
||||
|
@ -966,7 +966,7 @@ func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int,
|
|||
}
|
||||
|
||||
// getSockOptSocket implements GetSockOpt when level is SOL_SOCKET.
|
||||
func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (interface{}, *syserr.Error) {
|
||||
func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (interface{}, *syserr.Error) {
|
||||
// TODO(b/124056281): Stop rejecting short optLen values in getsockopt.
|
||||
switch name {
|
||||
case linux.SO_ERROR:
|
||||
|
@ -1541,7 +1541,7 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
|
|||
|
||||
// SetSockOpt can be used to implement the linux syscall setsockopt(2) for
|
||||
// sockets backed by a commonEndpoint.
|
||||
func SetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, level int, name int, optVal []byte) *syserr.Error {
|
||||
func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int, name int, optVal []byte) *syserr.Error {
|
||||
switch level {
|
||||
case linux.SOL_SOCKET:
|
||||
return setSockOptSocket(t, s, ep, name, optVal)
|
||||
|
@ -1568,7 +1568,7 @@ func SetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, level int, n
|
|||
}
|
||||
|
||||
// setSockOptSocket implements SetSockOpt when level is SOL_SOCKET.
|
||||
func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
|
||||
func setSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
|
||||
switch name {
|
||||
case linux.SO_SNDBUF:
|
||||
if len(optVal) < sizeOfInt32 {
|
||||
|
|
|
@ -8,23 +8,27 @@ go_library(
|
|||
"device.go",
|
||||
"io.go",
|
||||
"unix.go",
|
||||
"unix_vfs2.go",
|
||||
],
|
||||
visibility = ["//pkg/sentry:internal"],
|
||||
deps = [
|
||||
"//pkg/abi/linux",
|
||||
"//pkg/context",
|
||||
"//pkg/fspath",
|
||||
"//pkg/refs",
|
||||
"//pkg/safemem",
|
||||
"//pkg/sentry/arch",
|
||||
"//pkg/sentry/device",
|
||||
"//pkg/sentry/fs",
|
||||
"//pkg/sentry/fs/fsutil",
|
||||
"//pkg/sentry/fsimpl/sockfs",
|
||||
"//pkg/sentry/kernel",
|
||||
"//pkg/sentry/kernel/time",
|
||||
"//pkg/sentry/socket",
|
||||
"//pkg/sentry/socket/control",
|
||||
"//pkg/sentry/socket/netstack",
|
||||
"//pkg/sentry/socket/unix/transport",
|
||||
"//pkg/sentry/vfs",
|
||||
"//pkg/syserr",
|
||||
"//pkg/syserror",
|
||||
"//pkg/tcpip",
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
|
||||
"gvisor.dev/gvisor/pkg/abi/linux"
|
||||
"gvisor.dev/gvisor/pkg/context"
|
||||
"gvisor.dev/gvisor/pkg/fspath"
|
||||
"gvisor.dev/gvisor/pkg/refs"
|
||||
"gvisor.dev/gvisor/pkg/sentry/arch"
|
||||
"gvisor.dev/gvisor/pkg/sentry/fs"
|
||||
|
@ -33,6 +34,7 @@ import (
|
|||
"gvisor.dev/gvisor/pkg/sentry/socket/control"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
||||
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
||||
"gvisor.dev/gvisor/pkg/syserr"
|
||||
"gvisor.dev/gvisor/pkg/syserror"
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
|
@ -52,11 +54,8 @@ type SocketOperations struct {
|
|||
fsutil.FileNoSplice `state:"nosave"`
|
||||
fsutil.FileNoopFlush `state:"nosave"`
|
||||
fsutil.FileUseInodeUnstableAttr `state:"nosave"`
|
||||
refs.AtomicRefCount
|
||||
socket.SendReceiveTimeout
|
||||
|
||||
ep transport.Endpoint
|
||||
stype linux.SockType
|
||||
socketOpsCommon
|
||||
}
|
||||
|
||||
// New creates a new unix socket.
|
||||
|
@ -75,16 +74,29 @@ func NewWithDirent(ctx context.Context, d *fs.Dirent, ep transport.Endpoint, sty
|
|||
}
|
||||
|
||||
s := SocketOperations{
|
||||
ep: ep,
|
||||
stype: stype,
|
||||
socketOpsCommon: socketOpsCommon{
|
||||
ep: ep,
|
||||
stype: stype,
|
||||
},
|
||||
}
|
||||
s.EnableLeakCheck("unix.SocketOperations")
|
||||
|
||||
return fs.NewFile(ctx, d, flags, &s)
|
||||
}
|
||||
|
||||
// socketOpsCommon contains the socket operations common to VFS1 and VFS2.
|
||||
//
|
||||
// +stateify savable
|
||||
type socketOpsCommon struct {
|
||||
refs.AtomicRefCount
|
||||
socket.SendReceiveTimeout
|
||||
|
||||
ep transport.Endpoint
|
||||
stype linux.SockType
|
||||
}
|
||||
|
||||
// DecRef implements RefCounter.DecRef.
|
||||
func (s *SocketOperations) DecRef() {
|
||||
func (s *socketOpsCommon) DecRef() {
|
||||
s.DecRefWithDestructor(func() {
|
||||
s.ep.Close()
|
||||
})
|
||||
|
@ -97,7 +109,7 @@ func (s *SocketOperations) Release() {
|
|||
s.DecRef()
|
||||
}
|
||||
|
||||
func (s *SocketOperations) isPacket() bool {
|
||||
func (s *socketOpsCommon) isPacket() bool {
|
||||
switch s.stype {
|
||||
case linux.SOCK_DGRAM, linux.SOCK_SEQPACKET:
|
||||
return true
|
||||
|
@ -110,7 +122,7 @@ func (s *SocketOperations) isPacket() bool {
|
|||
}
|
||||
|
||||
// Endpoint extracts the transport.Endpoint.
|
||||
func (s *SocketOperations) Endpoint() transport.Endpoint {
|
||||
func (s *socketOpsCommon) Endpoint() transport.Endpoint {
|
||||
return s.ep
|
||||
}
|
||||
|
||||
|
@ -143,7 +155,7 @@ func extractPath(sockaddr []byte) (string, *syserr.Error) {
|
|||
|
||||
// GetPeerName implements the linux syscall getpeername(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
|
||||
func (s *socketOpsCommon) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
|
||||
addr, err := s.ep.GetRemoteAddress()
|
||||
if err != nil {
|
||||
return nil, 0, syserr.TranslateNetstackError(err)
|
||||
|
@ -155,7 +167,7 @@ func (s *SocketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32,
|
|||
|
||||
// GetSockName implements the linux syscall getsockname(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketOperations) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
|
||||
func (s *socketOpsCommon) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
|
||||
addr, err := s.ep.GetLocalAddress()
|
||||
if err != nil {
|
||||
return nil, 0, syserr.TranslateNetstackError(err)
|
||||
|
@ -178,7 +190,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
|
|||
|
||||
// Listen implements the linux syscall listen(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketOperations) Listen(t *kernel.Task, backlog int) *syserr.Error {
|
||||
func (s *socketOpsCommon) Listen(t *kernel.Task, backlog int) *syserr.Error {
|
||||
return s.ep.Listen(backlog)
|
||||
}
|
||||
|
||||
|
@ -310,6 +322,8 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
|
|||
}
|
||||
|
||||
// Create the socket.
|
||||
//
|
||||
// TODO(gvisor.dev/issue/2324): Correctly set file permissions.
|
||||
childDir, err := d.Bind(t, t.FSContext().RootDirectory(), name, bep, fs.FilePermissions{User: fs.PermMask{Read: true}})
|
||||
if err != nil {
|
||||
return syserr.ErrPortInUse
|
||||
|
@ -345,6 +359,31 @@ func extractEndpoint(t *kernel.Task, sockaddr []byte) (transport.BoundEndpoint,
|
|||
return ep, nil
|
||||
}
|
||||
|
||||
if kernel.VFS2Enabled {
|
||||
p := fspath.Parse(path)
|
||||
root := t.FSContext().RootDirectoryVFS2()
|
||||
start := root
|
||||
relPath := !p.Absolute
|
||||
if relPath {
|
||||
start = t.FSContext().WorkingDirectoryVFS2()
|
||||
}
|
||||
pop := vfs.PathOperation{
|
||||
Root: root,
|
||||
Start: start,
|
||||
Path: p,
|
||||
FollowFinalSymlink: true,
|
||||
}
|
||||
ep, e := t.Kernel().VFS().BoundEndpointAt(t, t.Credentials(), &pop)
|
||||
root.DecRef()
|
||||
if relPath {
|
||||
start.DecRef()
|
||||
}
|
||||
if e != nil {
|
||||
return nil, syserr.FromError(e)
|
||||
}
|
||||
return ep, nil
|
||||
}
|
||||
|
||||
// Find the node in the filesystem.
|
||||
root := t.FSContext().RootDirectory()
|
||||
cwd := t.FSContext().WorkingDirectory()
|
||||
|
@ -363,12 +402,11 @@ func extractEndpoint(t *kernel.Task, sockaddr []byte) (transport.BoundEndpoint,
|
|||
// No socket!
|
||||
return nil, syserr.ErrConnectionRefused
|
||||
}
|
||||
|
||||
return ep, nil
|
||||
}
|
||||
|
||||
// Connect implements the linux syscall connect(2) for unix sockets.
|
||||
func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
|
||||
func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
|
||||
ep, err := extractEndpoint(t, sockaddr)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -379,7 +417,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
|
|||
return s.ep.Connect(t, ep)
|
||||
}
|
||||
|
||||
// Writev implements fs.FileOperations.Write.
|
||||
// Write implements fs.FileOperations.Write.
|
||||
func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
|
||||
t := kernel.TaskFromContext(ctx)
|
||||
ctrl := control.New(t, s.ep, nil)
|
||||
|
@ -399,7 +437,7 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
|
|||
|
||||
// SendMsg implements the linux syscall sendmsg(2) for unix sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
|
||||
func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
|
||||
w := EndpointWriter{
|
||||
Ctx: t,
|
||||
Endpoint: s.ep,
|
||||
|
@ -453,27 +491,27 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
|
|||
}
|
||||
|
||||
// Passcred implements transport.Credentialer.Passcred.
|
||||
func (s *SocketOperations) Passcred() bool {
|
||||
func (s *socketOpsCommon) Passcred() bool {
|
||||
return s.ep.Passcred()
|
||||
}
|
||||
|
||||
// ConnectedPasscred implements transport.Credentialer.ConnectedPasscred.
|
||||
func (s *SocketOperations) ConnectedPasscred() bool {
|
||||
func (s *socketOpsCommon) ConnectedPasscred() bool {
|
||||
return s.ep.ConnectedPasscred()
|
||||
}
|
||||
|
||||
// Readiness implements waiter.Waitable.Readiness.
|
||||
func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
|
||||
func (s *socketOpsCommon) Readiness(mask waiter.EventMask) waiter.EventMask {
|
||||
return s.ep.Readiness(mask)
|
||||
}
|
||||
|
||||
// EventRegister implements waiter.Waitable.EventRegister.
|
||||
func (s *SocketOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
|
||||
func (s *socketOpsCommon) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
|
||||
s.ep.EventRegister(e, mask)
|
||||
}
|
||||
|
||||
// EventUnregister implements waiter.Waitable.EventUnregister.
|
||||
func (s *SocketOperations) EventUnregister(e *waiter.Entry) {
|
||||
func (s *socketOpsCommon) EventUnregister(e *waiter.Entry) {
|
||||
s.ep.EventUnregister(e)
|
||||
}
|
||||
|
||||
|
@ -485,7 +523,7 @@ func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVa
|
|||
|
||||
// Shutdown implements the linux syscall shutdown(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error {
|
||||
func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
|
||||
f, err := netstack.ConvertShutdown(how)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -511,7 +549,7 @@ func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS
|
|||
|
||||
// RecvMsg implements the linux syscall recvmsg(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) {
|
||||
func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) {
|
||||
trunc := flags&linux.MSG_TRUNC != 0
|
||||
peek := flags&linux.MSG_PEEK != 0
|
||||
dontWait := flags&linux.MSG_DONTWAIT != 0
|
||||
|
@ -648,12 +686,12 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
|
|||
}
|
||||
|
||||
// State implements socket.Socket.State.
|
||||
func (s *SocketOperations) State() uint32 {
|
||||
func (s *socketOpsCommon) State() uint32 {
|
||||
return s.ep.State()
|
||||
}
|
||||
|
||||
// Type implements socket.Socket.Type.
|
||||
func (s *SocketOperations) Type() (family int, skType linux.SockType, protocol int) {
|
||||
func (s *socketOpsCommon) Type() (family int, skType linux.SockType, protocol int) {
|
||||
// Unix domain sockets always have a protocol of 0.
|
||||
return linux.AF_UNIX, s.stype, 0
|
||||
}
|
||||
|
@ -706,4 +744,5 @@ func (*provider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.F
|
|||
|
||||
func init() {
|
||||
socket.RegisterProvider(linux.AF_UNIX, &provider{})
|
||||
socket.RegisterProviderVFS2(linux.AF_UNIX, &providerVFS2{})
|
||||
}
|
||||
|
|
|
@ -0,0 +1,348 @@
|
|||
// Copyright 2018 The gVisor Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package unix
|
||||
|
||||
import (
|
||||
"gvisor.dev/gvisor/pkg/abi/linux"
|
||||
"gvisor.dev/gvisor/pkg/context"
|
||||
"gvisor.dev/gvisor/pkg/fspath"
|
||||
"gvisor.dev/gvisor/pkg/sentry/arch"
|
||||
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
|
||||
"gvisor.dev/gvisor/pkg/sentry/kernel"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/control"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/netstack"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
||||
"gvisor.dev/gvisor/pkg/sentry/vfs"
|
||||
"gvisor.dev/gvisor/pkg/syserr"
|
||||
"gvisor.dev/gvisor/pkg/syserror"
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/usermem"
|
||||
"gvisor.dev/gvisor/pkg/waiter"
|
||||
)
|
||||
|
||||
// SocketVFS2 implements socket.SocketVFS2 (and by extension,
|
||||
// vfs.FileDescriptionImpl) for Unix sockets.
|
||||
type SocketVFS2 struct {
|
||||
vfsfd vfs.FileDescription
|
||||
vfs.FileDescriptionDefaultImpl
|
||||
vfs.DentryMetadataFileDescriptionImpl
|
||||
|
||||
socketOpsCommon
|
||||
}
|
||||
|
||||
// NewVFS2File creates and returns a new vfs.FileDescription for a unix socket.
|
||||
func NewVFS2File(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
|
||||
sock := NewFDImpl(ep, stype)
|
||||
vfsfd := &sock.vfsfd
|
||||
if err := sockfs.InitSocket(sock, vfsfd, t.Kernel().SocketMount(), t.Credentials()); err != nil {
|
||||
return nil, syserr.FromError(err)
|
||||
}
|
||||
return vfsfd, nil
|
||||
}
|
||||
|
||||
// NewFDImpl creates and returns a new SocketVFS2.
|
||||
func NewFDImpl(ep transport.Endpoint, stype linux.SockType) *SocketVFS2 {
|
||||
// You can create AF_UNIX, SOCK_RAW sockets. They're the same as
|
||||
// SOCK_DGRAM and don't require CAP_NET_RAW.
|
||||
if stype == linux.SOCK_RAW {
|
||||
stype = linux.SOCK_DGRAM
|
||||
}
|
||||
|
||||
return &SocketVFS2{
|
||||
socketOpsCommon: socketOpsCommon{
|
||||
ep: ep,
|
||||
stype: stype,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
|
||||
return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
|
||||
}
|
||||
|
||||
// blockingAccept implements a blocking version of accept(2), that is, if no
|
||||
// connections are ready to be accept, it will block until one becomes ready.
|
||||
func (s *SocketVFS2) blockingAccept(t *kernel.Task) (transport.Endpoint, *syserr.Error) {
|
||||
// Register for notifications.
|
||||
e, ch := waiter.NewChannelEntry(nil)
|
||||
s.socketOpsCommon.EventRegister(&e, waiter.EventIn)
|
||||
defer s.socketOpsCommon.EventUnregister(&e)
|
||||
|
||||
// Try to accept the connection; if it fails, then wait until we get a
|
||||
// notification.
|
||||
for {
|
||||
if ep, err := s.ep.Accept(); err != syserr.ErrWouldBlock {
|
||||
return ep, err
|
||||
}
|
||||
|
||||
if err := t.Block(ch); err != nil {
|
||||
return nil, syserr.FromError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accept implements the linux syscall accept(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
|
||||
// Issue the accept request to get the new endpoint.
|
||||
ep, err := s.ep.Accept()
|
||||
if err != nil {
|
||||
if err != syserr.ErrWouldBlock || !blocking {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
var err *syserr.Error
|
||||
ep, err = s.blockingAccept(t)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// We expect this to be a FileDescription here.
|
||||
ns, err := NewVFS2File(t, ep, s.stype)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
defer ns.DecRef()
|
||||
|
||||
if flags&linux.SOCK_NONBLOCK != 0 {
|
||||
ns.SetStatusFlags(t, t.Credentials(), linux.SOCK_NONBLOCK)
|
||||
}
|
||||
|
||||
var addr linux.SockAddr
|
||||
var addrLen uint32
|
||||
if peerRequested {
|
||||
// Get address of the peer.
|
||||
var err *syserr.Error
|
||||
addr, addrLen, err = ns.Impl().(*SocketVFS2).GetPeerName(t)
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
fd, e := t.NewFDFromVFS2(0, ns, kernel.FDFlags{
|
||||
CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
|
||||
})
|
||||
if e != nil {
|
||||
return 0, nil, 0, syserr.FromError(e)
|
||||
}
|
||||
|
||||
// TODO: add vfs2 sockets to global table.
|
||||
return fd, addr, addrLen, nil
|
||||
}
|
||||
|
||||
// Bind implements the linux syscall bind(2) for unix sockets.
|
||||
func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
|
||||
p, e := extractPath(sockaddr)
|
||||
if e != nil {
|
||||
return e
|
||||
}
|
||||
|
||||
bep, ok := s.ep.(transport.BoundEndpoint)
|
||||
if !ok {
|
||||
// This socket can't be bound.
|
||||
return syserr.ErrInvalidArgument
|
||||
}
|
||||
|
||||
return s.ep.Bind(tcpip.FullAddress{Addr: tcpip.Address(p)}, func() *syserr.Error {
|
||||
// Is it abstract?
|
||||
if p[0] == 0 {
|
||||
if t.IsNetworkNamespaced() {
|
||||
return syserr.ErrInvalidEndpointState
|
||||
}
|
||||
if err := t.AbstractSockets().Bind(p[1:], bep, s); err != nil {
|
||||
// syserr.ErrPortInUse corresponds to EADDRINUSE.
|
||||
return syserr.ErrPortInUse
|
||||
}
|
||||
} else {
|
||||
path := fspath.Parse(p)
|
||||
root := t.FSContext().RootDirectoryVFS2()
|
||||
defer root.DecRef()
|
||||
start := root
|
||||
relPath := !path.Absolute
|
||||
if relPath {
|
||||
start = t.FSContext().WorkingDirectoryVFS2()
|
||||
defer start.DecRef()
|
||||
}
|
||||
pop := vfs.PathOperation{
|
||||
Root: root,
|
||||
Start: start,
|
||||
Path: path,
|
||||
}
|
||||
err := t.Kernel().VFS().MknodAt(t, t.Credentials(), &pop, &vfs.MknodOptions{
|
||||
// TODO(gvisor.dev/issue/2324): The file permissions should be taken
|
||||
// from s and t.FSContext().Umask() (see net/unix/af_unix.c:unix_bind),
|
||||
// but VFS1 just always uses 0400. Resolve this inconsistency.
|
||||
Mode: linux.S_IFSOCK | 0400,
|
||||
Endpoint: bep,
|
||||
})
|
||||
if err == syserror.EEXIST {
|
||||
return syserr.ErrAddressInUse
|
||||
}
|
||||
return syserr.FromError(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// Ioctl implements vfs.FileDescriptionImpl.
|
||||
func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
|
||||
return netstack.Ioctl(ctx, s.ep, uio, args)
|
||||
}
|
||||
|
||||
// PRead implements vfs.FileDescriptionImpl.
|
||||
func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
|
||||
return 0, syserror.ESPIPE
|
||||
}
|
||||
|
||||
// Read implements vfs.FileDescriptionImpl.
|
||||
func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
|
||||
// All flags other than RWF_NOWAIT should be ignored.
|
||||
// TODO(gvisor.dev/issue/1476): Support RWF_NOWAIT.
|
||||
if opts.Flags != 0 {
|
||||
return 0, syserror.EOPNOTSUPP
|
||||
}
|
||||
|
||||
if dst.NumBytes() == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
return dst.CopyOutFrom(ctx, &EndpointReader{
|
||||
Ctx: ctx,
|
||||
Endpoint: s.ep,
|
||||
NumRights: 0,
|
||||
Peek: false,
|
||||
From: nil,
|
||||
})
|
||||
}
|
||||
|
||||
// PWrite implements vfs.FileDescriptionImpl.
|
||||
func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
|
||||
return 0, syserror.ESPIPE
|
||||
}
|
||||
|
||||
// Write implements vfs.FileDescriptionImpl.
|
||||
func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
|
||||
// All flags other than RWF_NOWAIT should be ignored.
|
||||
// TODO(gvisor.dev/issue/1476): Support RWF_NOWAIT.
|
||||
if opts.Flags != 0 {
|
||||
return 0, syserror.EOPNOTSUPP
|
||||
}
|
||||
|
||||
t := kernel.TaskFromContext(ctx)
|
||||
ctrl := control.New(t, s.ep, nil)
|
||||
|
||||
if src.NumBytes() == 0 {
|
||||
nInt, err := s.ep.SendMsg(ctx, [][]byte{}, ctrl, nil)
|
||||
return int64(nInt), err.ToError()
|
||||
}
|
||||
|
||||
return src.CopyInTo(ctx, &EndpointWriter{
|
||||
Ctx: ctx,
|
||||
Endpoint: s.ep,
|
||||
Control: ctrl,
|
||||
To: nil,
|
||||
})
|
||||
}
|
||||
|
||||
// Release implements vfs.FileDescriptionImpl.
|
||||
func (s *SocketVFS2) Release() {
|
||||
// Release only decrements a reference on s because s may be referenced in
|
||||
// the abstract socket namespace.
|
||||
s.DecRef()
|
||||
}
|
||||
|
||||
// Readiness implements waiter.Waitable.Readiness.
|
||||
func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask {
|
||||
return s.socketOpsCommon.Readiness(mask)
|
||||
}
|
||||
|
||||
// EventRegister implements waiter.Waitable.EventRegister.
|
||||
func (s *SocketVFS2) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
|
||||
s.socketOpsCommon.EventRegister(e, mask)
|
||||
}
|
||||
|
||||
// EventUnregister implements waiter.Waitable.EventUnregister.
|
||||
func (s *SocketVFS2) EventUnregister(e *waiter.Entry) {
|
||||
s.socketOpsCommon.EventUnregister(e)
|
||||
}
|
||||
|
||||
// SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
|
||||
// a transport.Endpoint.
|
||||
func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
|
||||
return netstack.SetSockOpt(t, s, s.ep, level, name, optVal)
|
||||
}
|
||||
|
||||
// providerVFS2 is a unix domain socket provider for VFS2.
|
||||
type providerVFS2 struct{}
|
||||
|
||||
func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) {
|
||||
// Check arguments.
|
||||
if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ {
|
||||
return nil, syserr.ErrProtocolNotSupported
|
||||
}
|
||||
|
||||
// Create the endpoint and socket.
|
||||
var ep transport.Endpoint
|
||||
switch stype {
|
||||
case linux.SOCK_DGRAM, linux.SOCK_RAW:
|
||||
ep = transport.NewConnectionless(t)
|
||||
case linux.SOCK_SEQPACKET, linux.SOCK_STREAM:
|
||||
ep = transport.NewConnectioned(t, stype, t.Kernel())
|
||||
default:
|
||||
return nil, syserr.ErrInvalidArgument
|
||||
}
|
||||
|
||||
f, err := NewVFS2File(t, ep, stype)
|
||||
if err != nil {
|
||||
ep.Close()
|
||||
return nil, err
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Pair creates a new pair of AF_UNIX connected sockets.
|
||||
func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) {
|
||||
// Check arguments.
|
||||
if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ {
|
||||
return nil, nil, syserr.ErrProtocolNotSupported
|
||||
}
|
||||
|
||||
switch stype {
|
||||
case linux.SOCK_STREAM, linux.SOCK_DGRAM, linux.SOCK_SEQPACKET, linux.SOCK_RAW:
|
||||
// Ok
|
||||
default:
|
||||
return nil, nil, syserr.ErrInvalidArgument
|
||||
}
|
||||
|
||||
// Create the endpoints and sockets.
|
||||
ep1, ep2 := transport.NewPair(t, stype, t.Kernel())
|
||||
s1, err := NewVFS2File(t, ep1, stype)
|
||||
if err != nil {
|
||||
ep1.Close()
|
||||
ep2.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
s2, err := NewVFS2File(t, ep2, stype)
|
||||
if err != nil {
|
||||
s1.DecRef()
|
||||
ep2.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return s1, s2, nil
|
||||
}
|
|
@ -16,6 +16,7 @@ package vfs
|
|||
|
||||
import (
|
||||
"gvisor.dev/gvisor/pkg/abi/linux"
|
||||
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
|
||||
)
|
||||
|
||||
// GetDentryOptions contains options to VirtualFilesystem.GetDentryAt() and
|
||||
|
@ -44,6 +45,10 @@ type MknodOptions struct {
|
|||
// DevMinor are the major and minor device numbers for the created device.
|
||||
DevMajor uint32
|
||||
DevMinor uint32
|
||||
|
||||
// Endpoint is the endpoint to bind to the created file, if a socket file is
|
||||
// being created for bind(2) on a Unix domain socket.
|
||||
Endpoint transport.BoundEndpoint
|
||||
}
|
||||
|
||||
// MountFlags contains flags as specified for mount(2), e.g. MS_NOEXEC.
|
||||
|
|
Loading…
Reference in New Issue