Port eventfd to VFS2.

Also rename sys_timerfd.go to timerfd.go for consistency.

Updates #1475.

PiperOrigin-RevId: 309835029
This commit is contained in:
Nicolas Lacasse 2020-05-04 16:00:22 -07:00 committed by gVisor bot
parent e7ed68d225
commit da71dc7fdd
9 changed files with 466 additions and 15 deletions

View File

@ -15,6 +15,7 @@
package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@ -22,32 +23,24 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
const (
// EFD_SEMAPHORE is a flag used in syscall eventfd(2) and eventfd2(2). Please
// see its man page for more information.
EFD_SEMAPHORE = 1
EFD_NONBLOCK = 0x800
EFD_CLOEXEC = 0x80000
)
// Eventfd2 implements linux syscall eventfd2(2).
func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
initVal := args[0].Int()
flags := uint(args[1].Uint())
allOps := uint(EFD_SEMAPHORE | EFD_NONBLOCK | EFD_CLOEXEC)
allOps := uint(linux.EFD_SEMAPHORE | linux.EFD_NONBLOCK | linux.EFD_CLOEXEC)
if flags & ^allOps != 0 {
return 0, nil, syserror.EINVAL
}
event := eventfd.New(t, uint64(initVal), flags&EFD_SEMAPHORE != 0)
event := eventfd.New(t, uint64(initVal), flags&linux.EFD_SEMAPHORE != 0)
event.SetFlags(fs.SettableFileFlags{
NonBlocking: flags&EFD_NONBLOCK != 0,
NonBlocking: flags&linux.EFD_NONBLOCK != 0,
})
defer event.DecRef()
fd, err := t.NewFDFrom(0, event, kernel.FDFlags{
CloseOnExec: flags&EFD_CLOEXEC != 0,
CloseOnExec: flags&linux.EFD_CLOEXEC != 0,
})
if err != nil {
return 0, nil, err

View File

@ -6,6 +6,7 @@ go_library(
name = "vfs2",
srcs = [
"epoll.go",
"eventfd.go",
"execve.go",
"fd.go",
"filesystem.go",
@ -26,7 +27,7 @@ go_library(
"stat_amd64.go",
"stat_arm64.go",
"sync.go",
"sys_timerfd.go",
"timerfd.go",
"xattr.go",
],
marshal = True,

View File

@ -0,0 +1,59 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vfs2
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
)
// Eventfd2 implements linux syscall eventfd2(2).
//
// args[0] supplies the initial counter value and args[1] the EFD_* flags.
// Flags outside EFD_SEMAPHORE, EFD_NONBLOCK and EFD_CLOEXEC are rejected with
// EINVAL. On success the new file descriptor number is returned.
func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	counter := uint64(args[0].Uint())
	efdFlags := uint(args[1].Uint())

	// Refuse any bit that is not one of the supported EFD_* flags.
	supported := uint(linux.EFD_SEMAPHORE | linux.EFD_NONBLOCK | linux.EFD_CLOEXEC)
	if unknown := efdFlags &^ supported; unknown != 0 {
		return 0, nil, syserror.EINVAL
	}

	// The file is always opened read-write; EFD_NONBLOCK becomes O_NONBLOCK.
	openFlags := uint32(linux.O_RDWR)
	if efdFlags&linux.EFD_NONBLOCK != 0 {
		openFlags |= linux.O_NONBLOCK
	}

	file, err := t.Kernel().VFS().NewEventFD(counter, efdFlags&linux.EFD_SEMAPHORE != 0, openFlags)
	if err != nil {
		return 0, nil, err
	}
	// Release this function's reference on every return path.
	// NOTE(review): presumably the FD table takes its own reference in
	// NewFDFromVFS2 - confirm.
	defer file.DecRef()

	newFD, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{
		CloseOnExec: efdFlags&linux.EFD_CLOEXEC != 0,
	})
	if err != nil {
		return 0, nil, err
	}
	return uintptr(newFD), nil, nil
}
// Eventfd implements linux syscall eventfd(2).
//
// It forwards to Eventfd2 with the flags argument (args[1]) forced to zero.
func Eventfd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	noFlags := args
	noFlags[1].Value = 0
	return Eventfd2(t, noFlags)
}

View File

@ -141,14 +141,14 @@ func Override(table map[uintptr]kernel.Syscall) {
table[281] = syscalls.Supported("epoll_pwait", EpollPwait)
delete(table, 282) // signalfd
table[283] = syscalls.Supported("timerfd_create", TimerfdCreate)
delete(table, 284) // eventfd
table[284] = syscalls.Supported("eventfd", Eventfd)
delete(table, 285) // fallocate
table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime)
table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime)
// TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2.
table[288] = syscalls.PartiallySupported("accept4", Accept4, "In process of porting socket syscalls to VFS2.", nil)
delete(table, 289) // signalfd4
delete(table, 290) // eventfd2
table[290] = syscalls.Supported("eventfd2", Eventfd2)
table[291] = syscalls.Supported("epoll_create1", EpollCreate1)
table[292] = syscalls.Supported("dup3", Dup3)
table[293] = syscalls.Supported("pipe2", Pipe2)

View File

@ -25,6 +25,7 @@ go_library(
"device.go",
"epoll.go",
"epoll_interest_list.go",
"eventfd.go",
"file_description.go",
"file_description_impl_util.go",
"filesystem.go",
@ -44,6 +45,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fd",
"//pkg/fdnotifier",
"//pkg/fspath",
"//pkg/gohacks",
"//pkg/log",
@ -68,6 +70,7 @@ go_test(
name = "vfs_test",
size = "small",
srcs = [
"eventfd_test.go",
"file_description_impl_util_test.go",
"mount_test.go",
],
@ -79,5 +82,6 @@ go_test(
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
)

282
pkg/sentry/vfs/eventfd.go Normal file
View File

@ -0,0 +1,282 @@
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vfs
import (
"math"
"sync"
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
// EventFileDescription implements FileDescriptionImpl for file-based event
// notification (eventfd). Eventfds are usually internal to the Sentry but in
// certain situations they may be converted into a host-backed eventfd.
//
// While hostfd is negative the counter is kept in val. Once HostFD has
// created a host eventfd, reads, writes and readiness checks are forwarded to
// that host descriptor instead.
type EventFileDescription struct {
	vfsfd FileDescription
	FileDescriptionDefaultImpl
	DentryMetadataFileDescriptionImpl

	// queue is used to notify interested parties when the event object
	// becomes readable or writable.
	queue waiter.Queue `state:"zerovalue"`

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// val is the current value of the event counter.
	val uint64

	// semMode specifies whether the event is in "semaphore" mode.
	semMode bool

	// hostfd is the host eventfd this object is passed through to, or a
	// negative value (-1) when no host eventfd has been created.
	hostfd int
}

// Compile-time assertion that *EventFileDescription satisfies
// FileDescriptionImpl.
var _ FileDescriptionImpl = (*EventFileDescription)(nil)
// NewEventFD creates a new event fd.
//
// initVal is the starting counter value, semMode selects semaphore semantics
// for reads, and flags are the file status flags handed to the underlying
// FileDescription. The returned description is backed by an anonymous
// "[eventfd]" dentry and has pread/pwrite denied.
func (vfs *VirtualFilesystem) NewEventFD(initVal uint64, semMode bool, flags uint32) (*FileDescription, error) {
	anon := vfs.NewAnonVirtualDentry("[eventfd]")
	defer anon.DecRef()

	opts := FileDescriptionOptions{
		UseDentryMetadata: true,
		DenyPRead:         true,
		DenyPWrite:        true,
	}
	impl := &EventFileDescription{
		val:     initVal,
		semMode: semMode,
		// No host eventfd exists until HostFD is called.
		hostfd: -1,
	}
	if err := impl.vfsfd.Init(impl, flags, anon.Mount(), anon.Dentry(), &opts); err != nil {
		return nil, err
	}
	return &impl.vfsfd, nil
}
// HostFD returns the host eventfd associated with this event, creating it on
// first use.
//
// The host eventfd is created non-blocking (and in semaphore mode when this
// event is), seeded with the current counter value, and registered with
// fdnotifier so that efd.queue is notified of host-side events. Subsequent
// calls return the same descriptor. On failure -1 and the error are returned
// and the event remains Sentry-internal.
func (efd *EventFileDescription) HostFD() (int, error) {
	efd.mu.Lock()
	defer efd.mu.Unlock()
	if efd.hostfd >= 0 {
		return efd.hostfd, nil
	}

	flags := linux.EFD_NONBLOCK
	if efd.semMode {
		flags |= linux.EFD_SEMAPHORE
	}

	// eventfd2(2) takes a 32-bit unsigned initial count, so a larger counter
	// would be silently truncated if passed directly. In that case create
	// the host eventfd with a zero count and add the full 64-bit value with
	// an 8-byte write below.
	initVal, pending := efd.val, uint64(0)
	if initVal > math.MaxUint32 {
		initVal, pending = 0, efd.val
	}

	fd, _, errno := syscall.Syscall(syscall.SYS_EVENTFD2, uintptr(initVal), uintptr(flags), 0)
	if errno != 0 {
		return -1, errno
	}
	hostfd := int(fd)

	// closeHostFD discards the freshly created descriptor on a later failure.
	closeHostFD := func() {
		if closeErr := syscall.Close(hostfd); closeErr != nil {
			log.Warningf("close(%d) eventfd failed: %v", hostfd, closeErr)
		}
	}

	if pending != 0 {
		var buf [8]byte
		usermem.ByteOrder.PutUint64(buf[:], pending)
		if _, err := syscall.Write(hostfd, buf[:]); err != nil {
			closeHostFD()
			return -1, err
		}
	}

	if err := fdnotifier.AddFD(int32(fd), &efd.queue); err != nil {
		closeHostFD()
		return -1, err
	}

	efd.hostfd = hostfd
	return efd.hostfd, nil
}
// Release implements FileDescriptionImpl.Release.
//
// If a host eventfd was created, it is removed from fdnotifier and closed; a
// failed close is logged but otherwise ignored.
func (efd *EventFileDescription) Release() {
	efd.mu.Lock()
	defer efd.mu.Unlock()

	if efd.hostfd < 0 {
		// Nothing on the host side to clean up.
		return
	}

	hostfd := efd.hostfd
	fdnotifier.RemoveFD(int32(hostfd))
	if err := syscall.Close(hostfd); err != nil {
		log.Warningf("close(%d) eventfd failed: %v", hostfd, err)
	}
	efd.hostfd = -1
}
// Read implements FileDescriptionImpl.Read.
//
// The destination must have room for at least 8 bytes, otherwise EINVAL is
// returned. A successful read always reports 8 bytes.
func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ ReadOptions) (int64, error) {
	// counterSize is the size in bytes of the 64-bit counter.
	const counterSize = 8

	if dst.NumBytes() < counterSize {
		return 0, syscall.EINVAL
	}
	err := efd.read(ctx, dst)
	if err != nil {
		return 0, err
	}
	return counterSize, nil
}
// Write implements FileDescriptionImpl.Write.
//
// The source must supply at least 8 bytes, otherwise EINVAL is returned. A
// successful write always reports 8 bytes.
func (efd *EventFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ WriteOptions) (int64, error) {
	// counterSize is the size in bytes of the 64-bit value being added.
	const counterSize = 8

	if src.NumBytes() < counterSize {
		return 0, syscall.EINVAL
	}
	err := efd.write(ctx, src)
	if err != nil {
		return 0, err
	}
	return counterSize, nil
}
// hostReadLocked reads 8 bytes from the host eventfd and copies them to dst.
// A host EWOULDBLOCK is translated to syserror.ErrWouldBlock.
//
// Preconditions: Must be called with efd.mu locked.
func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem.IOSequence) error {
	var counter [8]byte

	_, readErr := syscall.Read(efd.hostfd, counter[:])
	switch {
	case readErr == syscall.EWOULDBLOCK:
		return syserror.ErrWouldBlock
	case readErr != nil:
		return readErr
	}

	_, copyErr := dst.CopyOut(ctx, counter[:])
	return copyErr
}
// read consumes the counter and copies the result to dst.
//
// When a host eventfd exists the read is forwarded to it. Otherwise a zero
// counter yields syserror.ErrWouldBlock; in semaphore mode the counter is
// decremented and 1 is returned, and in normal mode the whole counter is
// returned and reset to zero. Writers are notified before the copy-out.
func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequence) error {
	efd.mu.Lock()
	if efd.hostfd >= 0 {
		defer efd.mu.Unlock()
		return efd.hostReadLocked(ctx, dst)
	}

	// We can't complete the read if the value is currently zero.
	if efd.val == 0 {
		efd.mu.Unlock()
		return syserror.ErrWouldBlock
	}

	// Semaphore mode hands out a single unit per read; normal mode drains
	// the counter. Consistent with Linux, the counter is updated even if
	// writing to memory fails afterwards.
	result, remaining := uint64(1), efd.val-1
	if !efd.semMode {
		result, remaining = efd.val, 0
	}
	efd.val = remaining
	efd.mu.Unlock()

	// Notify writers. We do this even if we were already writable because
	// it is possible that a writer is waiting to write the maximum value
	// to the event.
	efd.queue.Notify(waiter.EventOut)

	var encoded [8]byte
	usermem.ByteOrder.PutUint64(encoded[:], result)
	_, err := dst.CopyOut(ctx, encoded[:])
	return err
}
// hostWriteLocked writes val as an 8-byte value to the host eventfd. A host
// EWOULDBLOCK is translated to syserror.ErrWouldBlock.
//
// Preconditions: Must be called with efd.mu locked.
func (efd *EventFileDescription) hostWriteLocked(val uint64) error {
	var encoded [8]byte
	usermem.ByteOrder.PutUint64(encoded[:], val)

	if _, err := syscall.Write(efd.hostfd, encoded[:]); err != nil {
		if err == syscall.EWOULDBLOCK {
			return syserror.ErrWouldBlock
		}
		return err
	}
	return nil
}
// write copies an 8-byte value in from src and signals the event with it.
func (efd *EventFileDescription) write(ctx context.Context, src usermem.IOSequence) error {
	var raw [8]byte
	_, err := src.CopyIn(ctx, raw[:])
	if err != nil {
		return err
	}
	return efd.Signal(usermem.ByteOrder.Uint64(raw[:]))
}
// Signal is an internal function to signal the event fd.
//
// It adds val to the counter. math.MaxUint64 is rejected with EINVAL. When a
// host eventfd exists the value is written to it; otherwise an addition that
// would push the counter past math.MaxUint64-1 returns
// syserror.ErrWouldBlock. Readers are notified after a successful internal
// update.
func (efd *EventFileDescription) Signal(val uint64) error {
	if val == math.MaxUint64 {
		return syscall.EINVAL
	}

	efd.mu.Lock()
	if efd.hostfd >= 0 {
		defer efd.mu.Unlock()
		return efd.hostWriteLocked(val)
	}

	// headroom is how much can still be added before the counter would
	// exceed the maximum permitted value of max uint64 minus 1.
	if headroom := math.MaxUint64 - 1 - efd.val; val > headroom {
		efd.mu.Unlock()
		return syserror.ErrWouldBlock
	}
	efd.val += val
	efd.mu.Unlock()

	// Always trigger a notification.
	efd.queue.Notify(waiter.EventIn)
	return nil
}
// Readiness implements waiter.Waitable.Readiness.
//
// With a host eventfd the query is answered by fdnotifier.NonBlockingPoll on
// that descriptor. Otherwise the event is readable when the counter is
// non-zero and writable when it is below math.MaxUint64-1; the result is
// restricted to the events requested in mask.
func (efd *EventFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
	efd.mu.Lock()
	defer efd.mu.Unlock()

	if efd.hostfd >= 0 {
		return fdnotifier.NonBlockingPoll(int32(efd.hostfd), mask)
	}

	var ready waiter.EventMask
	if efd.val != 0 {
		ready |= waiter.EventIn
	}
	if efd.val < math.MaxUint64-1 {
		ready |= waiter.EventOut
	}
	return ready & mask
}
// EventRegister implements waiter.Waitable.EventRegister.
//
// The entry is added to the event's queue; if a host eventfd exists,
// fdnotifier is then asked to update its state for that descriptor.
func (efd *EventFileDescription) EventRegister(entry *waiter.Entry, mask waiter.EventMask) {
	efd.queue.EventRegister(entry, mask)

	efd.mu.Lock()
	defer efd.mu.Unlock()
	if efd.hostfd < 0 {
		return
	}
	fdnotifier.UpdateFD(int32(efd.hostfd))
}
// EventUnregister implements waiter.Waitable.EventUnregister.
//
// The entry is removed from the event's queue; if a host eventfd exists,
// fdnotifier is then asked to update its state for that descriptor.
func (efd *EventFileDescription) EventUnregister(entry *waiter.Entry) {
	efd.queue.EventUnregister(entry)

	efd.mu.Lock()
	defer efd.mu.Unlock()
	if efd.hostfd < 0 {
		return
	}
	fdnotifier.UpdateFD(int32(efd.hostfd))
}

View File

@ -0,0 +1,96 @@
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vfs
import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
// TestEventFD verifies that writing to an eventfd notifies a waiter
// registered for EventIn, both when the counter starts at zero and when it is
// already non-zero.
//
// Each initial value runs as its own subtest so that the deferred DecRef and
// EventUnregister calls fire when that case finishes, rather than piling up
// until the whole test function returns.
func TestEventFD(t *testing.T) {
	testCases := []struct {
		name    string
		initVal uint64
	}{
		{name: "ZeroInitialValue", initVal: 0},
		// Using a non-zero initial value verifies that writing to an
		// eventfd signals when the eventfd's counter was already
		// non-zero.
		{name: "NonZeroInitialValue", initVal: 343},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ctx := contexttest.Context(t)
			vfsObj := &VirtualFilesystem{}
			if err := vfsObj.Init(); err != nil {
				t.Fatalf("VFS init: %v", err)
			}

			// Make a new eventfd that is writable.
			eventfd, err := vfsObj.NewEventFD(tc.initVal, false, linux.O_RDWR)
			if err != nil {
				t.Fatalf("NewEventFD failed: %v", err)
			}
			defer eventfd.DecRef()

			// Register a callback for a write event.
			w, ch := waiter.NewChannelEntry(nil)
			eventfd.EventRegister(&w, waiter.EventIn)
			defer eventfd.EventUnregister(&w)

			// Any 8 bytes form a valid counter increment.
			data := []byte("00000124")

			// Create and submit a write request.
			n, err := eventfd.Write(ctx, usermem.BytesIOSequence(data), WriteOptions{})
			if err != nil {
				t.Fatal(err)
			}
			if n != 8 {
				t.Errorf("eventfd.write wrote %d bytes, not full int64", n)
			}

			// Check if the callback fired due to the write event. The
			// default case keeps the check non-blocking.
			select {
			case <-ch:
			default:
				t.Errorf("Didn't get notified of EventIn after write")
			}
		})
	}
}
// TestEventFDStat checks that Stat on a freshly created eventfd succeeds and
// reports a size of zero.
func TestEventFDStat(t *testing.T) {
	ctx := contexttest.Context(t)

	vfsObj := new(VirtualFilesystem)
	if initErr := vfsObj.Init(); initErr != nil {
		t.Fatalf("VFS init: %v", initErr)
	}

	// Make a new eventfd that is writable.
	efd, err := vfsObj.NewEventFD(0, false, linux.O_RDWR)
	if err != nil {
		t.Fatalf("NewEventFD failed: %v", err)
	}
	defer efd.DecRef()

	// Request the basic stat fields.
	opts := StatOptions{Mask: linux.STATX_BASIC_STATS}
	st, err := efd.Stat(ctx, opts)
	if err != nil {
		t.Fatalf("eventfd.Stat failed: %v", err)
	}
	if st.Size != 0 {
		t.Errorf("eventfd size should be 0")
	}
}

View File

@ -100,6 +100,22 @@ TEST(EventfdTest, SmallRead) {
ASSERT_THAT(read(efd.get(), &l, 4), SyscallFailsWithErrno(EINVAL));
}
// pread(2) on an eventfd is expected to fail with ESPIPE.
TEST(EventfdTest, PreadIllegalSeek) {
  FileDescriptor efd =
      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));

  uint64_t value = 0;
  ASSERT_THAT(pread(efd.get(), &value, 4, 0), SyscallFailsWithErrno(ESPIPE));
}
// pwrite(2) on an eventfd is expected to fail with ESPIPE.
TEST(EventfdTest, PwriteIllegalSeek) {
  FileDescriptor efd =
      ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));

  uint64_t value = 0;
  ASSERT_THAT(pwrite(efd.get(), &value, 4, 0), SyscallFailsWithErrno(ESPIPE));
}
TEST(EventfdTest, BigWrite) {
FileDescriptor efd =
ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));