Enable automated marshalling for epoll events.

Ensure we use the correct architecture-specific definition of epoll
event, and use go-marshal for serialization.

PiperOrigin-RevId: 308145677
This commit is contained in:
Rahat Mahmood 2020-04-23 15:47:59 -07:00 committed by gVisor bot
parent 5042ea7e2c
commit 93dd471461
9 changed files with 20 additions and 91 deletions

View File

@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// +build amd64
package linux package linux
// EpollEvent is equivalent to struct epoll_event from epoll(2). // EpollEvent is equivalent to struct epoll_event from epoll(2).
// //
// +marshal // +marshal slice:EpollEventSlice
type EpollEvent struct { type EpollEvent struct {
Events uint32 Events uint32
// Linux makes struct epoll_event::data a __u64. We represent it as // Linux makes struct epoll_event::data a __u64. We represent it as

View File

@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// +build arm64
package linux package linux
// EpollEvent is equivalent to struct epoll_event from epoll(2). // EpollEvent is equivalent to struct epoll_event from epoll(2).
// //
// +marshal // +marshal slice:EpollEventSlice
type EpollEvent struct { type EpollEvent struct {
Events uint32 Events uint32
// Linux makes struct epoll_event a __u64, necessitating 4 bytes of padding // Linux makes struct epoll_event a __u64, necessitating 4 bytes of padding

View File

@ -24,6 +24,7 @@ go_library(
], ],
visibility = ["//pkg/sentry:internal"], visibility = ["//pkg/sentry:internal"],
deps = [ deps = [
"//pkg/abi/linux",
"//pkg/context", "//pkg/context",
"//pkg/refs", "//pkg/refs",
"//pkg/sentry/fs", "//pkg/sentry/fs",

View File

@ -20,6 +20,7 @@ import (
"fmt" "fmt"
"syscall" "syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs"
@ -30,19 +31,6 @@ import (
"gvisor.dev/gvisor/pkg/waiter" "gvisor.dev/gvisor/pkg/waiter"
) )
// Event describes the event mask that was observed and the user data to be
// returned when one of the events occurs. Its layout mirrors the Linux format
// so that events can be written to userspace without extra copying or
// allocation; do not reorder or resize the fields.
type Event struct {
// Events is the event mask containing the set of events that have been
// observed on an entry.
Events uint32
// Data is an opaque 64-bit value provided by the caller when adding the
// entry, and returned to the caller when the entry reports an event. It is
// held as two 32-bit words rather than a single 64-bit integer.
// NOTE(review): presumably this avoids alignment padding after Events —
// confirm against the target architecture's struct epoll_event.
Data [2]int32
}
// EntryFlags is a bitmask that holds an entry's flags. // EntryFlags is a bitmask that holds an entry's flags.
type EntryFlags int type EntryFlags int
@ -227,9 +215,9 @@ func (e *EventPoll) Readiness(mask waiter.EventMask) waiter.EventMask {
} }
// ReadEvents returns up to max available events. // ReadEvents returns up to max available events.
func (e *EventPoll) ReadEvents(max int) []Event { func (e *EventPoll) ReadEvents(max int) []linux.EpollEvent {
var local pollEntryList var local pollEntryList
var ret []Event var ret []linux.EpollEvent
e.listsMu.Lock() e.listsMu.Lock()
@ -251,7 +239,7 @@ func (e *EventPoll) ReadEvents(max int) []Event {
} }
// Add event to the array that will be returned to caller. // Add event to the array that will be returned to caller.
ret = append(ret, Event{ ret = append(ret, linux.EpollEvent{
Events: uint32(ready), Events: uint32(ready),
Data: entry.userData, Data: entry.userData,
}) })

View File

@ -17,6 +17,7 @@ package syscalls
import ( import (
"time" "time"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/epoll" "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@ -118,7 +119,7 @@ func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error {
} }
// WaitEpoll implements the epoll_wait(2) linux syscall. // WaitEpoll implements the epoll_wait(2) linux syscall.
func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]epoll.Event, error) { func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]linux.EpollEvent, error) {
// Get epoll from the file descriptor. // Get epoll from the file descriptor.
epollfile := t.GetFile(fd) epollfile := t.GetFile(fd)
if epollfile == nil { if epollfile == nil {

View File

@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/epoll" "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
"gvisor.dev/gvisor/pkg/sentry/syscalls" "gvisor.dev/gvisor/pkg/sentry/syscalls"
"gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter" "gvisor.dev/gvisor/pkg/waiter"
) )
@ -72,7 +71,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
var data [2]int32 var data [2]int32
if op != linux.EPOLL_CTL_DEL { if op != linux.EPOLL_CTL_DEL {
var e linux.EpollEvent var e linux.EpollEvent
if _, err := t.CopyIn(eventAddr, &e); err != nil { if _, err := e.CopyIn(t, eventAddr); err != nil {
return 0, nil, err return 0, nil, err
} }
@ -105,28 +104,6 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
} }
} }
// copyOutEvents writes the epoll events in e to user memory starting at addr.
//
// Every event is serialized into a fixed 12-byte record using
// usermem.ByteOrder: the 32-bit event mask at offset 0, followed by the two
// 32-bit user-data words at offsets 4 and 8. The records are assembled in the
// task's scratch buffer and copied out with a single CopyOutBytes call.
//
// It returns syserror.EFAULT when the destination range [addr, addr+len) is
// rejected by addr.AddLength, and otherwise whatever error CopyOutBytes
// reports (nil on success).
func copyOutEvents(t *kernel.Task, addr usermem.Addr, e []epoll.Event) error {
	// Size in bytes of one serialized event record.
	const itemLen = 12

	total := len(e) * itemLen

	// Reject destination ranges that AddLength cannot represent.
	_, ok := addr.AddLength(uint64(total))
	if !ok {
		return syserror.EFAULT
	}

	scratch := t.CopyScratchBuffer(total)
	for i, ev := range e {
		// rec starts at the beginning of the i-th record.
		rec := scratch[i*itemLen:]
		usermem.ByteOrder.PutUint32(rec, ev.Events)
		usermem.ByteOrder.PutUint32(rec[4:], uint32(ev.Data[0]))
		usermem.ByteOrder.PutUint32(rec[8:], uint32(ev.Data[1]))
	}

	_, err := t.CopyOutBytes(addr, scratch)
	if err != nil {
		return err
	}
	return nil
}
// EpollWait implements the epoll_wait(2) linux syscall. // EpollWait implements the epoll_wait(2) linux syscall.
func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
epfd := args[0].Int() epfd := args[0].Int()
@ -140,7 +117,7 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
} }
if len(r) != 0 { if len(r) != 0 {
if err := copyOutEvents(t, eventsAddr, r); err != nil { if _, err := linux.CopyEpollEventSliceOut(t, eventsAddr, r); err != nil {
return 0, nil, err return 0, nil, err
} }
} }

View File

@ -6,7 +6,6 @@ go_library(
name = "vfs2", name = "vfs2",
srcs = [ srcs = [
"epoll.go", "epoll.go",
"epoll_unsafe.go",
"execve.go", "execve.go",
"fd.go", "fd.go",
"filesystem.go", "filesystem.go",

View File

@ -28,6 +28,8 @@ import (
"gvisor.dev/gvisor/pkg/waiter" "gvisor.dev/gvisor/pkg/waiter"
) )
var sizeofEpollEvent = (*linux.EpollEvent)(nil).SizeBytes()
// EpollCreate1 implements Linux syscall epoll_create1(2). // EpollCreate1 implements Linux syscall epoll_create1(2).
func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
flags := args[0].Int() flags := args[0].Int()
@ -124,7 +126,7 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
maxEvents := int(args[2].Int()) maxEvents := int(args[2].Int())
timeout := int(args[3].Int()) timeout := int(args[3].Int())
const _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS
if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS { if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS {
return 0, nil, syserror.EINVAL return 0, nil, syserror.EINVAL
} }
@ -157,7 +159,8 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
maxEvents -= n maxEvents -= n
if n != 0 { if n != 0 {
// Copy what we read out. // Copy what we read out.
copiedEvents, err := copyOutEvents(t, eventsAddr, events[:n]) copiedBytes, err := linux.CopyEpollEventSliceOut(t, eventsAddr, events[:n])
copiedEvents := copiedBytes / sizeofEpollEvent // rounded down
eventsAddr += usermem.Addr(copiedEvents * sizeofEpollEvent) eventsAddr += usermem.Addr(copiedEvents * sizeofEpollEvent)
total += copiedEvents total += copiedEvents
if err != nil { if err != nil {

View File

@ -1,44 +0,0 @@
// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vfs2
import (
"reflect"
"runtime"
"unsafe"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/gohacks"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/usermem"
)
// sizeofEpollEvent is the in-memory size, in bytes, of linux.EpollEvent as
// reported by unsafe.Sizeof. It is used to convert between event counts and
// byte counts when copying events out.
const sizeofEpollEvent = int(unsafe.Sizeof(linux.EpollEvent{}))
// copyOutEvents copies events to user memory at addr via t.CopyOutBytes.
//
// No per-event marshalling is done: the backing memory of events is
// reinterpreted as a byte slice of len(events)*sizeofEpollEvent bytes and
// passed to CopyOutBytes as-is.
//
// It returns the number of whole events copied (the byte count reported by
// CopyOutBytes divided by sizeofEpollEvent, rounded down) together with the
// error from CopyOutBytes. An empty events slice yields (0, nil) without
// calling CopyOutBytes.
func copyOutEvents(t *kernel.Task, addr usermem.Addr, events []linux.EpollEvent) (int, error) {
// The early return also protects the &events[0] access below, which would
// panic on an empty slice.
if len(events) == 0 {
return 0, nil
}
// Cast events to a byte slice for copying.
var eventBytes []byte
eventBytesHdr := (*reflect.SliceHeader)(unsafe.Pointer(&eventBytes))
// NOTE(review): gohacks.Noescape presumably hides this pointer from escape
// analysis so that events is not forced onto the heap — confirm against the
// gohacks package.
eventBytesHdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(&events[0])))
eventBytesHdr.Len = len(events) * sizeofEpollEvent
eventBytesHdr.Cap = len(events) * sizeofEpollEvent
copiedBytes, err := t.CopyOutBytes(addr, eventBytes)
// eventBytes was built from a raw uintptr, so explicitly keep events
// reachable until the copy above has completed.
runtime.KeepAlive(events)
// A partially copied trailing event is not counted.
copiedEvents := copiedBytes / sizeofEpollEvent // rounded down
return copiedEvents, err
}