Priority-inheritance futex implementation
It is Implemented without the priority inheritance part given that gVisor defers scheduling decisions to Go runtime and doesn't have control over it. PiperOrigin-RevId: 236989545 Change-Id: I714c8ca0798743ecf3167b14ffeb5cd834302560
This commit is contained in:
parent
fcba4e8f04
commit
0b76887147
|
@ -54,3 +54,9 @@ const (
|
|||
|
||||
// FUTEX_TID_MASK is the TID portion of a PI futex word.
|
||||
const FUTEX_TID_MASK = 0x3fffffff
|
||||
|
||||
// Constants used for priority-inheritance futexes.
|
||||
const (
|
||||
FUTEX_WAITERS = 0x80000000
|
||||
FUTEX_OWNER_DIED = 0x40000000
|
||||
)
|
||||
|
|
|
@ -37,6 +37,8 @@ go_library(
|
|||
visibility = ["//pkg/sentry:internal"],
|
||||
deps = [
|
||||
"//pkg/abi/linux",
|
||||
"//pkg/log",
|
||||
"//pkg/sentry/context",
|
||||
"//pkg/sentry/memmap",
|
||||
"//pkg/sentry/usermem",
|
||||
"//pkg/syserror",
|
||||
|
|
|
@ -95,12 +95,15 @@ func (k *Key) matches(k2 *Key) bool {
|
|||
|
||||
// Target abstracts memory accesses and keys.
|
||||
type Target interface {
|
||||
// SwapUint32 gives access to usermem.SwapUint32.
|
||||
// SwapUint32 gives access to usermem.IO.SwapUint32.
|
||||
SwapUint32(addr usermem.Addr, new uint32) (uint32, error)
|
||||
|
||||
// CompareAndSwap gives access to usermem.CompareAndSwapUint32.
|
||||
// CompareAndSwap gives access to usermem.IO.CompareAndSwapUint32.
|
||||
CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error)
|
||||
|
||||
// LoadUint32 gives access to usermem.IO.LoadUint32.
|
||||
LoadUint32(addr usermem.Addr) (uint32, error)
|
||||
|
||||
// GetSharedKey returns a Key with kind KindSharedPrivate or
|
||||
// KindSharedMappable corresponding to the memory mapped at address addr.
|
||||
//
|
||||
|
@ -112,11 +115,11 @@ type Target interface {
|
|||
|
||||
// check performs a basic equality check on the given address.
|
||||
func check(t Target, addr usermem.Addr, val uint32) error {
|
||||
prev, err := t.CompareAndSwapUint32(addr, val, val)
|
||||
cur, err := t.LoadUint32(addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if prev != val {
|
||||
if cur != val {
|
||||
return syserror.EAGAIN
|
||||
}
|
||||
return nil
|
||||
|
@ -140,11 +143,14 @@ func atomicOp(t Target, addr usermem.Addr, opIn uint32) (bool, error) {
|
|||
)
|
||||
if opType == linux.FUTEX_OP_SET {
|
||||
oldVal, err = t.SwapUint32(addr, opArg)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
} else {
|
||||
for {
|
||||
oldVal, err = t.CompareAndSwapUint32(addr, 0, 0)
|
||||
oldVal, err = t.LoadUint32(addr)
|
||||
if err != nil {
|
||||
break
|
||||
return false, err
|
||||
}
|
||||
var newVal uint32
|
||||
switch opType {
|
||||
|
@ -161,7 +167,7 @@ func atomicOp(t Target, addr usermem.Addr, opIn uint32) (bool, error) {
|
|||
}
|
||||
prev, err := t.CompareAndSwapUint32(addr, oldVal, newVal)
|
||||
if err != nil {
|
||||
break
|
||||
return false, err
|
||||
}
|
||||
if prev == oldVal {
|
||||
break // Success.
|
||||
|
@ -222,6 +228,9 @@ type Waiter struct {
|
|||
// The bitmask we're waiting on.
|
||||
// This is used the case of a FUTEX_WAKE_BITSET.
|
||||
bitmask uint32
|
||||
|
||||
// tid is the thread ID for the waiter in case this is a PI mutex.
|
||||
tid uint32
|
||||
}
|
||||
|
||||
// NewWaiter returns a new unqueued Waiter.
|
||||
|
@ -262,23 +271,28 @@ func (b *bucket) wakeLocked(key *Key, bitmask uint32, n int) int {
|
|||
// Remove from the bucket and wake the waiter.
|
||||
woke := w
|
||||
w = w.Next() // Next iteration.
|
||||
b.waiters.Remove(woke)
|
||||
woke.C <- struct{}{}
|
||||
|
||||
// NOTE: The above channel write establishes a write barrier according
|
||||
// to the memory model, so nothing may be ordered around it. Since
|
||||
// we've dequeued woke and will never touch it again, we can safely
|
||||
// store nil to woke.bucket here and allow the WaitComplete() to
|
||||
// short-circuit grabbing the bucket lock. If they somehow miss the
|
||||
// store, we are still holding the lock, so we can know that they won't
|
||||
// dequeue woke, assume it's free and have the below operation
|
||||
// afterwards.
|
||||
woke.bucket.Store(nil)
|
||||
b.wakeWaiterLocked(woke)
|
||||
done++
|
||||
}
|
||||
return done
|
||||
}
|
||||
|
||||
func (b *bucket) wakeWaiterLocked(w *Waiter) {
|
||||
// Remove from the bucket and wake the waiter.
|
||||
b.waiters.Remove(w)
|
||||
w.C <- struct{}{}
|
||||
|
||||
// NOTE: The above channel write establishes a write barrier according
|
||||
// to the memory model, so nothing may be ordered around it. Since
|
||||
// we've dequeued w and will never touch it again, we can safely
|
||||
// store nil to w.bucket here and allow the WaitComplete() to
|
||||
// short-circuit grabbing the bucket lock. If they somehow miss the
|
||||
// store, we are still holding the lock, so we can know that they won't
|
||||
// dequeue w, assume it's free and have the below operation
|
||||
// afterwards.
|
||||
w.bucket.Store(nil)
|
||||
}
|
||||
|
||||
// requeueLocked takes n waiters from the bucket and moves them to naddr on the
|
||||
// bucket "to".
|
||||
//
|
||||
|
@ -596,7 +610,7 @@ func (m *Manager) WaitComplete(w *Waiter) {
|
|||
continue
|
||||
}
|
||||
|
||||
// Remove w from b.
|
||||
// Remove waiter from bucket.
|
||||
b.waiters.Remove(w)
|
||||
w.bucket.Store(nil)
|
||||
b.mu.Unlock()
|
||||
|
@ -606,3 +620,164 @@ func (m *Manager) WaitComplete(w *Waiter) {
|
|||
// Release references held by the waiter.
|
||||
w.key.release()
|
||||
}
|
||||
|
||||
// LockPI attempts to lock the futex following the Priority-inheritance futex
|
||||
// rules. The lock is acquired only when 'addr' points to 0. The TID of the
|
||||
// calling task is set to 'addr' to indicate the futex is owned. It returns true
|
||||
// if the futex was successfully acquired.
|
||||
//
|
||||
// FUTEX_OWNER_DIED is only set by the Linux when robust lists are in use (see
|
||||
// exit_robust_list()). Given we don't support robust lists, although handled
|
||||
// below, it's never set.
|
||||
func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, private, try bool) (bool, error) {
|
||||
k, err := getKey(t, addr, private)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// Ownership of k is transferred to w below.
|
||||
|
||||
// Prepare the Waiter before taking the bucket lock.
|
||||
select {
|
||||
case <-w.C:
|
||||
default:
|
||||
}
|
||||
w.key = k
|
||||
w.tid = tid
|
||||
|
||||
b := m.lockBucket(&k)
|
||||
// Hot function: avoid defers.
|
||||
|
||||
success, err := m.lockPILocked(w, t, addr, tid, b, try)
|
||||
if err != nil {
|
||||
w.key.release()
|
||||
b.mu.Unlock()
|
||||
return false, err
|
||||
}
|
||||
if success || try {
|
||||
// Release waiter if it's not going to be a wait.
|
||||
w.key.release()
|
||||
}
|
||||
b.mu.Unlock()
|
||||
return success, nil
|
||||
}
|
||||
|
||||
func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint32, b *bucket, try bool) (bool, error) {
|
||||
for {
|
||||
cur, err := t.LoadUint32(addr)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if (cur & linux.FUTEX_TID_MASK) == tid {
|
||||
return false, syserror.EDEADLK
|
||||
}
|
||||
|
||||
if (cur & linux.FUTEX_TID_MASK) == 0 {
|
||||
// No owner and no waiters, try to acquire the futex.
|
||||
|
||||
// Set TID and preserve owner died status.
|
||||
val := tid
|
||||
val |= cur & linux.FUTEX_OWNER_DIED
|
||||
prev, err := t.CompareAndSwapUint32(addr, cur, val)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if prev != cur {
|
||||
// CAS failed, retry...
|
||||
// Linux reacquires the bucket lock on retries, which will re-lookup the
|
||||
// mapping at the futex address. However, retrying while holding the
|
||||
// lock is more efficient and reduces the chance of another conflict.
|
||||
continue
|
||||
}
|
||||
// Futex acquired.
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Futex is already owned, prepare to wait.
|
||||
|
||||
if try {
|
||||
// Caller doesn't want to wait.
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Set waiters bit if not set yet.
|
||||
if cur&linux.FUTEX_WAITERS == 0 {
|
||||
prev, err := t.CompareAndSwapUint32(addr, cur, cur|linux.FUTEX_WAITERS)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if prev != cur {
|
||||
// CAS failed, retry...
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Add the waiter to the bucket.
|
||||
b.waiters.PushBack(w)
|
||||
w.bucket.Store(b)
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
// UnlockPI unlock the futex following the Priority-inheritance futex
|
||||
// rules. The address provided must contain the caller's TID. If there are
|
||||
// waiters, TID of the next waiter (FIFO) is set to the given address, and the
|
||||
// waiter woken up. If there are no waiters, 0 is set to the address.
|
||||
func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool) error {
|
||||
k, err := getKey(t, addr, private)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b := m.lockBucket(&k)
|
||||
|
||||
err = m.unlockPILocked(t, addr, tid, b)
|
||||
|
||||
k.release()
|
||||
b.mu.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *Manager) unlockPILocked(t Target, addr usermem.Addr, tid uint32, b *bucket) error {
|
||||
cur, err := t.LoadUint32(addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if (cur & linux.FUTEX_TID_MASK) != tid {
|
||||
return syserror.EPERM
|
||||
}
|
||||
|
||||
if b.waiters.Empty() {
|
||||
// It's safe to set 0 because there are no waiters, no new owner, and the
|
||||
// executing task is the current owner (no owner died bit).
|
||||
prev, err := t.CompareAndSwapUint32(addr, cur, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if prev != cur {
|
||||
// Let user mode handle CAS races. This is different than lock, which
|
||||
// retries when CAS fails.
|
||||
return syserror.EAGAIN
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
next := b.waiters.Front()
|
||||
|
||||
// Set next owner's TID, waiters if there are any. Resets owner died bit, if
|
||||
// set, because the executing task takes over as the owner.
|
||||
val := next.tid
|
||||
if next.Next() != nil {
|
||||
val |= linux.FUTEX_WAITERS
|
||||
}
|
||||
|
||||
prev, err := t.CompareAndSwapUint32(addr, cur, val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if prev != cur {
|
||||
return syserror.EINVAL
|
||||
}
|
||||
|
||||
b.wakeWaiterLocked(next)
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -49,6 +49,10 @@ func (t testData) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint
|
|||
return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil
|
||||
}
|
||||
|
||||
func (t testData) LoadUint32(addr usermem.Addr) (uint32, error) {
|
||||
return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil
|
||||
}
|
||||
|
||||
func (t testData) GetSharedKey(addr usermem.Addr) (Key, error) {
|
||||
return Key{
|
||||
Kind: KindSharedMappable,
|
||||
|
|
|
@ -41,6 +41,13 @@ func (t *Task) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32,
|
|||
})
|
||||
}
|
||||
|
||||
// LoadUint32 implemets futex.Target.LoadUint32.
|
||||
func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) {
|
||||
return t.MemoryManager().LoadUint32(t, addr, usermem.IOOpts{
|
||||
AddressSpaceActive: true,
|
||||
})
|
||||
}
|
||||
|
||||
// GetSharedKey implements futex.Target.GetSharedKey.
|
||||
func (t *Task) GetSharedKey(addr usermem.Addr) (futex.Key, error) {
|
||||
return t.MemoryManager().GetSharedFutexKey(t, addr)
|
||||
|
|
|
@ -346,6 +346,7 @@ func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new
|
|||
if err != nil {
|
||||
return 0, translateIOError(ctx, err)
|
||||
}
|
||||
// Return the number of bytes read.
|
||||
return 4, nil
|
||||
})
|
||||
return old, err
|
||||
|
@ -388,11 +389,55 @@ func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.
|
|||
if err != nil {
|
||||
return 0, translateIOError(ctx, err)
|
||||
}
|
||||
// Return the number of bytes read.
|
||||
return 4, nil
|
||||
})
|
||||
return prev, err
|
||||
}
|
||||
|
||||
// LoadUint32 implements usermem.IO.LoadUint32.
|
||||
func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) {
|
||||
ar, ok := mm.CheckIORange(addr, 4)
|
||||
if !ok {
|
||||
return 0, syserror.EFAULT
|
||||
}
|
||||
|
||||
// Do AddressSpace IO if applicable.
|
||||
if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
|
||||
for {
|
||||
val, err := mm.as.LoadUint32(addr)
|
||||
if err == nil {
|
||||
return val, nil
|
||||
}
|
||||
if f, ok := err.(platform.SegmentationFault); ok {
|
||||
if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
return 0, translateIOError(ctx, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Go through internal mappings.
|
||||
var val uint32
|
||||
_, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
|
||||
if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
|
||||
// Atomicity is unachievable across mappings.
|
||||
return 0, syserror.EFAULT
|
||||
}
|
||||
im := ims.Head()
|
||||
var err error
|
||||
val, err = safemem.LoadUint32(im)
|
||||
if err != nil {
|
||||
return 0, translateIOError(ctx, err)
|
||||
}
|
||||
// Return the number of bytes read.
|
||||
return 4, nil
|
||||
})
|
||||
return val, err
|
||||
}
|
||||
|
||||
// handleASIOFault handles a page fault at address addr for an AddressSpaceIO
|
||||
// operation spanning ioar.
|
||||
//
|
||||
|
|
|
@ -254,6 +254,11 @@ type AddressSpaceIO interface {
|
|||
//
|
||||
// Preconditions: addr must be aligned to a 4-byte boundary.
|
||||
CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error)
|
||||
|
||||
// LoadUint32 atomically loads the uint32 value at addr and returns it.
|
||||
//
|
||||
// Preconditions: addr must be aligned to a 4-byte boundary.
|
||||
LoadUint32(addr usermem.Addr) (uint32, error)
|
||||
}
|
||||
|
||||
// NoAddressSpaceIO implements AddressSpaceIO methods by panicing.
|
||||
|
@ -284,6 +289,11 @@ func (NoAddressSpaceIO) CompareAndSwapUint32(addr usermem.Addr, old, new uint32)
|
|||
panic("This platform does not support AddressSpaceIO")
|
||||
}
|
||||
|
||||
// LoadUint32 implements AddressSpaceIO.LoadUint32.
|
||||
func (NoAddressSpaceIO) LoadUint32(addr usermem.Addr) (uint32, error) {
|
||||
panic("This platform does not support AddressSpaceIO")
|
||||
}
|
||||
|
||||
// SegmentationFault is an error returned by AddressSpaceIO methods when IO
|
||||
// fails due to access of an unmapped page, or a mapped page with insufficient
|
||||
// permissions.
|
||||
|
|
|
@ -18,9 +18,7 @@ go_library(
|
|||
],
|
||||
importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy",
|
||||
visibility = ["//pkg/sentry:internal"],
|
||||
deps = [
|
||||
"//pkg/syserror",
|
||||
],
|
||||
deps = ["//pkg/syserror"],
|
||||
)
|
||||
|
||||
go_test(
|
||||
|
|
|
@ -106,3 +106,31 @@ TEXT ·compareAndSwapUint32(SB), NOSPLIT, $0-24
|
|||
CMPXCHGL DX, 0(DI)
|
||||
MOVL AX, prev+16(FP)
|
||||
RET
|
||||
|
||||
// handleLoadUint32Fault returns the value stored in DI. Control is transferred
|
||||
// to it when LoadUint32 below receives SIGSEGV or SIGBUS, with the signal
|
||||
// number stored in DI.
|
||||
//
|
||||
// It must have the same frame configuration as loadUint32 so that it can undo
|
||||
// any potential call frame set up by the assembler.
|
||||
TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16
|
||||
MOVL DI, sig+12(FP)
|
||||
RET
|
||||
|
||||
// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS
|
||||
// signal is received, the value returned is unspecified, and sig is the number
|
||||
// of the signal that was received.
|
||||
//
|
||||
// Preconditions: addr must be aligned to a 4-byte boundary.
|
||||
//
|
||||
//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
|
||||
TEXT ·loadUint32(SB), NOSPLIT, $0-16
|
||||
// Store 0 as the returned signal number. If we run to completion,
|
||||
// this is the value the caller will see; if a signal is received,
|
||||
// handleLoadUint32Fault will store a different value in this address.
|
||||
MOVL $0, sig+12(FP)
|
||||
|
||||
MOVQ addr+0(FP), AX
|
||||
MOVL (AX), BX
|
||||
MOVL BX, val+8(FP)
|
||||
RET
|
||||
|
|
|
@ -96,3 +96,31 @@ again:
|
|||
done:
|
||||
MOVW R3, prev+16(FP)
|
||||
RET
|
||||
|
||||
// handleLoadUint32Fault returns the value stored in DI. Control is transferred
|
||||
// to it when LoadUint32 below receives SIGSEGV or SIGBUS, with the signal
|
||||
// number stored in DI.
|
||||
//
|
||||
// It must have the same frame configuration as loadUint32 so that it can undo
|
||||
// any potential call frame set up by the assembler.
|
||||
TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16
|
||||
MOVW R1, sig+12(FP)
|
||||
RET
|
||||
|
||||
// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS
|
||||
// signal is received, the value returned is unspecified, and sig is the number
|
||||
// of the signal that was received.
|
||||
//
|
||||
// Preconditions: addr must be aligned to a 4-byte boundary.
|
||||
//
|
||||
//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
|
||||
TEXT ·loadUint32(SB), NOSPLIT, $0-16
|
||||
// Store 0 as the returned signal number. If we run to completion,
|
||||
// this is the value the caller will see; if a signal is received,
|
||||
// handleLoadUint32Fault will store a different value in this address.
|
||||
MOVW $0, sig+12(FP)
|
||||
|
||||
MOVD addr+0(FP), R0
|
||||
LDARW (R0), R1
|
||||
MOVW R1, val+8(FP)
|
||||
RET
|
||||
|
|
|
@ -75,6 +75,8 @@ var (
|
|||
swapUint64End uintptr
|
||||
compareAndSwapUint32Begin uintptr
|
||||
compareAndSwapUint32End uintptr
|
||||
loadUint32Begin uintptr
|
||||
loadUint32End uintptr
|
||||
|
||||
// savedSigSegVHandler is a pointer to the SIGSEGV handler that was
|
||||
// configured before we replaced it with our own. We still call into it
|
||||
|
@ -119,6 +121,8 @@ func initializeAddresses() {
|
|||
swapUint64End = FindEndAddress(swapUint64Begin)
|
||||
compareAndSwapUint32Begin = reflect.ValueOf(compareAndSwapUint32).Pointer()
|
||||
compareAndSwapUint32End = FindEndAddress(compareAndSwapUint32Begin)
|
||||
loadUint32Begin = reflect.ValueOf(loadUint32).Pointer()
|
||||
loadUint32End = FindEndAddress(loadUint32Begin)
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
|
|
@ -79,6 +79,14 @@ func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32)
|
|||
//go:noescape
|
||||
func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32)
|
||||
|
||||
// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It
|
||||
// may fail with SIGSEGV or SIGBUS if it is received while reading from ptr.
|
||||
//
|
||||
// Preconditions: ptr must be aligned to a 4-byte boundary.
|
||||
//
|
||||
//go:noescape
|
||||
func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32)
|
||||
|
||||
// CopyIn copies len(dst) bytes from src to dst. It returns the number of bytes
|
||||
// copied and an error if SIGSEGV or SIGBUS is received while reading from src.
|
||||
func CopyIn(dst []byte, src unsafe.Pointer) (int, error) {
|
||||
|
@ -260,6 +268,18 @@ func CompareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (uint32, error) {
|
|||
return prev, errorFromFaultSignal(ptr, sig)
|
||||
}
|
||||
|
||||
// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It
|
||||
// may fail with SIGSEGV or SIGBUS if it is received while reading from ptr.
|
||||
//
|
||||
// Preconditions: ptr must be aligned to a 4-byte boundary.
|
||||
func LoadUint32(ptr unsafe.Pointer) (uint32, error) {
|
||||
if addr := uintptr(ptr); addr&3 != 0 {
|
||||
return 0, AlignmentError{addr, 4}
|
||||
}
|
||||
val, sig := loadUint32(ptr)
|
||||
return val, errorFromFaultSignal(ptr, sig)
|
||||
}
|
||||
|
||||
func errorFromFaultSignal(addr unsafe.Pointer, sig int32) error {
|
||||
switch sig {
|
||||
case 0:
|
||||
|
|
|
@ -101,6 +101,15 @@ not_swapuint64:
|
|||
JMP handle_fault
|
||||
|
||||
not_casuint32:
|
||||
CMPQ CX, ·loadUint32Begin(SB)
|
||||
JB not_loaduint32
|
||||
CMPQ CX, ·loadUint32End(SB)
|
||||
JAE not_loaduint32
|
||||
|
||||
LEAQ handleLoadUint32Fault(SB), CX
|
||||
JMP handle_fault
|
||||
|
||||
not_loaduint32:
|
||||
original_handler:
|
||||
// Jump to the previous signal handler, which is likely the golang one.
|
||||
XORQ CX, CX
|
||||
|
|
|
@ -110,6 +110,17 @@ not_swapuint64:
|
|||
B handle_fault
|
||||
|
||||
not_casuint32:
|
||||
MOVD ·loadUint32Begin(SB), R8
|
||||
CMP R8, R7
|
||||
BLO not_loaduint32
|
||||
MOVD ·loadUint32End(SB), R8
|
||||
CMP R8, R7
|
||||
BHS not_loaduint32
|
||||
|
||||
MOVD $handleLoadUint32Fault(SB), R7
|
||||
B handle_fault
|
||||
|
||||
not_loaduint32:
|
||||
original_handler:
|
||||
// Jump to the previous signal handler, which is likely the golang one.
|
||||
MOVD ·savedSigBusHandler(SB), R7
|
||||
|
|
|
@ -267,3 +267,13 @@ func CompareAndSwapUint32(b Block, old, new uint32) (uint32, error) {
|
|||
}
|
||||
return safecopy.CompareAndSwapUint32(b.start, old, new)
|
||||
}
|
||||
|
||||
// LoadUint32 invokes safecopy.LoadUint32 on the first 4 bytes of b.
|
||||
//
|
||||
// Preconditions: b.Len() >= 4.
|
||||
func LoadUint32(b Block) (uint32, error) {
|
||||
if b.length < 4 {
|
||||
panic(fmt.Sprintf("insufficient length: %d", b.length))
|
||||
}
|
||||
return safecopy.LoadUint32(b.start)
|
||||
}
|
||||
|
|
|
@ -124,6 +124,46 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add
|
|||
return 0, kernel.ERESTART_RESTARTBLOCK
|
||||
}
|
||||
|
||||
func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.Addr, private bool) error {
|
||||
w := t.FutexWaiter()
|
||||
locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if locked {
|
||||
// Futex acquired, we're done!
|
||||
return nil
|
||||
}
|
||||
|
||||
if forever {
|
||||
err = t.Block(w.C)
|
||||
} else {
|
||||
notifier, tchan := ktime.NewChannelNotifier()
|
||||
timer := ktime.NewTimer(t.Kernel().RealtimeClock(), notifier)
|
||||
timer.Swap(ktime.Setting{
|
||||
Enabled: true,
|
||||
Next: ktime.FromTimespec(ts),
|
||||
})
|
||||
err = t.BlockWithTimer(w.C, tchan)
|
||||
timer.Destroy()
|
||||
}
|
||||
|
||||
t.Futex().WaitComplete(w)
|
||||
return syserror.ConvertIntr(err, kernel.ERESTARTSYS)
|
||||
}
|
||||
|
||||
func tryLockPI(t *kernel.Task, addr usermem.Addr, private bool) error {
|
||||
w := t.FutexWaiter()
|
||||
locked, err := t.Futex().LockPI(w, t, addr, uint32(t.ThreadID()), private, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !locked {
|
||||
return syserror.EWOULDBLOCK
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Futex implements linux syscall futex(2).
|
||||
// It provides a method for a program to wait for a value at a given address to
|
||||
// change, and a method to wake up anyone waiting on a particular address.
|
||||
|
@ -144,7 +184,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
|
|||
switch cmd {
|
||||
case linux.FUTEX_WAIT, linux.FUTEX_WAIT_BITSET:
|
||||
// WAIT{_BITSET} wait forever if the timeout isn't passed.
|
||||
forever := timeout == 0
|
||||
forever := (timeout == 0)
|
||||
|
||||
var timespec linux.Timespec
|
||||
if !forever {
|
||||
|
@ -205,8 +245,30 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
|
|||
n, err := t.Futex().WakeOp(t, addr, naddr, private, val, nreq, op)
|
||||
return uintptr(n), nil, err
|
||||
|
||||
case linux.FUTEX_LOCK_PI, linux.FUTEX_UNLOCK_PI, linux.FUTEX_TRYLOCK_PI, linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
|
||||
// We don't support any priority inversion futexes.
|
||||
case linux.FUTEX_LOCK_PI:
|
||||
forever := (timeout == 0)
|
||||
|
||||
var timespec linux.Timespec
|
||||
if !forever {
|
||||
var err error
|
||||
timespec, err = copyTimespecIn(t, timeout)
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
}
|
||||
err := futexLockPI(t, timespec, forever, addr, private)
|
||||
return 0, nil, err
|
||||
|
||||
case linux.FUTEX_TRYLOCK_PI:
|
||||
err := tryLockPI(t, addr, private)
|
||||
return 0, nil, err
|
||||
|
||||
case linux.FUTEX_UNLOCK_PI:
|
||||
err := t.Futex().UnlockPI(t, addr, uint32(t.ThreadID()), private)
|
||||
return 0, nil, err
|
||||
|
||||
case linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
|
||||
t.Kernel().EmitUnimplementedEvent(t)
|
||||
return 0, nil, syserror.ENOSYS
|
||||
|
||||
default:
|
||||
|
|
|
@ -37,3 +37,11 @@ func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new
|
|||
}
|
||||
return atomicbitops.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), old, new), nil
|
||||
}
|
||||
|
||||
// LoadUint32 implements IO.LoadUint32.
|
||||
func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) {
|
||||
if _, err := b.rangeCheck(addr, 4); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return atomic.LoadUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)]))), nil
|
||||
}
|
||||
|
|
|
@ -103,6 +103,13 @@ type IO interface {
|
|||
// any following locks in the lock order. addr must be aligned to a 4-byte
|
||||
// boundary.
|
||||
CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error)
|
||||
|
||||
// LoadUint32 atomically loads the uint32 value at addr and returns it.
|
||||
//
|
||||
// Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
|
||||
// any following locks in the lock order. addr must be aligned to a 4-byte
|
||||
// boundary.
|
||||
LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error)
|
||||
}
|
||||
|
||||
// IOOpts contains options applicable to all IO methods.
|
||||
|
|
|
@ -33,6 +33,7 @@ var (
|
|||
ECHILD = error(syscall.ECHILD)
|
||||
ECONNREFUSED = error(syscall.ECONNREFUSED)
|
||||
ECONNRESET = error(syscall.ECONNRESET)
|
||||
EDEADLK = error(syscall.EDEADLK)
|
||||
EEXIST = error(syscall.EEXIST)
|
||||
EFAULT = error(syscall.EFAULT)
|
||||
EFBIG = error(syscall.EFBIG)
|
||||
|
|
|
@ -99,8 +99,8 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
|
|||
// args: cmd, ...
|
||||
tr = newArgsTracker(0)
|
||||
|
||||
case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL:
|
||||
// args: fd, cmd, ...
|
||||
case syscall.SYS_IOCTL, syscall.SYS_EPOLL_CTL, syscall.SYS_SHMCTL, syscall.SYS_FUTEX:
|
||||
// args: fd/addr, cmd, ...
|
||||
tr = newArgsTracker(1)
|
||||
|
||||
case syscall.SYS_GETSOCKOPT, syscall.SYS_SETSOCKOPT:
|
||||
|
|
|
@ -808,6 +808,7 @@ cc_binary(
|
|||
"//test/util:cleanup",
|
||||
"//test/util:file_descriptor",
|
||||
"//test/util:memory_util",
|
||||
"//test/util:save_util",
|
||||
"//test/util:temp_path",
|
||||
"//test/util:test_main",
|
||||
"//test/util:test_util",
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "test/util/cleanup.h"
|
||||
#include "test/util/file_descriptor.h"
|
||||
#include "test/util/memory_util.h"
|
||||
#include "test/util/save_util.h"
|
||||
#include "test/util/temp_path.h"
|
||||
#include "test/util/test_util.h"
|
||||
#include "test/util/thread_util.h"
|
||||
|
@ -118,6 +119,30 @@ int futex_wake_op(bool priv, std::atomic<int>* uaddr1, std::atomic<int>* uaddr2,
|
|||
return syscall(SYS_futex, uaddr1, op, nwake1, nwake2, uaddr2, sub_op);
|
||||
}
|
||||
|
||||
int futex_lock_pi(bool priv, std::atomic<int>* uaddr) {
|
||||
int op = FUTEX_LOCK_PI;
|
||||
if (priv) {
|
||||
op |= FUTEX_PRIVATE_FLAG;
|
||||
}
|
||||
return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr);
|
||||
}
|
||||
|
||||
int futex_trylock_pi(bool priv, std::atomic<int>* uaddr) {
|
||||
int op = FUTEX_TRYLOCK_PI;
|
||||
if (priv) {
|
||||
op |= FUTEX_PRIVATE_FLAG;
|
||||
}
|
||||
return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr);
|
||||
}
|
||||
|
||||
int futex_unlock_pi(bool priv, std::atomic<int>* uaddr) {
|
||||
int op = FUTEX_UNLOCK_PI;
|
||||
if (priv) {
|
||||
op |= FUTEX_PRIVATE_FLAG;
|
||||
}
|
||||
return RetryEINTR(syscall)(SYS_futex, uaddr, op, nullptr, nullptr);
|
||||
}
|
||||
|
||||
// Fixture for futex tests parameterized by whether to use private or shared
|
||||
// futexes.
|
||||
class PrivateAndSharedFutexTest : public ::testing::TestWithParam<bool> {
|
||||
|
@ -589,7 +614,95 @@ TEST(SharedFutexTest, WakeInterprocessFile_NoRandomSave) {
|
|||
<< " status " << status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
TEST_P(PrivateAndSharedFutexTest, PIBasic) {
|
||||
std::atomic<int> a = ATOMIC_VAR_INIT(0);
|
||||
|
||||
ASSERT_THAT(futex_lock_pi(IsPrivate(), &a), SyscallSucceeds());
|
||||
EXPECT_EQ(a.load(), gettid());
|
||||
EXPECT_THAT(futex_lock_pi(IsPrivate(), &a), SyscallFailsWithErrno(EDEADLK));
|
||||
|
||||
ASSERT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallSucceeds());
|
||||
EXPECT_EQ(a.load(), 0);
|
||||
EXPECT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallFailsWithErrno(EPERM));
|
||||
}
|
||||
|
||||
TEST_P(PrivateAndSharedFutexTest, PIConcurrency_NoRandomSave) {
|
||||
DisableSave ds; // Too many syscalls.
|
||||
|
||||
std::atomic<int> a = ATOMIC_VAR_INIT(0);
|
||||
const bool is_priv = IsPrivate();
|
||||
|
||||
std::unique_ptr<ScopedThread> threads[100];
|
||||
for (size_t i = 0; i < ABSL_ARRAYSIZE(threads); ++i) {
|
||||
threads[i] = absl::make_unique<ScopedThread>([is_priv, &a] {
|
||||
for (size_t j = 0; j < 10; ++j) {
|
||||
ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds());
|
||||
EXPECT_EQ(a.load() & FUTEX_TID_MASK, gettid());
|
||||
SleepSafe(absl::Milliseconds(5));
|
||||
ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(PrivateAndSharedFutexTest, PIWaiters) {
|
||||
std::atomic<int> a = ATOMIC_VAR_INIT(0);
|
||||
const bool is_priv = IsPrivate();
|
||||
|
||||
ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds());
|
||||
EXPECT_EQ(a.load(), gettid());
|
||||
|
||||
ScopedThread th([is_priv, &a] {
|
||||
ASSERT_THAT(futex_lock_pi(is_priv, &a), SyscallSucceeds());
|
||||
ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
|
||||
});
|
||||
|
||||
// Wait until the thread blocks on the futex, setting the waiters bit.
|
||||
auto start = absl::Now();
|
||||
while (a.load() != (FUTEX_WAITERS | gettid())) {
|
||||
ASSERT_LT(absl::Now() - start, absl::Seconds(5));
|
||||
absl::SleepFor(absl::Milliseconds(100));
|
||||
}
|
||||
ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
|
||||
}
|
||||
|
||||
TEST_P(PrivateAndSharedFutexTest, PITryLock) {
|
||||
std::atomic<int> a = ATOMIC_VAR_INIT(0);
|
||||
const bool is_priv = IsPrivate();
|
||||
|
||||
ASSERT_THAT(futex_trylock_pi(IsPrivate(), &a), SyscallSucceeds());
|
||||
EXPECT_EQ(a.load(), gettid());
|
||||
|
||||
EXPECT_THAT(futex_trylock_pi(is_priv, &a), SyscallFailsWithErrno(EDEADLK));
|
||||
ScopedThread th([is_priv, &a] {
|
||||
EXPECT_THAT(futex_trylock_pi(is_priv, &a), SyscallFailsWithErrno(EAGAIN));
|
||||
});
|
||||
th.Join();
|
||||
|
||||
ASSERT_THAT(futex_unlock_pi(IsPrivate(), &a), SyscallSucceeds());
|
||||
}
|
||||
|
||||
TEST_P(PrivateAndSharedFutexTest, PITryLockConcurrency_NoRandomSave) {
|
||||
DisableSave ds; // Too many syscalls.
|
||||
|
||||
std::atomic<int> a = ATOMIC_VAR_INIT(0);
|
||||
const bool is_priv = IsPrivate();
|
||||
|
||||
std::unique_ptr<ScopedThread> threads[100];
|
||||
for (size_t i = 0; i < ABSL_ARRAYSIZE(threads); ++i) {
|
||||
threads[i] = absl::make_unique<ScopedThread>([is_priv, &a] {
|
||||
for (size_t j = 0; j < 10;) {
|
||||
if (futex_trylock_pi(is_priv, &a) >= 0) {
|
||||
++j;
|
||||
EXPECT_EQ(a.load() & FUTEX_TID_MASK, gettid());
|
||||
SleepSafe(absl::Milliseconds(5));
|
||||
ASSERT_THAT(futex_unlock_pi(is_priv, &a), SyscallSucceeds());
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace testing
|
||||
} // namespace gvisor
|
||||
|
|
Loading…
Reference in New Issue