gvisor/pkg/sentry/syscalls/linux/sys_futex.go

320 lines
9.0 KiB
Go

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package linux
import (
"time"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
// futexChecker is a futex.Checker that uses a Task's MemoryManager.
type futexChecker struct {
t *kernel.Task
}
// Check checks if the address contains the given value, and returns
// syserror.EAGAIN if it doesn't. See Checker interface in futex package
// for more information.
func (f futexChecker) Check(addr uintptr, val uint32) error {
in := f.t.CopyScratchBuffer(4)
_, err := f.t.CopyInBytes(usermem.Addr(addr), in)
if err != nil {
return err
}
nval := usermem.ByteOrder.Uint32(in)
if val != nval {
return syserror.EAGAIN
}
return nil
}
func (f futexChecker) atomicOp(addr uintptr, op func(uint32) uint32) (uint32, error) {
in := f.t.CopyScratchBuffer(4)
_, err := f.t.CopyInBytes(usermem.Addr(addr), in)
if err != nil {
return 0, err
}
o := usermem.ByteOrder.Uint32(in)
mm := f.t.MemoryManager()
for {
n := op(o)
r, err := mm.CompareAndSwapUint32(f.t, usermem.Addr(addr), o, n, usermem.IOOpts{
AddressSpaceActive: true,
})
if err != nil {
return 0, err
}
if r == o {
return o, nil
}
o = r
}
}
// Op performs an operation on addr and returns a result based on the operation.
func (f futexChecker) Op(addr uintptr, opIn uint32) (bool, error) {
op := (opIn >> 28) & 0xf
cmp := (opIn >> 24) & 0xf
opArg := (opIn >> 12) & 0xfff
cmpArg := opIn & 0xfff
if op&linux.FUTEX_OP_OPARG_SHIFT != 0 {
opArg = 1 << opArg
op &^= linux.FUTEX_OP_OPARG_SHIFT // clear flag
}
var oldVal uint32
var err error
switch op {
case linux.FUTEX_OP_SET:
oldVal, err = f.t.MemoryManager().SwapUint32(f.t, usermem.Addr(addr), opArg, usermem.IOOpts{
AddressSpaceActive: true,
})
case linux.FUTEX_OP_ADD:
oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
return a + opArg
})
case linux.FUTEX_OP_OR:
oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
return a | opArg
})
case linux.FUTEX_OP_ANDN:
oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
return a & ^opArg
})
case linux.FUTEX_OP_XOR:
oldVal, err = f.atomicOp(addr, func(a uint32) uint32 {
return a ^ opArg
})
default:
return false, syserror.ENOSYS
}
if err != nil {
return false, err
}
switch cmp {
case linux.FUTEX_OP_CMP_EQ:
return oldVal == cmpArg, nil
case linux.FUTEX_OP_CMP_NE:
return oldVal != cmpArg, nil
case linux.FUTEX_OP_CMP_LT:
return oldVal < cmpArg, nil
case linux.FUTEX_OP_CMP_LE:
return oldVal <= cmpArg, nil
case linux.FUTEX_OP_CMP_GT:
return oldVal > cmpArg, nil
case linux.FUTEX_OP_CMP_GE:
return oldVal >= cmpArg, nil
default:
return false, syserror.ENOSYS
}
}
// futexWaitRestartBlock encapsulates the state required to restart futex(2)
// via restart_syscall(2).
type futexWaitRestartBlock struct {
duration time.Duration
// addr stored as uint64 since uintptr is not save-able.
addr uint64
val uint32
mask uint32
}
// Restart implements kernel.SyscallRestartBlock.Restart.
func (f *futexWaitRestartBlock) Restart(t *kernel.Task) (uintptr, error) {
return futexWaitDuration(t, f.duration, false, uintptr(f.addr), f.val, f.mask)
}
// futexWaitAbsolute performs a FUTEX_WAIT_BITSET, blocking until the wait is
// complete.
//
// The wait blocks forever if forever is true, otherwise it blocks until ts.
//
// If blocking is interrupted, the syscall is restarted with the original
// arguments.
func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, forever bool, addr uintptr, val, mask uint32) (uintptr, error) {
w := t.FutexWaiter()
err := t.Futex().WaitPrepare(w, futexChecker{t}, addr, val, mask)
if err != nil {
return 0, err
}
if forever {
err = t.Block(w.C)
} else if clockRealtime {
notifier, tchan := ktime.NewChannelNotifier()
timer := ktime.NewTimer(t.Kernel().RealtimeClock(), notifier)
timer.Swap(ktime.Setting{
Enabled: true,
Next: ktime.FromTimespec(ts),
})
err = t.BlockWithTimer(w.C, tchan)
timer.Destroy()
} else {
err = t.BlockWithDeadline(w.C, true, ktime.FromTimespec(ts))
}
t.Futex().WaitComplete(w)
return 0, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
}
// futexWaitDuration performs a FUTEX_WAIT, blocking until the wait is
// complete.
//
// The wait blocks forever if forever is true, otherwise is blocks for
// duration.
//
// If blocking is interrupted, forever determines how to restart the
// syscall. If forever is true, the syscall is restarted with the original
// arguments. If forever is false, duration is a relative timeout and the
// syscall is restarted with the remaining timeout.
func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, addr uintptr, val, mask uint32) (uintptr, error) {
w := t.FutexWaiter()
err := t.Futex().WaitPrepare(w, futexChecker{t}, addr, val, mask)
if err != nil {
return 0, err
}
remaining, err := t.BlockWithTimeout(w.C, !forever, duration)
t.Futex().WaitComplete(w)
if err == nil {
return 0, nil
}
// The wait was unsuccessful for some reason other than interruption. Simply
// forward the error.
if err != syserror.ErrInterrupted {
return 0, err
}
// The wait was interrupted and we need to restart. Decide how.
// The wait duration was absolute, restart with the original arguments.
if forever {
return 0, kernel.ERESTARTSYS
}
// The wait duration was relative, restart with the remaining duration.
t.SetSyscallRestartBlock(&futexWaitRestartBlock{
duration: remaining,
addr: uint64(addr),
val: val,
mask: mask,
})
return 0, kernel.ERESTART_RESTARTBLOCK
}
// Futex implements linux syscall futex(2).
// It provides a method for a program to wait for a value at a given address to
// change, and a method to wake up anyone waiting on a particular address.
func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
uaddr := args[0].Pointer()
futexOp := args[1].Int()
val := int(args[2].Int())
nreq := int(args[3].Int())
timeout := args[3].Pointer()
uaddr2 := args[4].Pointer()
val3 := args[5].Int()
addr := uintptr(uaddr)
naddr := uintptr(uaddr2)
cmd := futexOp &^ (linux.FUTEX_PRIVATE_FLAG | linux.FUTEX_CLOCK_REALTIME)
clockRealtime := (futexOp & linux.FUTEX_CLOCK_REALTIME) == linux.FUTEX_CLOCK_REALTIME
mask := uint32(val3)
switch cmd {
case linux.FUTEX_WAIT, linux.FUTEX_WAIT_BITSET:
// WAIT{_BITSET} wait forever if the timeout isn't passed.
forever := timeout == 0
var timespec linux.Timespec
if !forever {
var err error
timespec, err = copyTimespecIn(t, timeout)
if err != nil {
return 0, nil, err
}
}
switch cmd {
case linux.FUTEX_WAIT:
// WAIT uses a relative timeout.
mask = ^uint32(0)
var timeoutDur time.Duration
if !forever {
timeoutDur = time.Duration(timespec.ToNsecCapped()) * time.Nanosecond
}
n, err := futexWaitDuration(t, timeoutDur, forever, addr, uint32(val), mask)
return n, nil, err
case linux.FUTEX_WAIT_BITSET:
// WAIT_BITSET uses an absolute timeout which is either
// CLOCK_MONOTONIC or CLOCK_REALTIME.
if mask == 0 {
return 0, nil, syserror.EINVAL
}
n, err := futexWaitAbsolute(t, clockRealtime, timespec, forever, addr, uint32(val), mask)
return n, nil, err
default:
panic("unreachable")
}
case linux.FUTEX_WAKE:
mask = ^uint32(0)
fallthrough
case linux.FUTEX_WAKE_BITSET:
if mask == 0 {
return 0, nil, syserror.EINVAL
}
n, err := t.Futex().Wake(addr, mask, val)
return uintptr(n), nil, err
case linux.FUTEX_REQUEUE:
n, err := t.Futex().Requeue(addr, naddr, val, nreq)
return uintptr(n), nil, err
case linux.FUTEX_CMP_REQUEUE:
// 'val3' contains the value to be checked at 'addr' and
// 'val' is the number of waiters that should be woken up.
nval := uint32(val3)
n, err := t.Futex().RequeueCmp(futexChecker{t}, addr, nval, naddr, val, nreq)
return uintptr(n), nil, err
case linux.FUTEX_WAKE_OP:
op := uint32(val3)
n, err := t.Futex().WakeOp(futexChecker{t}, addr, naddr, val, nreq, op)
return uintptr(n), nil, err
case linux.FUTEX_LOCK_PI, linux.FUTEX_UNLOCK_PI, linux.FUTEX_TRYLOCK_PI, linux.FUTEX_WAIT_REQUEUE_PI, linux.FUTEX_CMP_REQUEUE_PI:
// We don't support any priority inversion futexes.
return 0, nil, syserror.ENOSYS
default:
// We don't even know about this command.
return 0, nil, syserror.ENOSYS
}
}