gvisor/pkg/sentry/syscalls/linux/sys_signal.go

552 lines
17 KiB
Go

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package linux
import (
"math"
"time"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
// "For a process to have permission to send a signal it must
// - either be privileged (CAP_KILL), or
// - the real or effective user ID of the sending process must be equal to the
// real or saved set-user-ID of the target process.
//
// In the case of SIGCONT it suffices when the sending and receiving processes
// belong to the same session." - kill(2)
//
// Equivalent to kernel/signal.c:check_kill_permission.
func mayKill(t *kernel.Task, target *kernel.Task, sig linux.Signal) bool {
	// Tasks sharing a thread group may always signal one another.
	// kernel/signal.c:check_kill_permission permits this even though kill(2)
	// does not mention it, because kill cannot address individual tasks.
	if t.ThreadGroup() == target.ThreadGroup() {
		return true
	}

	// CAP_KILL in the target's user namespace bypasses the UID checks.
	if t.HasCapabilityIn(linux.CAP_KILL, target.UserNamespace()) {
		return true
	}

	// The sender's real or effective UID must match the target's real or
	// saved set-user-ID.
	sender := t.Credentials()
	receiver := target.Credentials()
	switch {
	case sender.EffectiveKUID == receiver.SavedKUID,
		sender.EffectiveKUID == receiver.RealKUID,
		sender.RealKUID == receiver.SavedKUID,
		sender.RealKUID == receiver.RealKUID:
		return true
	}

	// SIGCONT is additionally allowed between tasks of the same session.
	return sig == linux.SIGCONT && target.ThreadGroup().Session() == t.ThreadGroup().Session()
}
// Kill implements linux syscall kill(2).
//
// The first argument (pid) selects the recipients and the second is the
// signal number:
//   - pid > 0: the task with that ID in the caller's PID namespace.
//   - pid == -1: every thread group in the caller's PID namespace that the
//     caller may signal, except the caller's own thread group and init.
//   - pid == 0: every thread group in the caller's process group.
//   - pid < -1: every thread group in the process group with ID -pid.
//
// Permission to signal each recipient is decided by mayKill.
func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	pid := kernel.ThreadID(args[0].Int())
	sig := linux.Signal(args[1].Int())

	// "If pid is positive, then signal sig is sent to the process with the
	// ID specified by pid." - kill(2)
	if pid > 0 {
		// Retry whenever the send itself reports ESRCH: this handles races
		// with execve where target dies between TaskWithID and
		// SendGroupSignal. Compare Linux's kernel/signal.c:kill_pid_info().
		for {
			target := t.PIDNamespace().TaskWithID(pid)
			if target == nil {
				return 0, nil, syserror.ESRCH
			}
			if !mayKill(t, target, sig) {
				return 0, nil, syserror.EPERM
			}
			si := &arch.SignalInfo{
				Signo: int32(sig),
				Code:  arch.SignalInfoUser,
			}
			// Sender identity is expressed in the target's namespaces.
			si.SetPid(int32(target.PIDNamespace().IDOfTask(t)))
			si.SetUid(int32(t.Credentials().RealKUID.In(target.UserNamespace()).OrOverflow()))
			err := target.SendGroupSignal(si)
			if err == syserror.ESRCH {
				continue
			}
			return 0, nil, err
		}
	}

	// "If pid equals -1, then sig is sent to every process for which the
	// calling process has permission to send signals, except for process 1
	// (init), but see below. ... POSIX.1-2001 requires that kill(-1,sig)
	// send sig to all processes that the calling process may send signals
	// to, except possibly for some implementation-defined system
	// processes. Linux allows a process to signal itself, but on Linux the
	// call kill(-1,sig) does not signal the calling process."
	if pid == -1 {
		var (
			sendErr error
			sent    int
		)
		for _, tg := range t.PIDNamespace().ThreadGroups() {
			// Never signal the caller's own thread group or init.
			if tg == t.ThreadGroup() || t.PIDNamespace().IDOfThreadGroup(tg) == kernel.InitTID {
				continue
			}

			// If pid == -1, the returned error is the last non-EPERM error
			// from any call to group_send_sig_info, so thread groups we may
			// not signal are skipped silently.
			if !mayKill(t, tg.Leader(), sig) {
				continue
			}

			// Here and below, whether or not kill returns an error may
			// depend on the iteration order. We at least implement the
			// semantics documented by the man page: "On success (at least
			// one signal was sent), zero is returned."
			si := &arch.SignalInfo{
				Signo: int32(sig),
				Code:  arch.SignalInfoUser,
			}
			si.SetPid(int32(tg.PIDNamespace().IDOfTask(t)))
			si.SetUid(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow()))

			err := tg.SendSignal(si)
			if err == syserror.ESRCH {
				// ESRCH means the task exited while we were iterating.
				// This is a race which would not normally exist on Linux,
				// so we suppress it.
				continue
			}
			sent++
			if err != nil {
				sendErr = err
			}
		}
		if sent == 0 {
			return 0, nil, syserror.ESRCH
		}
		return 0, nil, sendErr
	}

	// "If pid equals 0, then sig is sent to every process in the process
	// group of the calling process."
	//
	// "If pid is less than -1, then sig is sent to every process
	// in the process group whose ID is -pid."
	pgid := kernel.ProcessGroupID(-pid)
	if pgid == 0 {
		pgid = t.PIDNamespace().IDOfProcessGroup(t.ThreadGroup().ProcessGroup())
	}

	// If pid != -1 (i.e. signalling a process group), the returned error
	// is the last error from any call to group_send_sig_info. ESRCH is the
	// result when no thread group matched at all.
	result := syserror.ESRCH
	for _, tg := range t.PIDNamespace().ThreadGroups() {
		if t.PIDNamespace().IDOfProcessGroup(tg.ProcessGroup()) != pgid {
			continue
		}
		if !mayKill(t, tg.Leader(), sig) {
			result = syserror.EPERM
			continue
		}

		si := &arch.SignalInfo{
			Signo: int32(sig),
			Code:  arch.SignalInfoUser,
		}
		si.SetPid(int32(tg.PIDNamespace().IDOfTask(t)))
		si.SetUid(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow()))

		// An ESRCH from the send is the same exit race as in the pid == -1
		// case and is likewise ignored.
		if err := tg.SendSignal(si); err != syserror.ESRCH {
			result = err
		}
	}
	return 0, nil, result
}
// tkillSigInfo builds the SignalInfo delivered by tkill(2) and tgkill(2).
//
// The result carries signal number sig and code arch.SignalInfoTkill. Its pid
// field is the ID of sender's thread group as seen from receiver's PID
// namespace, and its uid field is sender's real UID mapped into receiver's
// user namespace (or the overflow value returned by OrOverflow).
func tkillSigInfo(sender, receiver *kernel.Task, sig linux.Signal) *arch.SignalInfo {
	senderTGID := receiver.PIDNamespace().IDOfThreadGroup(sender.ThreadGroup())
	senderUID := sender.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()

	si := &arch.SignalInfo{
		Signo: int32(sig),
		Code:  arch.SignalInfoTkill,
	}
	si.SetPid(int32(senderTGID))
	si.SetUid(int32(senderUID))
	return si
}
// Tkill implements linux syscall tkill(2).
//
// Arguments are the target thread ID and the signal number. It returns
// EINVAL for a non-positive tid, ESRCH if no task with that ID exists in the
// caller's PID namespace, and EPERM if mayKill denies the send. Otherwise the
// result is whatever SendSignal on the target returns.
func Tkill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	tid := kernel.ThreadID(args[0].Int())
	sig := linux.Signal(args[1].Int())

	// N.B. Inconsistent with man page, linux actually rejects calls with
	// tid <=0 by EINVAL. This isn't the same for all signal calls.
	if tid <= 0 {
		return 0, nil, syserror.EINVAL
	}

	target := t.PIDNamespace().TaskWithID(tid)
	switch {
	case target == nil:
		return 0, nil, syserror.ESRCH
	case !mayKill(t, target, sig):
		return 0, nil, syserror.EPERM
	}

	err := target.SendSignal(tkillSigInfo(t, target, sig))
	return 0, nil, err
}
// Tgkill implements linux syscall tgkill(2).
//
// Arguments are the target thread group ID, the target thread ID and the
// signal number. It returns EINVAL if either ID is non-positive, ESRCH if
// either lookup fails or the task does not belong to the named thread group,
// and EPERM if mayKill denies the send. Otherwise the result is whatever
// SendSignal on the target returns.
func Tgkill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	tgid := kernel.ThreadID(args[0].Int())
	tid := kernel.ThreadID(args[1].Int())
	sig := linux.Signal(args[2].Int())

	// N.B. Inconsistent with man page, linux actually rejects calls with
	// tgid/tid <=0 by EINVAL. This isn't the same for all signal calls.
	if tgid <= 0 || tid <= 0 {
		return 0, nil, syserror.EINVAL
	}

	// Both IDs are resolved in the caller's PID namespace.
	pidns := t.PIDNamespace()
	wantTG := pidns.ThreadGroupWithID(tgid)
	target := t.PIDNamespace().TaskWithID(tid)

	switch {
	case wantTG == nil, target == nil:
		return 0, nil, syserror.ESRCH
	case target.ThreadGroup() != wantTG:
		// The thread exists but is not a member of the requested group.
		return 0, nil, syserror.ESRCH
	case !mayKill(t, target, sig):
		return 0, nil, syserror.EPERM
	}

	err := target.SendSignal(tkillSigInfo(t, target, sig))
	return 0, nil, err
}
// RtSigaction implements linux syscall rt_sigaction(2).
//
// Arguments are the signal number, the user address of the new action (0 to
// leave the action unchanged), the user address at which to store the
// previous action (0 to skip), and sigsetsize.
//
// It returns EINVAL if sigsetsize is not linux.SignalSetSize. Errors from
// copying the action structures or from ThreadGroup.SetSignalAct are
// returned unchanged.
func RtSigaction(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	sig := linux.Signal(args[0].Int())
	newactarg := args[1].Pointer()
	oldactarg := args[2].Pointer()
	sigsetsize := args[3].SizeT()

	// Linux requires sigsetsize to be exactly sizeof(sigset_t) and fails
	// with EINVAL otherwise, before looking at either action pointer.
	if sigsetsize != linux.SignalSetSize {
		return 0, nil, syserror.EINVAL
	}

	// A nil newactptr tells SetSignalAct to only query the current action.
	var newactptr *arch.SignalAct
	if newactarg != 0 {
		newact, err := t.CopyInSignalAct(newactarg)
		if err != nil {
			return 0, nil, err
		}
		newactptr = &newact
	}

	oldact, err := t.ThreadGroup().SetSignalAct(sig, newactptr)
	if err != nil {
		return 0, nil, err
	}

	if oldactarg != 0 {
		if err := t.CopyOutSignalAct(oldactarg, &oldact); err != nil {
			return 0, nil, err
		}
	}
	return 0, nil, nil
}
// Sigreturn implements linux syscall sigreturn(2).
//
// The syscall arguments are unused. All work is delegated to
// t.SignalReturn(false); the false argument is what distinguishes this
// handler from RtSigreturn. The resulting SyscallControl and error are
// returned unchanged.
func Sigreturn(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	control, sigErr := t.SignalReturn(false)
	return 0, control, sigErr
}
// RtSigreturn implements linux syscall rt_sigreturn(2).
//
// The syscall arguments are unused. All work is delegated to
// t.SignalReturn(true); the true argument is what distinguishes this handler
// from Sigreturn. The resulting SyscallControl and error are returned
// unchanged.
func RtSigreturn(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	control, sigErr := t.SignalReturn(true)
	return 0, control, sigErr
}
// RtSigprocmask implements linux syscall rt_sigprocmask(2).
//
// Arguments are how (SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK), the user address
// of the set to apply (0 to leave the mask unchanged), the user address at
// which to store the previous mask (0 to skip), and sigsetsize.
//
// It returns EINVAL if sigsetsize is not linux.SignalSetSize, or if a set is
// supplied together with an unrecognized how.
func RtSigprocmask(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	how := args[0].Int()
	setaddr := args[1].Pointer()
	oldaddr := args[2].Pointer()
	sigsetsize := args[3].SizeT()

	if sigsetsize != linux.SignalSetSize {
		return 0, nil, syserror.EINVAL
	}

	// Snapshot the mask before any change so the caller is given the
	// previous value.
	oldmask := t.SignalMask()

	if setaddr != 0 {
		mask, err := copyInSigSet(t, setaddr, sigsetsize)
		if err != nil {
			return 0, nil, err
		}

		var next linux.SignalSet
		switch how {
		case linux.SIG_BLOCK:
			next = oldmask | mask
		case linux.SIG_UNBLOCK:
			next = oldmask &^ mask
		case linux.SIG_SETMASK:
			next = mask
		default:
			return 0, nil, syserror.EINVAL
		}
		t.SetSignalMask(next)
	}

	if oldaddr == 0 {
		return 0, nil, nil
	}
	return 0, nil, copyOutSigSet(t, oldaddr, oldmask)
}
// Sigaltstack implements linux syscall sigaltstack(2).
//
// Arguments are the user address of the new signal stack (0 to leave it
// unchanged) and the user address at which to store the current one (0 to
// skip). The current stack is copied out before the new one is read and
// installed. It returns EPERM if t.SetSignalStack refuses the new stack.
func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	setaddr := args[0].Pointer()
	oldaddr := args[1].Pointer()

	current := t.SignalStack()
	if oldaddr != 0 {
		if err := t.CopyOutSignalStack(oldaddr, &current); err != nil {
			return 0, nil, err
		}
	}

	if setaddr == 0 {
		return 0, nil, nil
	}

	requested, err := t.CopyInSignalStack(setaddr)
	if err != nil {
		return 0, nil, err
	}

	// The signal stack cannot be changed if the task is currently
	// on the stack. This is enforced at the lowest level because
	// these semantics apply to changing the signal stack via a
	// ucontext during a signal handler.
	if !t.SetSignalStack(requested) {
		return 0, nil, syserror.EPERM
	}
	return 0, nil, nil
}
// Pause implements linux syscall pause(2).
//
// The task blocks with no event channel. The error from the block is passed
// through syserror.ConvertIntr with kernel.ERESTARTNOHAND as the replacement
// for an interruption.
func Pause(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	blockErr := t.Block(nil)
	return 0, nil, syserror.ConvertIntr(blockErr, kernel.ERESTARTNOHAND)
}
// sigtimedwait waits up to timeout for one of the signals in mask and
// dequeues it.
//
// It returns the dequeued SignalInfo on success. It returns EAGAIN if no
// signal in mask is pending and timeout is zero, or if the wait times out,
// and EINTR if the wait is interrupted by a signal that is not in mask.
func sigtimedwait(t *kernel.Task, mask linux.SignalSet, timeout time.Duration) (*arch.SignalInfo, error) {
	// Fast path: one of the requested signals may already be pending.
	if pending := t.TakeSignal(^mask); pending != nil {
		return pending, nil
	}

	// Nothing pending and the caller asked not to wait.
	if timeout == 0 {
		return nil, syserror.EAGAIN
	}

	// Temporarily unblock the signals of interest, wait for either a timeout
	// or a new signal, then restore the caller's mask.
	saved := t.SignalMask()
	t.SetSignalMask(saved &^ mask)
	_, waitErr := t.BlockWithTimeout(nil, true, timeout)
	t.SetSignalMask(saved)

	if waitErr == syserror.ErrInterrupted {
		if got := t.TakeSignal(^mask); got != nil {
			// Interrupted by one of the signals we were waiting for.
			return got, nil
		}
		// Interrupted by a signal we weren't waiting for.
		return nil, syserror.EINTR
	}
	if waitErr == syserror.ETIMEDOUT {
		// Timed out and still no signals.
		return nil, syserror.EAGAIN
	}

	// Some other error? Shouldn't be possible. The event channel passed to
	// BlockWithTimeout was nil, so the only two ways the block could've
	// ended are a timeout or an interrupt.
	panic("unreachable")
}
// RtSigpending implements linux syscall rt_sigpending(2).
//
// Arguments are the user address at which to store the pending signal set
// and sigsetsize. It returns EINVAL if sigsetsize is not
// linux.SignalSetSize; otherwise the set reported by t.PendingSignals is
// copied out to the given address and any copy error is returned.
func RtSigpending(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	addr := args[0].Pointer()
	sigsetsize := args[1].SizeT()

	// Linux rejects a sigsetsize larger than sizeof(sigset_t). This handler
	// writes the whole set regardless of the size the caller claims, so any
	// other size is rejected rather than risking a write past the caller's
	// buffer. This matches the check in RtSigprocmask.
	if sigsetsize != linux.SignalSetSize {
		return 0, nil, syserror.EINVAL
	}

	pending := t.PendingSignals()
	_, err := t.CopyOut(addr, pending)
	return 0, nil, err
}
// RtSigtimedwait implements linux syscall rt_sigtimedwait(2).
//
// Arguments are the user address of the signal set to wait for, the user
// address at which to store the received siginfo (0 to skip), the user
// address of a timespec timeout (0 to use the maximum duration), and
// sigsetsize. On success the received signal number is returned.
func RtSigtimedwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	sigset := args[0].Pointer()
	siginfo := args[1].Pointer()
	timespec := args[2].Pointer()
	sigsetsize := args[3].SizeT()

	mask, err := copyInSigSet(t, sigset, sigsetsize)
	if err != nil {
		return 0, nil, err
	}

	// Without a timespec the wait uses the largest representable duration.
	timeout := time.Duration(math.MaxInt64)
	if timespec != 0 {
		ts, err := copyTimespecIn(t, timespec)
		if err != nil {
			return 0, nil, err
		}
		if !ts.Valid() {
			return 0, nil, syserror.EINVAL
		}
		timeout = time.Duration(ts.ToNsecCapped())
	}

	si, err := sigtimedwait(t, mask, timeout)
	if err != nil {
		return 0, nil, err
	}
	if si == nil {
		// sigtimedwait's not supposed to return nil si and err...
		return 0, nil, nil
	}

	// The siginfo is only adjusted and copied out when the caller supplied
	// a destination for it.
	if siginfo != 0 {
		si.FixSignalCodeForUser()
		if _, err := t.CopyOut(siginfo, si); err != nil {
			return 0, nil, err
		}
	}
	return uintptr(si.Signo), nil, nil
}
// RtSigqueueinfo implements linux syscall rt_sigqueueinfo(2).
//
// Arguments are the target ID, the signal number and the user address of the
// siginfo to deliver. It returns ESRCH if the target does not exist in the
// caller's PID namespace, and EPERM if the siginfo code is reserved and the
// target is not the caller, or if mayKill denies the send.
func RtSigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	pid := kernel.ThreadID(args[0].Int())
	sig := linux.Signal(args[1].Int())
	infoAddr := args[2].Pointer()

	// Copy in the info.
	//
	// We must ensure that the Signo is set (Linux overrides this in the
	// same way), and that the code is in the allowed set. This same logic
	// appears below in RtSigtgqueueinfo and should be kept in sync.
	var info arch.SignalInfo
	if _, err := t.CopyIn(infoAddr, &info); err != nil {
		return 0, nil, err
	}
	info.Signo = int32(sig)

	// This must loop to handle the race with execve described in Kill: an
	// ESRCH from the send itself triggers a fresh lookup.
	for {
		// Deliver to the given task's thread group.
		target := t.PIDNamespace().TaskWithID(pid)
		if target == nil {
			return 0, nil, syserror.ESRCH
		}

		// If the sender is not the receiver, it can't use si_codes used by
		// the kernel or SI_TKILL.
		if target != t && (info.Code >= 0 || info.Code == arch.SignalInfoTkill) {
			return 0, nil, syserror.EPERM
		}
		if !mayKill(t, target, sig) {
			return 0, nil, syserror.EPERM
		}

		err := target.SendGroupSignal(&info)
		if err == syserror.ESRCH {
			continue
		}
		return 0, nil, err
	}
}
// RtTgsigqueueinfo implements linux syscall rt_tgsigqueueinfo(2).
//
// Arguments are the target thread group ID, the target thread ID, the signal
// number and the user address of the siginfo to deliver. It returns EINVAL
// if either ID is non-positive, ESRCH if either lookup fails or the task is
// not in the named thread group, and EPERM if the siginfo code is reserved
// and the target is not the caller, or if mayKill denies the send.
func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	tgid := kernel.ThreadID(args[0].Int())
	tid := kernel.ThreadID(args[1].Int())
	sig := linux.Signal(args[2].Int())
	infoAddr := args[3].Pointer()

	// N.B. Inconsistent with man page, linux actually rejects calls with
	// tgid/tid <=0 by EINVAL. This isn't the same for all signal calls.
	if tgid <= 0 || tid <= 0 {
		return 0, nil, syserror.EINVAL
	}

	// Copy in the info and force its signal number to the one requested,
	// as RtSigqueueinfo does.
	var info arch.SignalInfo
	if _, err := t.CopyIn(infoAddr, &info); err != nil {
		return 0, nil, err
	}
	info.Signo = int32(sig)

	// Resolve the individual task and confirm it belongs to the named
	// thread group.
	wantTG := t.PIDNamespace().ThreadGroupWithID(tgid)
	target := t.PIDNamespace().TaskWithID(tid)
	if wantTG == nil || target == nil || target.ThreadGroup() != wantTG {
		return 0, nil, syserror.ESRCH
	}

	// If the sender is not the receiver, it can't use si_codes used by the
	// kernel or SI_TKILL.
	if target != t && (info.Code >= 0 || info.Code == arch.SignalInfoTkill) {
		return 0, nil, syserror.EPERM
	}
	if !mayKill(t, target, sig) {
		return 0, nil, syserror.EPERM
	}

	err := target.SendSignal(&info)
	return 0, nil, err
}
// RtSigsuspend implements linux syscall rt_sigsuspend(2).
//
// Arguments are the user address of the temporary signal mask and
// sigsetsize. It returns EINVAL if sigsetsize is not linux.SignalSetSize.
// Otherwise the mask is installed with kernel.UnblockableSignals removed,
// the previous mask is recorded through SetSavedSignalMask, and the task
// blocks. The error from the block is passed through syserror.ConvertIntr
// with kernel.ERESTARTNOHAND as the replacement for an interruption.
func RtSigsuspend(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	sigset := args[0].Pointer()
	sigsetsize := args[1].SizeT()

	// Linux requires sigsetsize to be exactly sizeof(sigset_t) and fails
	// with EINVAL otherwise, before reading the mask from user memory.
	if sigsetsize != linux.SignalSetSize {
		return 0, nil, syserror.EINVAL
	}

	// Copy in the signal mask.
	var mask linux.SignalSet
	if _, err := t.CopyIn(sigset, &mask); err != nil {
		return 0, nil, err
	}
	mask &^= kernel.UnblockableSignals

	// Swap the mask.
	oldmask := t.SignalMask()
	t.SetSignalMask(mask)
	t.SetSavedSignalMask(oldmask)

	// Perform the wait.
	return 0, nil, syserror.ConvertIntr(t.Block(nil), kernel.ERESTARTNOHAND)
}
// RestartSyscall implements the linux syscall restart_syscall(2).
//
// If the task has a restart block, its Restart method supplies the result.
// If there is none, a debug message is logged and EINTR is returned.
func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	block := t.SyscallRestartBlock()
	if block == nil {
		// The restart block should never be nil here, but it's possible
		// ERESTART_RESTARTBLOCK was set by ptrace without the current
		// syscall setting up a restart block. If ptrace didn't manipulate
		// the return value, finding a nil restart block is a bug. Linux
		// ensures that the restart function is never null by
		// (re)initializing it with one that translates the restart into
		// EINTR. We'll emulate that behaviour.
		t.Debugf("Restart block missing in restart_syscall(2). Did ptrace inject a return value of ERESTART_RESTARTBLOCK?")
		return 0, nil, syserror.EINTR
	}

	ret, err := block.Restart(t)
	return ret, nil, err
}