// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

import (
	"fmt"

	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
	"gvisor.googlesource.com/gvisor/pkg/syserror"
)

// ptraceOptions are the subset of options controlling a task's ptrace behavior
// that are set by ptrace(PTRACE_SETOPTIONS).
//
// +stateify savable
type ptraceOptions struct {
	// ExitKill is true if the tracee should be sent SIGKILL when the tracer
	// exits.
	ExitKill bool

	// If SysGood is true, set bit 7 in the signal number for
	// syscall-entry-stop and syscall-exit-stop traps delivered to this task's
	// tracer.
	SysGood bool

	// TraceClone is true if the tracer wants to receive PTRACE_EVENT_CLONE
	// events.
	TraceClone bool

	// TraceExec is true if the tracer wants to receive PTRACE_EVENT_EXEC
	// events.
	TraceExec bool

	// TraceExit is true if the tracer wants to receive PTRACE_EVENT_EXIT
	// events.
	TraceExit bool

	// TraceFork is true if the tracer wants to receive PTRACE_EVENT_FORK
	// events.
	TraceFork bool

	// TraceSeccomp is true if the tracer wants to receive PTRACE_EVENT_SECCOMP
	// events.
	TraceSeccomp bool

	// TraceVfork is true if the tracer wants to receive PTRACE_EVENT_VFORK
	// events.
	TraceVfork bool

	// TraceVforkDone is true if the tracer wants to receive
	// PTRACE_EVENT_VFORK_DONE events.
	TraceVforkDone bool
}

// ptraceSyscallMode controls the behavior of a ptraced task at syscall entry
// and exit.
type ptraceSyscallMode int

const (
	// ptraceSyscallNone indicates that the task has never ptrace-stopped, or
	// that it was resumed from its last ptrace-stop by PTRACE_CONT or
	// PTRACE_DETACH. The task's syscalls will not be intercepted.
	ptraceSyscallNone ptraceSyscallMode = iota

	// ptraceSyscallIntercept indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSCALL. The next time the task enters or exits a
	// syscall, a ptrace-stop will occur.
	ptraceSyscallIntercept

	// ptraceSyscallEmu indicates that the task was resumed from its last
	// ptrace-stop by PTRACE_SYSEMU or PTRACE_SYSEMU_SINGLESTEP. The next time
	// the task enters a syscall, the syscall will be skipped, and a
	// ptrace-stop will occur.
	ptraceSyscallEmu
)

// CanTrace checks that t is permitted to access target's state, as defined by
// ptrace(2), subsection "Ptrace access mode checking". If attach is true, it
// checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
// mode PTRACE_MODE_READ.
func (t *Task) CanTrace(target *Task, attach bool) bool {
	// "1. If the calling thread and the target thread are in the same thread
	// group, access is always allowed." - ptrace(2)
	//
	// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
	// should not deny sub-threads", first released in Linux 3.12), the rule
	// only applies if t and target are the same task. But, as that commit
	// message puts it, "[any] security check is pointless when the tasks share
	// the same ->mm."
	if t.tg == target.tg {
		return true
	}

	// """
	// 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped,
	// doesn't exist until Linux 4.5).
	//
	// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
	// caller's real UID and GID for the checks in the next step. (Most APIs
	// that check the caller's UID and GID use the effective IDs. For
	// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
	// instead.)
	//
	// 3. Deny access if neither of the following is true:
	//
	// - The real, effective, and saved-set user IDs of the target match the
	// caller's user ID, *and* the real, effective, and saved-set group IDs of
	// the target match the caller's group ID.
	//
	// - The caller has the CAP_SYS_PTRACE capability in the user namespace of
	// the target.
	//
	// 4. Deny access if the target process "dumpable" attribute has a value
	// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
	// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
	// the user namespace of the target process.
	//
	// 5. The kernel LSM security_ptrace_access_check() interface is invoked to
	// see if ptrace access is permitted. The results depend on the LSM(s). The
	// implementation of this interface in the commoncap LSM performs the
	// following steps:
	//
	// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
	// caller's effective capability set; otherwise (the access mode specifies
	// PTRACE_MODE_REALCREDS, so) use the caller's permitted capability set.
	//
	// b) Deny access if neither of the following is true:
	//
	// - The caller and the target process are in the same user namespace, and
	// the caller's capabilities are a proper superset of the target process's
	// permitted capabilities.
	//
	// - The caller has the CAP_SYS_PTRACE capability in the target process's
	// user namespace.
	//
	// Note that the commoncap LSM does not distinguish between
	// PTRACE_MODE_READ and PTRACE_MODE_ATTACH. (ED: From earlier in this
	// section: "the commoncap LSM ... is always invoked".)
	// """
	callerCreds := t.Credentials()
	targetCreds := target.Credentials()
	if callerCreds.HasCapabilityIn(linux.CAP_SYS_PTRACE, targetCreds.UserNamespace) {
		return true
	}
	if cuid := callerCreds.RealKUID; cuid != targetCreds.RealKUID || cuid != targetCreds.EffectiveKUID || cuid != targetCreds.SavedKUID {
		return false
	}
	if cgid := callerCreds.RealKGID; cgid != targetCreds.RealKGID || cgid != targetCreds.EffectiveKGID || cgid != targetCreds.SavedKGID {
		return false
	}
	// TODO: dumpability check
	if callerCreds.UserNamespace != targetCreds.UserNamespace {
		return false
	}
	if targetCreds.PermittedCaps&^callerCreds.PermittedCaps != 0 {
		return false
	}
	// TODO: Yama LSM
	return true
}
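
// To make the checks above concrete (an illustration, not from ptrace(2)): an
// unprivileged process can typically attach to its own fork children, since
// parent and child share real/effective/saved IDs and the child's permitted
// capabilities are a subset of the parent's. Conversely, attaching to a
// process running a setuid binary fails the UID comparison above (its
// effective and saved UIDs differ from the caller's real UID) unless the
// caller has CAP_SYS_PTRACE in the target's user namespace.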

// Tracer returns t's ptrace Tracer.
func (t *Task) Tracer() *Task {
	return t.ptraceTracer.Load().(*Task)
}

// hasTracer returns true if t has a ptrace tracer attached.
func (t *Task) hasTracer() bool {
	// This isn't just inlined into callers so that if Task.Tracer() turns out
	// to be too expensive because of e.g. interface conversion, we can switch
	// to having a separate atomic flag more easily.
	return t.Tracer() != nil
}

// ptraceStop is a TaskStop placed on tasks in a ptrace-stop.
//
// +stateify savable
type ptraceStop struct {
	// If frozen is true, the stopped task's tracer is currently operating on
	// it, so Task.Kill should not remove the stop.
	frozen bool
}

// Killable implements TaskStop.Killable.
func (s *ptraceStop) Killable() bool {
	return !s.frozen
}

// beginPtraceStopLocked initiates an unfrozen ptrace-stop on t. If t has been
// killed, the stop is skipped, and beginPtraceStopLocked returns false.
//
// beginPtraceStopLocked does not signal t's tracer or wake it if it is
// waiting.
//
// Preconditions: The TaskSet mutex must be locked. The caller must be running
// on the task goroutine.
func (t *Task) beginPtraceStopLocked() bool {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// This is analogous to Linux's kernel/signal.c:ptrace_stop() => ... =>
	// kernel/sched/core.c:__schedule() => signal_pending_state() check, which
	// is what prevents tasks from entering ptrace-stops after being killed.
	// Note that if t was SIGKILLed and beginPtraceStopLocked is being called
	// for PTRACE_EVENT_EXIT, the task will have dequeued the signal before
	// entering the exit path, so t.killedLocked() will no longer return true.
	// This is consistent with Linux: "Bugs: ... A SIGKILL signal may still
	// cause a PTRACE_EVENT_EXIT stop before actual signal death. This may be
	// changed in the future; SIGKILL is meant to always immediately kill tasks
	// even under ptrace. Last confirmed on Linux 3.13." - ptrace(2)
	if t.killedLocked() {
		return false
	}
	t.beginInternalStopLocked(&ptraceStop{})
	return true
}

// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceTrapLocked(code int32) {
	t.ptraceCode = code
	t.ptraceSiginfo = &arch.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  code,
	}
	t.ptraceSiginfo.SetPid(int32(t.tg.pidns.tids[t]))
	t.ptraceSiginfo.SetUid(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
	if t.beginPtraceStopLocked() {
		tracer := t.Tracer()
		tracer.signalStop(t, arch.CLD_TRAPPED, int32(linux.SIGTRAP))
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
}

// ptraceFreeze checks if t is in a ptraceStop. If so, it freezes the
// ptraceStop, temporarily preventing it from being removed by a concurrent
// Task.Kill, and returns true. Otherwise it returns false.
//
// Preconditions: The TaskSet mutex must be locked. The caller must be running
// on the task goroutine of t's tracer.
func (t *Task) ptraceFreeze() bool {
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	if t.stop == nil {
		return false
	}
	s, ok := t.stop.(*ptraceStop)
	if !ok {
		return false
	}
	s.frozen = true
	return true
}

// ptraceUnfreeze ends the effect of a previous successful call to
// ptraceFreeze.
//
// Preconditions: t must be in a frozen ptraceStop.
func (t *Task) ptraceUnfreeze() {
	// t.tg.signalHandlers is stable because t is in a frozen ptrace-stop,
	// preventing its thread group from completing execve.
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	// Do this even if the task has been killed to ensure a panic if t.stop is
	// nil or not a ptraceStop.
	t.stop.(*ptraceStop).frozen = false
	if t.killedLocked() {
		t.endInternalStopLocked()
	}
}

// ptraceUnstop implements ptrace request PTRACE_CONT, PTRACE_SYSCALL,
// PTRACE_SINGLESTEP, PTRACE_SYSEMU, or PTRACE_SYSEMU_SINGLESTEP depending on
// mode and singlestep.
//
// Preconditions: t must be in a frozen ptrace stop.
//
// Postconditions: If ptraceUnstop returns nil, t will no longer be in a ptrace
// stop.
func (t *Task) ptraceUnstop(mode ptraceSyscallMode, singlestep bool, sig linux.Signal) error {
	if sig != 0 && !sig.IsValid() {
		return syserror.EIO
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	t.ptraceCode = int32(sig)
	t.ptraceSyscallMode = mode
	t.ptraceSinglestep = singlestep
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.endInternalStopLocked()
	return nil
}
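
// For reference, the resuming requests handled in Task.Ptrace below map onto
// ptraceUnstop's parameters as follows:
//
//	PTRACE_CONT:              mode = ptraceSyscallNone,      singlestep = false
//	PTRACE_SYSCALL:           mode = ptraceSyscallIntercept, singlestep = false
//	PTRACE_SINGLESTEP:        mode = ptraceSyscallNone,      singlestep = true
//	PTRACE_SYSEMU:            mode = ptraceSyscallEmu,       singlestep = false
//	PTRACE_SYSEMU_SINGLESTEP: mode = ptraceSyscallEmu,       singlestep = true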

// ptraceTraceme implements ptrace(PTRACE_TRACEME). t is the caller.
func (t *Task) ptraceTraceme() error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if t.hasTracer() {
		return syserror.EPERM
	}
	if t.parent == nil {
		// In Linux, only init can have no parent, and init is assumed never
		// to invoke PTRACE_TRACEME. In the sentry, TGID 1 is an arbitrary user
		// application that may invoke PTRACE_TRACEME; having no parent can
		// also occur if all tasks in the parent thread group have exited, and
		// t failed to find a living thread group to reparent to. The former
		// case is treated as if TGID 1 has an exited parent in an invisible
		// ancestor PID namespace that is an owner of the root user namespace
		// (and consequently has CAP_SYS_PTRACE), and the latter case is a
		// special form of the exited parent case below. In either case,
		// returning nil here is correct.
		return nil
	}
	if !t.parent.CanTrace(t, true) {
		return syserror.EPERM
	}
	if t.parent.exitState != TaskExitNone {
		// Fail silently, as if we were successfully attached but then
		// immediately detached. This is consistent with Linux.
		return nil
	}
	t.ptraceTracer.Store(t.parent)
	t.parent.ptraceTracees[t] = struct{}{}
	return nil
}

// ptraceAttach implements ptrace(PTRACE_ATTACH, target). t is the caller.
func (t *Task) ptraceAttach(target *Task) error {
	if t.tg == target.tg {
		return syserror.EPERM
	}
	if !t.CanTrace(target, true) {
		return syserror.EPERM
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if target.hasTracer() {
		return syserror.EPERM
	}
	// Attaching to zombies and dead tasks is not permitted; the exit
	// notification logic relies on this. Linux allows attaching to PF_EXITING
	// tasks, though.
	if target.exitState >= TaskExitZombie {
		return syserror.EPERM
	}
	target.ptraceTracer.Store(t)
	t.ptraceTracees[target] = struct{}{}
	target.tg.signalHandlers.mu.Lock()
	target.sendSignalLocked(&arch.SignalInfo{
		Signo: int32(linux.SIGSTOP),
		Code:  arch.SignalInfoUser,
	}, false /* group */)
	// Undocumented Linux feature: If the tracee is already group-stopped (and
	// consequently will not report the SIGSTOP just sent), force it to leave
	// and re-enter the stop so that it will switch to a ptrace-stop.
	if target.stop == (*groupStop)(nil) {
		target.groupStopRequired = true
		target.endInternalStopLocked()
	}
	target.tg.signalHandlers.mu.Unlock()
	return nil
}

// ptraceDetach implements ptrace(PTRACE_DETACH, target, 0, sig). t is the
// caller.
//
// Preconditions: target must be a tracee of t in a frozen ptrace stop.
//
// Postconditions: If ptraceDetach returns nil, target will no longer be in a
// ptrace stop.
func (t *Task) ptraceDetach(target *Task, sig linux.Signal) error {
	if sig != 0 && !sig.IsValid() {
		return syserror.EIO
	}
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	target.ptraceCode = int32(sig)
	target.forgetTracerLocked()
	delete(t.ptraceTracees, target)
	return nil
}

// exitPtrace is called in the exit path to detach all of t's tracees.
func (t *Task) exitPtrace() {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	for target := range t.ptraceTracees {
		if target.ptraceOpts.ExitKill {
			target.tg.signalHandlers.mu.Lock()
			target.sendSignalLocked(&arch.SignalInfo{
				Signo: int32(linux.SIGKILL),
			}, false /* group */)
			target.tg.signalHandlers.mu.Unlock()
		}
		// Leave ptraceCode unchanged so that if the task is ptrace-stopped, it
		// observes the ptraceCode it set before it entered the stop. I believe
		// this is consistent with Linux.
		target.forgetTracerLocked()
	}
	// "nil maps cannot be saved"
	t.ptraceTracees = make(map[*Task]struct{})
}

// forgetTracerLocked detaches t's tracer and ensures that t is no longer
// ptrace-stopped.
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) forgetTracerLocked() {
	t.ptraceOpts = ptraceOptions{}
	t.ptraceSyscallMode = ptraceSyscallNone
	t.ptraceSinglestep = false
	t.ptraceTracer.Store((*Task)(nil))
	if t.exitTracerNotified && !t.exitTracerAcked {
		t.exitTracerAcked = true
		t.exitNotifyLocked(true)
	}
	// If t is ptrace-stopped, but its thread group is in a group stop and t is
	// eligible to participate, make it do so. This is essentially the reverse
	// of the special case in ptraceAttach, which converts a group stop to a
	// ptrace stop. ("Handling of restart from group-stop is currently buggy,
	// but the "as planned" behavior is to leave tracee stopped and waiting for
	// SIGCONT." - ptrace(2))
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	if t.stop == nil {
		return
	}
	if _, ok := t.stop.(*ptraceStop); ok {
		if t.exitState < TaskExitInitiated && t.tg.groupStopPhase >= groupStopInitiated {
			t.groupStopRequired = true
		}
		t.endInternalStopLocked()
	}
}

// ptraceSignalLocked is called after signal dequeueing to check if t should
// enter ptrace signal-delivery-stop.
//
// Preconditions: The signal mutex must be locked. The caller must be running
// on the task goroutine.
func (t *Task) ptraceSignalLocked(info *arch.SignalInfo) bool {
	if linux.Signal(info.Signo) == linux.SIGKILL {
		return false
	}
	if !t.hasTracer() {
		return false
	}
	// The tracer might change this signal into a stop signal, in which case
	// any SIGCONT received after the signal was originally dequeued should
	// cancel it. This is consistent with Linux.
	if t.tg.groupStopPhase == groupStopNone {
		t.tg.groupStopPhase = groupStopDequeued
	}
	// Can't lock the TaskSet mutex while holding a signal mutex.
	t.tg.signalHandlers.mu.Unlock()
	defer t.tg.signalHandlers.mu.Lock()
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	tracer := t.Tracer()
	if tracer == nil {
		return false
	}
	t.ptraceCode = info.Signo
	t.ptraceSiginfo = info
	t.Debugf("Entering signal-delivery-stop for signal %d", info.Signo)
	if t.beginPtraceStopLocked() {
		tracer.signalStop(t, arch.CLD_TRAPPED, info.Signo)
		tracer.tg.eventQueue.Notify(EventTraceeStop)
	}
	return true
}

// ptraceSeccomp is called when a seccomp-bpf filter returns action
// SECCOMP_RET_TRACE to check if t should enter PTRACE_EVENT_SECCOMP stop. data
// is the lower 16 bits of the filter's return value.
func (t *Task) ptraceSeccomp(data uint16) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceSeccomp {
		return false
	}
	t.Debugf("Entering PTRACE_EVENT_SECCOMP stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_SECCOMP, uint64(data))
	return true
}

// ptraceSyscallEnter is called immediately before entering a syscall to check
// if t should enter ptrace syscall-enter-stop.
func (t *Task) ptraceSyscallEnter() (taskRunState, bool) {
	if !t.hasTracer() {
		return nil, false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	switch t.ptraceSyscallMode {
	case ptraceSyscallNone:
		return nil, false
	case ptraceSyscallIntercept:
		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSCALL")
		t.ptraceSyscallStopLocked()
		return (*runSyscallAfterSyscallEnterStop)(nil), true
	case ptraceSyscallEmu:
		t.Debugf("Entering syscall-enter-stop from PTRACE_SYSEMU")
		t.ptraceSyscallStopLocked()
		return (*runSyscallAfterSysemuStop)(nil), true
	}
	panic(fmt.Sprintf("Unknown ptraceSyscallMode: %v", t.ptraceSyscallMode))
}

// ptraceSyscallExit is called immediately after leaving a syscall to check if
// t should enter ptrace syscall-exit-stop.
func (t *Task) ptraceSyscallExit() {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if t.ptraceSyscallMode != ptraceSyscallIntercept {
		return
	}
	t.Debugf("Entering syscall-exit-stop")
	t.ptraceSyscallStopLocked()
}

// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceSyscallStopLocked() {
	code := int32(linux.SIGTRAP)
	if t.ptraceOpts.SysGood {
		code |= 0x80
	}
	t.ptraceTrapLocked(code)
}
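
// Tracer-side illustration (a sketch using Go's syscall package, not part of
// this file; tracedPID is a placeholder): with PTRACE_O_TRACESYSGOOD set,
// syscall stops report SIGTRAP|0x80 (0x85), letting the tracer distinguish
// them from genuine SIGTRAPs:
//
//	var ws syscall.WaitStatus
//	syscall.Wait4(tracedPID, &ws, 0, nil)
//	if ws.Stopped() && ws.StopSignal() == syscall.SIGTRAP|0x80 {
//		// syscall-enter-stop or syscall-exit-stop
//	}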

// ptraceCloneKind is the kind of clone being performed, for the purpose of
// selecting which ptrace event (if any) to report.
type ptraceCloneKind int32

const (
	// ptraceCloneKindClone represents a call to Task.Clone where
	// TerminationSignal is not SIGCHLD and Vfork is false.
	ptraceCloneKindClone ptraceCloneKind = iota

	// ptraceCloneKindFork represents a call to Task.Clone where
	// TerminationSignal is SIGCHLD and Vfork is false.
	ptraceCloneKindFork

	// ptraceCloneKindVfork represents a call to Task.Clone where Vfork is
	// true.
	ptraceCloneKindVfork
)

// ptraceClone is called at the end of a clone or fork syscall to check if t
// should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK
// stop. child is the new task.
func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	event := false
	if !opts.Untraced {
		switch kind {
		case ptraceCloneKindClone:
			if t.ptraceOpts.TraceClone {
				t.Debugf("Entering PTRACE_EVENT_CLONE stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_CLONE, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindFork:
			if t.ptraceOpts.TraceFork {
				t.Debugf("Entering PTRACE_EVENT_FORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_FORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		case ptraceCloneKindVfork:
			if t.ptraceOpts.TraceVfork {
				t.Debugf("Entering PTRACE_EVENT_VFORK stop")
				t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK, uint64(t.tg.pidns.tids[child]))
				event = true
			}
		default:
			panic(fmt.Sprintf("Unknown ptraceCloneKind: %v", kind))
		}
	}
	// "If the PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or PTRACE_O_TRACECLONE
	// options are in effect, then children created by, respectively, fork(2)
	// or clone(2) with the exit signal set to SIGCHLD, vfork(2) or clone(2)
	// with the CLONE_VFORK flag, and other kinds of clone(2), are
	// automatically attached to the same tracer which traced their parent.
	// SIGSTOP is delivered to the children, causing them to enter
	// signal-delivery-stop after they exit the system call which created
	// them." - ptrace(2)
	//
	// clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is
	// confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() =>
	// include/linux/ptrace.h:ptrace_init_task().
	if event || opts.InheritTracer {
		tracer := t.Tracer()
		if tracer != nil {
			child.ptraceTracer.Store(tracer)
			tracer.ptraceTracees[child] = struct{}{}
			// "Flags are inherited by new tracees created and "auto-attached"
			// via active PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or
			// PTRACE_O_TRACECLONE options." - ptrace(2)
			child.ptraceOpts = t.ptraceOpts
			child.tg.signalHandlers.mu.Lock()
			// If the child is PT_SEIZED (currently not possible in the sentry
			// because PTRACE_SEIZE is unimplemented, but for future
			// reference), Linux just sets JOBCTL_TRAP_STOP instead, so the
			// child skips signal-delivery-stop and goes directly to
			// group-stop.
			//
			// The child will self-t.interrupt() when its task goroutine starts
			// running, so we don't have to.
			child.pendingSignals.enqueue(&arch.SignalInfo{
				Signo: int32(linux.SIGSTOP),
			})
			child.tg.signalHandlers.mu.Unlock()
		}
	}
	return event
}
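
// Tracer-side illustration (a sketch, not part of this file; tracedPID is a
// placeholder): after a PTRACE_EVENT_FORK, PTRACE_EVENT_VFORK, or
// PTRACE_EVENT_CLONE stop, the new thread ID stored by ptraceEventLocked is
// retrievable with PTRACE_GETEVENTMSG:
//
//	newTID, err := syscall.PtraceGetEventMsg(tracedPID)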

// ptraceVforkDone is called after the end of a vfork stop to check if t should
// enter PTRACE_EVENT_VFORK_DONE stop. child is the new task's thread ID in t's
// PID namespace.
func (t *Task) ptraceVforkDone(child ThreadID) bool {
	if !t.hasTracer() {
		return false
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceVforkDone {
		return false
	}
	t.Debugf("Entering PTRACE_EVENT_VFORK_DONE stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_VFORK_DONE, uint64(child))
	return true
}

// ptraceExec is called at the end of an execve syscall to check if t should
// enter PTRACE_EVENT_EXEC stop. oldTID is t's thread ID, in its *tracer's* PID
// namespace, prior to the execve. (If t did not have a tracer at the time
// oldTID was read, oldTID may be 0. This is consistent with Linux.)
func (t *Task) ptraceExec(oldTID ThreadID) {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	// Recheck with the TaskSet mutex locked. Most ptrace points don't need to
	// do this because detaching resets ptrace options, but PTRACE_EVENT_EXEC
	// is special because both TraceExec and !TraceExec do something if a
	// tracer is attached.
	if !t.hasTracer() {
		return
	}
	if t.ptraceOpts.TraceExec {
		t.Debugf("Entering PTRACE_EVENT_EXEC stop")
		t.ptraceEventLocked(linux.PTRACE_EVENT_EXEC, uint64(oldTID))
		return
	}
	// "If the PTRACE_O_TRACEEXEC option is not in effect for the execing
	// tracee, and if the tracee was PTRACE_ATTACHed rather that [sic]
	// PTRACE_SEIZEd, the kernel delivers an extra SIGTRAP to the tracee after
	// execve(2) returns. This is an ordinary signal (similar to one which can
	// be generated by `kill -TRAP`), not a special kind of ptrace-stop.
	// Employing PTRACE_GETSIGINFO for this signal returns si_code set to 0
	// (SI_USER). This signal may be blocked by signal mask, and thus may be
	// delivered (much) later." - ptrace(2)
	t.tg.signalHandlers.mu.Lock()
	defer t.tg.signalHandlers.mu.Unlock()
	t.sendSignalLocked(&arch.SignalInfo{
		Signo: int32(linux.SIGTRAP),
		Code:  arch.SignalInfoUser,
	}, false /* group */)
}

// ptraceExit is called early in the task exit path to check if t should enter
// PTRACE_EVENT_EXIT stop.
func (t *Task) ptraceExit() {
	if !t.hasTracer() {
		return
	}
	t.tg.pidns.owner.mu.RLock()
	defer t.tg.pidns.owner.mu.RUnlock()
	if !t.ptraceOpts.TraceExit {
		return
	}
	t.tg.signalHandlers.mu.Lock()
	status := t.exitStatus.Status()
	t.tg.signalHandlers.mu.Unlock()
	t.Debugf("Entering PTRACE_EVENT_EXIT stop")
	t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status))
}

// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceEventLocked(event int32, msg uint64) {
	t.ptraceEventMsg = msg
	// """
	// PTRACE_EVENT stops are observed by the tracer as waitpid(2) returning
	// with WIFSTOPPED(status), and WSTOPSIG(status) returns SIGTRAP. An
	// additional bit is set in the higher byte of the status word: the value
	// status>>8 will be
	//
	//   (SIGTRAP | PTRACE_EVENT_foo << 8).
	//
	// ...
	// """ - ptrace(2)
	t.ptraceTrapLocked(int32(linux.SIGTRAP) | (event << 8))
}
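
// A worked example: for PTRACE_EVENT_EXEC (event 4), the trap code passed to
// ptraceTrapLocked above is SIGTRAP | (4 << 8) = 5 | 0x400 = 0x405, which the
// tracer observes as status>>8 == 0x405 from waitpid(2).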

// ptraceKill implements ptrace(PTRACE_KILL, target). t is the caller.
func (t *Task) ptraceKill(target *Task) error {
	t.tg.pidns.owner.mu.Lock()
	defer t.tg.pidns.owner.mu.Unlock()
	if target.Tracer() != t {
		return syserror.ESRCH
	}
	target.tg.signalHandlers.mu.Lock()
	defer target.tg.signalHandlers.mu.Unlock()
	// "This operation is deprecated; do not use it! Instead, send a SIGKILL
	// directly using kill(2) or tgkill(2). The problem with PTRACE_KILL is
	// that it requires the tracee to be in signal-delivery-stop, otherwise it
	// may not work (i.e., may complete successfully but won't kill the
	// tracee)." - ptrace(2)
	if target.stop == nil {
		return nil
	}
	if _, ok := target.stop.(*ptraceStop); !ok {
		return nil
	}
	target.ptraceCode = int32(linux.SIGKILL)
	target.endInternalStopLocked()
	return nil
}

// Ptrace implements the ptrace system call.
func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
	// PTRACE_TRACEME ignores all other arguments.
	if req == linux.PTRACE_TRACEME {
		return t.ptraceTraceme()
	}
	// All other ptrace requests operate on a current or future tracee
	// specified by pid.
	target := t.tg.pidns.TaskWithID(pid)
	if target == nil {
		return syserror.ESRCH
	}
	// PTRACE_ATTACH (and PTRACE_SEIZE, which is unimplemented) does not
	// require that target is already a tracee.
	if req == linux.PTRACE_ATTACH {
		return t.ptraceAttach(target)
	}
	// PTRACE_KILL (and PTRACE_INTERRUPT, which is unimplemented) requires that
	// the target is a tracee, but does not require that it is ptrace-stopped.
	if req == linux.PTRACE_KILL {
		return t.ptraceKill(target)
	}
	// All other ptrace requests require that the target is a ptrace-stopped
	// tracee, and freeze the ptrace-stop so the tracee can be operated on.
	t.tg.pidns.owner.mu.RLock()
	if target.Tracer() != t {
		t.tg.pidns.owner.mu.RUnlock()
		return syserror.ESRCH
	}
	if !target.ptraceFreeze() {
		t.tg.pidns.owner.mu.RUnlock()
		// "Most ptrace commands (all except PTRACE_ATTACH, PTRACE_SEIZE,
		// PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the
		// tracee to be in a ptrace-stop, otherwise they fail with ESRCH." -
		// ptrace(2)
		return syserror.ESRCH
	}
	t.tg.pidns.owner.mu.RUnlock()
	// Even if the target has a ptrace-stop active, the tracee's task goroutine
	// may not yet have reached Task.doStop; wait for it to do so. This is safe
	// because there's no way for target to initiate a ptrace-stop and then
	// block (by calling Task.block) before entering it.
	//
	// Caveat: If tasks were just restored, the tracee's first call to
	// Task.Activate (in Task.run) occurs before its first call to Task.doStop,
	// which may block if the tracer's address space is active.
	t.UninterruptibleSleepStart(true)
	target.waitGoroutineStoppedOrExited()
	t.UninterruptibleSleepFinish(true)

	// Resuming commands end the ptrace stop, but only if successful.
	switch req {
	case linux.PTRACE_DETACH:
		if err := t.ptraceDetach(target, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil
	case linux.PTRACE_CONT:
		if err := target.ptraceUnstop(ptraceSyscallNone, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil
	case linux.PTRACE_SYSCALL:
		if err := target.ptraceUnstop(ptraceSyscallIntercept, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil
	case linux.PTRACE_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallNone, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil
	case linux.PTRACE_SYSEMU:
		if err := target.ptraceUnstop(ptraceSyscallEmu, false, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil
	case linux.PTRACE_SYSEMU_SINGLESTEP:
		if err := target.ptraceUnstop(ptraceSyscallEmu, true, linux.Signal(data)); err != nil {
			target.ptraceUnfreeze()
			return err
		}
		return nil
	}

	// All other ptrace requests expect us to unfreeze the stop.
	defer target.ptraceUnfreeze()

	switch req {
	case linux.PTRACE_PEEKTEXT, linux.PTRACE_PEEKDATA:
		// "At the system call level, the PTRACE_PEEKTEXT, PTRACE_PEEKDATA, and
		// PTRACE_PEEKUSER requests have a different API: they store the result
		// at the address specified by the data parameter, and the return value
		// is the error flag." - ptrace(2)
		word := t.Arch().Native(0)
		if _, err := usermem.CopyObjectIn(t, target.MemoryManager(), addr, word, usermem.IOOpts{
			IgnorePermissions: true,
		}); err != nil {
			return err
		}
		_, err := t.CopyOut(data, word)
		return err

	case linux.PTRACE_POKETEXT, linux.PTRACE_POKEDATA:
		_, err := usermem.CopyObjectOut(t, target.MemoryManager(), addr, t.Arch().Native(uintptr(data)), usermem.IOOpts{
			IgnorePermissions: true,
		})
		return err

	case linux.PTRACE_PEEKUSR: // aka PTRACE_PEEKUSER
		n, err := target.Arch().PtracePeekUser(uintptr(addr))
		if err != nil {
			return err
		}
		_, err = t.CopyOut(data, n)
		return err

	case linux.PTRACE_POKEUSR: // aka PTRACE_POKEUSER
		return target.Arch().PtracePokeUser(uintptr(addr), uintptr(data))

	case linux.PTRACE_GETREGS:
		// "Copy the tracee's general-purpose ... registers ... to the address
		// data in the tracer. ... (addr is ignored.) Note that SPARC systems
		// have the meaning of data and addr reversed ..." - ptrace(2)
		_, err := target.Arch().PtraceGetRegs(&usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: data,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		})
		return err

	case linux.PTRACE_GETFPREGS:
		_, err := target.Arch().PtraceGetFPRegs(&usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: data,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		})
		return err

	case linux.PTRACE_GETREGSET:
		// "Read the tracee's registers. addr specifies, in an
		// architecture-dependent way, the type of registers to be read. ...
		// data points to a struct iovec, which describes the destination
		// buffer's location and length. On return, the kernel modifies iov.len
		// to indicate the actual number of bytes returned." - ptrace(2)
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}
		ar := ars.Head()
		n, err := target.Arch().PtraceGetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()))
		if err != nil {
			return err
		}
		// Set iov.len to the number of bytes actually returned.
		ar.End = ar.Start + usermem.Addr(n)
		return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar))

	case linux.PTRACE_SETREGS:
		_, err := target.Arch().PtraceSetRegs(&usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: data,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		})
		return err

	case linux.PTRACE_SETFPREGS:
		_, err := target.Arch().PtraceSetFPRegs(&usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: data,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		})
		return err

	case linux.PTRACE_SETREGSET:
		ars, err := t.CopyInIovecs(data, 1)
		if err != nil {
			return err
		}
		ar := ars.Head()
		n, err := target.Arch().PtraceSetRegSet(uintptr(addr), &usermem.IOReadWriter{
			Ctx:  t,
			IO:   t.MemoryManager(),
			Addr: ar.Start,
			Opts: usermem.IOOpts{
				AddressSpaceActive: true,
			},
		}, int(ar.Length()))
		if err != nil {
			return err
		}
		// Set iov.len to the number of bytes actually consumed.
		ar.End = ar.Start + usermem.Addr(n)
		return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar))

	case linux.PTRACE_GETSIGINFO:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return syserror.EINVAL
		}
		_, err := t.CopyOut(data, target.ptraceSiginfo)
		return err

	case linux.PTRACE_SETSIGINFO:
		var info arch.SignalInfo
		if _, err := t.CopyIn(data, &info); err != nil {
			return err
		}
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		if target.ptraceSiginfo == nil {
			return syserror.EINVAL
		}
		target.ptraceSiginfo = &info
		return nil

	case linux.PTRACE_GETSIGMASK:
		if addr != linux.SignalSetSize {
			return syserror.EINVAL
		}
		target.mu.Lock()
		defer target.mu.Unlock()
		_, err := t.CopyOut(data, target.tr.SignalMask)
		return err

	case linux.PTRACE_SETSIGMASK:
		if addr != linux.SignalSetSize {
			return syserror.EINVAL
		}
		var mask linux.SignalSet
		if _, err := t.CopyIn(data, &mask); err != nil {
			return err
		}
		// The target's task goroutine is stopped, so this is safe:
		target.SetSignalMask(mask &^ UnblockableSignals)
		return nil

	case linux.PTRACE_SETOPTIONS:
		t.tg.pidns.owner.mu.Lock()
		defer t.tg.pidns.owner.mu.Unlock()
		validOpts := uintptr(linux.PTRACE_O_EXITKILL |
			linux.PTRACE_O_TRACESYSGOOD |
			linux.PTRACE_O_TRACECLONE |
			linux.PTRACE_O_TRACEEXEC |
			linux.PTRACE_O_TRACEEXIT |
			linux.PTRACE_O_TRACEFORK |
			linux.PTRACE_O_TRACESECCOMP |
			linux.PTRACE_O_TRACEVFORK |
			linux.PTRACE_O_TRACEVFORKDONE)
		if uintptr(data)&^validOpts != 0 {
			return syserror.EINVAL
		}
		target.ptraceOpts = ptraceOptions{
			ExitKill:       data&linux.PTRACE_O_EXITKILL != 0,
			SysGood:        data&linux.PTRACE_O_TRACESYSGOOD != 0,
			TraceClone:     data&linux.PTRACE_O_TRACECLONE != 0,
			TraceExec:      data&linux.PTRACE_O_TRACEEXEC != 0,
			TraceExit:      data&linux.PTRACE_O_TRACEEXIT != 0,
			TraceFork:      data&linux.PTRACE_O_TRACEFORK != 0,
			TraceSeccomp:   data&linux.PTRACE_O_TRACESECCOMP != 0,
			TraceVfork:     data&linux.PTRACE_O_TRACEVFORK != 0,
			TraceVforkDone: data&linux.PTRACE_O_TRACEVFORKDONE != 0,
		}
		return nil

	case linux.PTRACE_GETEVENTMSG:
		t.tg.pidns.owner.mu.RLock()
		defer t.tg.pidns.owner.mu.RUnlock()
		_, err := t.CopyOut(usermem.Addr(data), target.ptraceEventMsg)
		return err

	default:
		// PEEKSIGINFO is unimplemented but seems to have no users anywhere.
		return syserror.EIO
	}
}
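
// Illustrative end-to-end usage from a tracer's perspective (a sketch using
// Go's syscall package, not part of this file; tracedPID and dataAddr are
// placeholders):
//
//	// Attach and wait for the SIGSTOP-induced ptrace-stop.
//	err := syscall.PtraceAttach(tracedPID)
//	var ws syscall.WaitStatus
//	_, err = syscall.Wait4(tracedPID, &ws, 0, nil)
//	// While the tracee remains ptrace-stopped, other requests are
//	// permitted, e.g. reading a word of its memory:
//	word := make([]byte, 8)
//	_, err = syscall.PtracePeekData(tracedPID, dataAddr, word)
//	// Resume the tracee without delivering a signal.
//	err = syscall.PtraceCont(tracedPID, 0)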