platform/ptrace: don't call probeSeccomp on arm64

Support for PTRACE_SYSEMU on arm64 was added in the 5.3 kernel,
so we can be sure that the running kernel is 5.3 or newer.

This change also moves the vsyscall seccomp rules to the arch-specific file,
because vsyscall isn't supported on arm64.

PiperOrigin-RevId: 298696493
This commit is contained in:
Andrei Vagin 2020-03-03 14:34:36 -08:00 committed by gVisor bot
parent 844e4d284c
commit 277a0d5a1f
3 changed files with 84 additions and 72 deletions

View File

@ -21,6 +21,7 @@ import (
"strings"
"syscall"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
@ -183,13 +184,76 @@ func enableCpuidFault() {
// appendArchSeccompRules appends architecture-specific seccomp rules to rules
// when creating the BPF program, and returns the extended slice.
// defaultAction is the default action the caller builds the program with.
// Ref attachedThread() for more detail.
func appendArchSeccompRules(rules []seccomp.RuleSet) []seccomp.RuleSet {
return append(rules, seccomp.RuleSet{
Rules: seccomp.SyscallRules{
syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
{seccomp.AllowValue(linux.ARCH_SET_CPUID), seccomp.AllowValue(0)},
func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFAction) []seccomp.RuleSet {
rules = append(rules,
// Rules for trapping vsyscall access.
seccomp.RuleSet{
Rules: seccomp.SyscallRules{
syscall.SYS_GETTIMEOFDAY: {},
syscall.SYS_TIME: {},
unix.SYS_GETCPU: {}, // SYS_GETCPU was not defined in package syscall on amd64.
},
},
Action: linux.SECCOMP_RET_ALLOW,
})
Action: linux.SECCOMP_RET_TRAP,
Vsyscall: true,
})
// The explicit arch_prctl(ARCH_SET_CPUID, 0) allowance is only appended
// when the default action is not already SECCOMP_RET_ALLOW.
if defaultAction != linux.SECCOMP_RET_ALLOW {
rules = append(rules,
seccomp.RuleSet{
Rules: seccomp.SyscallRules{
syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
{seccomp.AllowValue(linux.ARCH_SET_CPUID), seccomp.AllowValue(0)},
},
},
Action: linux.SECCOMP_RET_ALLOW,
})
}
return rules
}
// probeSeccomp reports whether seccomp filters run after ptrace
// notifications, which is generally the case for kernel version >= 4.8. The
// check is performed dynamically because kernels may carry backported
// behavior.
//
// See createStub for more information.
//
// Precondition: the runtime OS thread must be locked.
func probeSeccomp() bool {
	// Spawn a brand-new, disposable process to experiment on.
	probe, err := attachedThread(0, linux.SECCOMP_RET_ERRNO)
	if err != nil {
		panic(fmt.Sprintf("seccomp probe failed: %v", err))
	}
	defer probe.destroy()

	// Point the registers at the yield system call, which the filters
	// installed by attachedThread do not allow.
	regs := createSyscallRegs(&probe.initRegs, syscall.SYS_SCHED_YIELD)
	if err := probe.setRegs(&regs); err != nil {
		panic(fmt.Sprintf("ptrace set regs failed: %v", err))
	}

	for {
		// Attempt an emulation.
		_, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, unix.PTRACE_SYSEMU, uintptr(probe.tid), 0, 0, 0, 0)
		if errno != 0 {
			panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
		}

		// Keep resuming the thread until the syscall stop is observed.
		if probe.wait(stopped) != (syscallEvent | syscall.SIGTRAP) {
			continue
		}

		if err := probe.getRegs(&regs); err != nil {
			panic(fmt.Sprintf("ptrace get-regs failed: %v", err))
		}

		// A nil error means the seccomp errno hook already ran and reset
		// the error in the registers: seccomp is first in line and the
		// kernel behaves like one older than 4.8, so report false.
		// Otherwise the hook has not run yet and it is safe to use
		// RET_KILL mode for dispatched calls, so report true.
		_, retErr := syscallReturnValue(&regs)
		return retErr != nil
	}
}

View File

@ -160,6 +160,15 @@ func enableCpuidFault() {
// appendArchSeccompRules appends architecture-specific seccomp rules when
// creating the BPF program. On this architecture nothing is added: rules is
// returned unchanged and defaultAction is not consulted.
// Ref attachedThread() for more detail.
func appendArchSeccompRules(rules []seccomp.RuleSet) []seccomp.RuleSet {
func appendArchSeccompRules(rules []seccomp.RuleSet, defaultAction linux.BPFAction) []seccomp.RuleSet {
return rules
}
// probeSeccomp reports whether seccomp filters run after ptrace
// notifications, which is generally the case for kernel version >= 4.8.
//
// arm64 only gained PTRACE_SYSEMU support in the 5.3 kernel, so no runtime
// probe is needed here and the answer is always true.
func probeSeccomp() bool {
	const seccompRunsAfterPtrace = true
	return seccompRunsAfterPtrace
}

View File

@ -20,7 +20,6 @@ import (
"fmt"
"syscall"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
@ -30,54 +29,6 @@ import (
// syscallEvent is the bit OR-ed with SIGTRAP when checking the signal
// returned by thread.wait for a syscall stop (see probeSeccomp).
// NOTE(review): presumably the 0x80 marker enabled by PTRACE_O_TRACESYSGOOD — confirm.
const syscallEvent syscall.Signal = 0x80
// probeSeccomp returns true iff seccomp is run after ptrace notifications,
// which is generally the case for kernel version >= 4.8. This check is dynamic
// because kernels may have backported behavior.
//
// See createStub for more information.
//
// Precondition: the runtime OS thread must be locked.
func probeSeccomp() bool {
// Create a completely new, destroyable process.
t, err := attachedThread(0, linux.SECCOMP_RET_ERRNO)
if err != nil {
panic(fmt.Sprintf("seccomp probe failed: %v", err))
}
defer t.destroy()
// Set registers to the yield system call. This call is not allowed
// by the filters specified in the attachedThread function.
regs := createSyscallRegs(&t.initRegs, syscall.SYS_SCHED_YIELD)
if err := t.setRegs(&regs); err != nil {
panic(fmt.Sprintf("ptrace set regs failed: %v", err))
}
// Loop until the syscall stop (syscallEvent | SIGTRAP) is observed; any
// other stop simply resumes the thread with PTRACE_SYSEMU again.
for {
// Attempt an emulation.
if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, unix.PTRACE_SYSEMU, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
}
sig := t.wait(stopped)
if sig == (syscallEvent | syscall.SIGTRAP) {
// Did the seccomp errno hook already run? This would
// indicate that seccomp is first in line and we're
// less than 4.8.
if err := t.getRegs(&regs); err != nil {
panic(fmt.Sprintf("ptrace get-regs failed: %v", err))
}
if _, err := syscallReturnValue(&regs); err == nil {
// The seccomp errno mode ran first, and reset
// the error in the registers.
return false
}
// The seccomp hook did not run yet, and therefore it
// is safe to use RET_KILL mode for dispatched calls.
return true
}
}
}
// createStub creates a fresh stub process.
//
// Precondition: the runtime OS thread must be locked.
@ -123,18 +74,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
// stub and all its children. This is used to create child stubs
// (below), so we must include the ability to fork, but otherwise lock
// down available calls only to what is needed.
rules := []seccomp.RuleSet{
// Rules for trapping vsyscall access.
{
Rules: seccomp.SyscallRules{
syscall.SYS_GETTIMEOFDAY: {},
syscall.SYS_TIME: {},
unix.SYS_GETCPU: {}, // SYS_GETCPU was not defined in package syscall on amd64.
},
Action: linux.SECCOMP_RET_TRAP,
Vsyscall: true,
},
}
rules := []seccomp.RuleSet{}
if defaultAction != linux.SECCOMP_RET_ALLOW {
rules = append(rules, seccomp.RuleSet{
Rules: seccomp.SyscallRules{
@ -173,9 +113,8 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
},
Action: linux.SECCOMP_RET_ALLOW,
})
rules = appendArchSeccompRules(rules)
}
rules = appendArchSeccompRules(rules, defaultAction)
instrs, err := seccomp.BuildProgram(rules, defaultAction)
if err != nil {
return nil, err