diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go index 9963ceeba..5ec01cc4a 100644 --- a/pkg/abi/linux/seccomp.go +++ b/pkg/abi/linux/seccomp.go @@ -19,17 +19,19 @@ const ( SECCOMP_MODE_NONE = 0 SECCOMP_MODE_FILTER = 2 - SECCOMP_RET_KILL = 0x00000000 - SECCOMP_RET_TRAP = 0x00030000 - SECCOMP_RET_ERRNO = 0x00050000 - SECCOMP_RET_TRACE = 0x7ff00000 - SECCOMP_RET_ALLOW = 0x7fff0000 + SECCOMP_RET_KILL_PROCESS = 0x80000000 + SECCOMP_RET_KILL_THREAD = 0x00000000 + SECCOMP_RET_TRAP = 0x00030000 + SECCOMP_RET_ERRNO = 0x00050000 + SECCOMP_RET_TRACE = 0x7ff00000 + SECCOMP_RET_ALLOW = 0x7fff0000 SECCOMP_RET_ACTION = 0x7fff0000 SECCOMP_RET_DATA = 0x0000ffff SECCOMP_SET_MODE_FILTER = 1 SECCOMP_FILTER_FLAG_TSYNC = 1 + SECCOMP_GET_ACTION_AVAIL = 2 ) const ( diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 1dfbf749e..9d714d02d 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -33,17 +33,42 @@ const ( defaultLabel = "default_action" ) -// Install generates BPF code based on the set of syscalls provided. It only -// allows syscalls that conform to the specification and generates SIGSYS -// trap unless kill is set. -// -// This is a convenience wrapper around BuildProgram and SetFilter. -func Install(rules SyscallRules, kill bool) error { - log.Infof("Installing seccomp filters for %d syscalls (kill=%t)", len(rules), kill) - defaultAction := uint32(linux.SECCOMP_RET_TRAP) - if kill { - defaultAction = uint32(linux.SECCOMP_RET_KILL) +func actionName(a uint32) string { + switch a { + case linux.SECCOMP_RET_KILL_PROCESS: + return "kill process" + case linux.SECCOMP_RET_TRAP: + return "trap" } + panic(fmt.Sprintf("invalid action: %d", a)) +} + +// Install generates BPF code based on the set of syscalls provided. It only +// allows syscalls that conform to the specification. Syscalls that violate the +// specification will trigger RET_KILL_PROCESS, except for the cases below. +// +// RET_TRAP is used in violations, instead of RET_KILL_PROCESS, in the +// following cases: +// 1. Kernel doesn't support RET_KILL_PROCESS: RET_KILL_THREAD only kills the +// offending thread and often keeps the sentry hanging. +// 2. Debug: RET_TRAP generates a panic followed by a stack trace which is +// much easier to debug then RET_KILL_PROCESS which can't be caught. +// +// Be aware that RET_TRAP sends SIGSYS to the process and it may be ignored, +// making it possible for the process to continue running after a violation. +// However, it will leave a SECCOMP audit event trail behind. In any case, the +// syscall is still blocked from executing. +func Install(rules SyscallRules) error { + defaultAction, err := defaultAction() + if err != nil { + return err + } + + // Uncomment to get stack trace when there is a violation. + // defaultAction = uint32(linux.SECCOMP_RET_TRAP) + + log.Infof("Installing seccomp filters for %d syscalls (action=%s)", len(rules), actionName(defaultAction)) + instrs, err := BuildProgram([]RuleSet{ RuleSet{ Rules: rules, @@ -70,6 +95,17 @@ func Install(rules SyscallRules, kill bool) error { return nil } +func defaultAction() (uint32, error) { + available, err := isKillProcessAvailable() + if err != nil { + return 0, err + } + if available { + return uint32(linux.SECCOMP_RET_KILL_PROCESS), nil + } + return uint32(linux.SECCOMP_RET_TRAP), nil +} + // RuleSet is a set of rules and associated action. type RuleSet struct { Rules SyscallRules diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index 226f30b7b..f2b903e42 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -121,7 +121,7 @@ func TestBasic(t *testing.T) { Action: linux.SECCOMP_RET_TRAP, }, }, - defaultAction: linux.SECCOMP_RET_KILL, + defaultAction: linux.SECCOMP_RET_KILL_THREAD, specs: []spec{ { desc: "Multiple rulesets allowed (1a)", @@ -141,7 +141,7 @@ func TestBasic(t *testing.T) { { desc: "Multiple rulesets allowed (2)", data: seccompData{nr: 0, arch: linux.AUDIT_ARCH_X86_64}, - want: linux.SECCOMP_RET_KILL, + want: linux.SECCOMP_RET_KILL_THREAD, }, }, }, @@ -431,15 +431,23 @@ func TestRealDeal(t *testing.T) { t.Errorf("victim was not killed as expected, output: %s", out) continue } + // Depending on kernel version, either RET_TRAP or RET_KILL_PROCESS is + // used. RET_TRAP dumps reason for exit in output, while RET_KILL_PROCESS + // returns SIGSYS as exit status. + if !strings.Contains(string(out), test.want) && + !strings.Contains(err.Error(), test.want) { + t.Errorf("Victim error is wrong, got: %v, err: %v, want: %v", string(out), err, test.want) + continue + } } else { if err != nil { t.Errorf("victim failed to execute, err: %v", err) continue } - } - if !strings.Contains(string(out), test.want) { - t.Errorf("Victim output is wrong, got: %v, want: %v", err, test.want) - continue + if !strings.Contains(string(out), test.want) { + t.Errorf("Victim output is wrong, got: %v, want: %v", string(out), test.want) + continue + } } } } diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go index 007038273..dd5ed0041 100644 --- a/pkg/seccomp/seccomp_test_victim.go +++ b/pkg/seccomp/seccomp_test_victim.go @@ -106,7 +106,7 @@ func main() { } } - if err := seccomp.Install(syscalls, false); err != nil { + if err := seccomp.Install(syscalls); err != nil { fmt.Printf("Failed to install seccomp: %v", err) os.Exit(1) } diff --git a/pkg/seccomp/seccomp_unsafe.go b/pkg/seccomp/seccomp_unsafe.go index dd009221a..a31c6471d 100644 --- a/pkg/seccomp/seccomp_unsafe.go +++ b/pkg/seccomp/seccomp_unsafe.go @@ -36,22 +36,40 @@ type sockFprog struct { // //go:nosplit func SetFilter(instrs []linux.BPFInstruction) syscall.Errno { - // SYS_SECCOMP is not available in syscall package. - const SYS_SECCOMP = 317 - // PR_SET_NO_NEW_PRIVS is required in order to enable seccomp. See seccomp(2) for details. if _, _, errno := syscall.RawSyscall(syscall.SYS_PRCTL, linux.PR_SET_NO_NEW_PRIVS, 1, 0); errno != 0 { return errno } - // TODO: Use SECCOMP_FILTER_FLAG_KILL_PROCESS when available. sockProg := sockFprog{ Len: uint16(len(instrs)), Filter: (*linux.BPFInstruction)(unsafe.Pointer(&instrs[0])), } - if _, _, errno := syscall.RawSyscall(SYS_SECCOMP, linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, uintptr(unsafe.Pointer(&sockProg))); errno != 0 { + return seccomp(linux.SECCOMP_SET_MODE_FILTER, linux.SECCOMP_FILTER_FLAG_TSYNC, unsafe.Pointer(&sockProg)) +} + +func isKillProcessAvailable() (bool, error) { + action := uint32(linux.SECCOMP_RET_KILL_PROCESS) + if errno := seccomp(linux.SECCOMP_GET_ACTION_AVAIL, 0, unsafe.Pointer(&action)); errno != 0 { + // EINVAL: SECCOMP_GET_ACTION_AVAIL not in this kernel yet. + // EOPNOTSUPP: SECCOMP_RET_KILL_PROCESS not supported. + if errno == syscall.EINVAL || errno == syscall.EOPNOTSUPP { + return false, nil + } + return false, errno + } + return true, nil +} + +// seccomp calls seccomp(2). This is safe to call from an afterFork context. +// +//go:nosplit +func seccomp(op, flags uint32, ptr unsafe.Pointer) syscall.Errno { + // SYS_SECCOMP is not available in syscall package. + const SYS_SECCOMP = 317 + + if _, _, errno := syscall.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr)); errno != 0 { return errno } - return 0 } diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index 433b900c7..d6dc45bbd 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -117,7 +117,7 @@ func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip u // "Results in the system call being executed." return seccompResultAllow - case linux.SECCOMP_RET_KILL: + case linux.SECCOMP_RET_KILL_THREAD: // "Results in the task exiting immediately without executing the // system call. The exit status of the task will be SIGSYS, not // SIGKILL." @@ -155,7 +155,7 @@ func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, i thisRet, err := bpf.Exec(f.([]bpf.Program)[i], input) if err != nil { t.Debugf("seccomp-bpf filter %d returned error: %v", i, err) - thisRet = linux.SECCOMP_RET_KILL + thisRet = linux.SECCOMP_RET_KILL_THREAD } // "If multiple filters exist, the return value for the evaluation of a // given system call will always use the highest precedent value." - diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 885ba4b2e..25b8e8cb7 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -115,7 +115,7 @@ func createStub() (*thread, error) { var defaultAction uint32 if probeSeccomp() { log.Infof("Latest seccomp behavior found (kernel >= 4.8 likely)") - defaultAction = uint32(linux.SECCOMP_RET_KILL) + defaultAction = uint32(linux.SECCOMP_RET_KILL_THREAD) } else { // We must rely on SYSEMU behavior; tracing with SYSEMU is broken. log.Infof("Legacy seccomp behavior found (kernel < 4.8 likely)") diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index dc7294b1d..d69a6a2cc 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -57,8 +57,7 @@ func Install(opt Options) error { return fmt.Errorf("unknown platform type %T", p) } - // TODO: Set kill=true when SECCOMP_RET_KILL_PROCESS is supported. - return seccomp.Install(s, false) + return seccomp.Install(s) } // Report writes a warning message to the log. diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go index f50b6bc87..c120d57a6 100644 --- a/runsc/fsgofer/filter/filter.go +++ b/runsc/fsgofer/filter/filter.go @@ -29,6 +29,5 @@ func Install() error { // when not enabled. s.Merge(instrumentationFilters()) - // TODO: Set kill=true when SECCOMP_RET_KILL_PROCESS is supported. - return seccomp.Install(s, false) + return seccomp.Install(s) }