From 03226cd95055aee73d4e4dfcb4954490b4fd8a2d Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 18 Dec 2018 10:27:16 -0800 Subject: [PATCH] Add BPFAction type with Stringer PiperOrigin-RevId: 226018694 Change-Id: I98965e26fe565f37e98e5df5f997363ab273c91b --- pkg/abi/linux/seccomp.go | 48 +++++++++++++---- pkg/seccomp/seccomp.go | 32 ++++------- pkg/seccomp/seccomp_test.go | 14 ++--- pkg/sentry/kernel/seccomp.go | 53 ++++++------------- pkg/sentry/kernel/task_syscall.go | 18 ++++--- .../platform/ptrace/subprocess_linux.go | 16 +++--- test/syscalls/linux/seccomp.cc | 19 +++++++ 7 files changed, 112 insertions(+), 88 deletions(-) diff --git a/pkg/abi/linux/seccomp.go b/pkg/abi/linux/seccomp.go index 785f2f284..8673a27bf 100644 --- a/pkg/abi/linux/seccomp.go +++ b/pkg/abi/linux/seccomp.go @@ -14,22 +14,52 @@ package linux +import "fmt" + // Seccomp constants taken from . const ( SECCOMP_MODE_NONE = 0 SECCOMP_MODE_FILTER = 2 - SECCOMP_RET_KILL_PROCESS = 0x80000000 - SECCOMP_RET_KILL_THREAD = 0x00000000 - SECCOMP_RET_TRAP = 0x00030000 - SECCOMP_RET_ERRNO = 0x00050000 - SECCOMP_RET_TRACE = 0x7ff00000 - SECCOMP_RET_ALLOW = 0x7fff0000 - - SECCOMP_RET_ACTION = 0x7fff0000 - SECCOMP_RET_DATA = 0x0000ffff + SECCOMP_RET_ACTION_FULL = 0xffff0000 + SECCOMP_RET_ACTION = 0x7fff0000 + SECCOMP_RET_DATA = 0x0000ffff SECCOMP_SET_MODE_FILTER = 1 SECCOMP_FILTER_FLAG_TSYNC = 1 SECCOMP_GET_ACTION_AVAIL = 2 ) + +type BPFAction uint32 + +const ( + SECCOMP_RET_KILL_PROCESS BPFAction = 0x80000000 + SECCOMP_RET_KILL_THREAD = 0x00000000 + SECCOMP_RET_TRAP = 0x00030000 + SECCOMP_RET_ERRNO = 0x00050000 + SECCOMP_RET_TRACE = 0x7ff00000 + SECCOMP_RET_ALLOW = 0x7fff0000 +) + +func (a BPFAction) String() string { + switch a & SECCOMP_RET_ACTION_FULL { + case SECCOMP_RET_KILL_PROCESS: + return "kill process" + case SECCOMP_RET_KILL_THREAD: + return "kill thread" + case SECCOMP_RET_TRAP: + return fmt.Sprintf("trap (%d)", a.Data()) + case SECCOMP_RET_ERRNO: + return fmt.Sprintf("errno (%d)", a.Data()) + case SECCOMP_RET_TRACE: + return fmt.Sprintf("trace (%d)", a.Data()) + case SECCOMP_RET_ALLOW: + return "allow" + } + return fmt.Sprintf("invalid action: %#x", a) +} + +// Data returns the SECCOMP_RET_DATA portion of the action. +func (a BPFAction) Data() uint16 { + return uint16(a & SECCOMP_RET_DATA) +} diff --git a/pkg/seccomp/seccomp.go b/pkg/seccomp/seccomp.go index 9d714d02d..ba2955752 100644 --- a/pkg/seccomp/seccomp.go +++ b/pkg/seccomp/seccomp.go @@ -33,16 +33,6 @@ const ( defaultLabel = "default_action" ) -func actionName(a uint32) string { - switch a { - case linux.SECCOMP_RET_KILL_PROCESS: - return "kill process" - case linux.SECCOMP_RET_TRAP: - return "trap" - } - panic(fmt.Sprintf("invalid action: %d", a)) -} - // Install generates BPF code based on the set of syscalls provided. It only // allows syscalls that conform to the specification. Syscalls that violate the // specification will trigger RET_KILL_PROCESS, except for the cases below. @@ -67,12 +57,12 @@ func Install(rules SyscallRules) error { // Uncomment to get stack trace when there is a violation. // defaultAction = uint32(linux.SECCOMP_RET_TRAP) - log.Infof("Installing seccomp filters for %d syscalls (action=%s)", len(rules), actionName(defaultAction)) + log.Infof("Installing seccomp filters for %d syscalls (action=%v)", len(rules), defaultAction) instrs, err := BuildProgram([]RuleSet{ RuleSet{ Rules: rules, - Action: uint32(linux.SECCOMP_RET_ALLOW), + Action: linux.SECCOMP_RET_ALLOW, }, }, defaultAction) if log.IsLogging(log.Debug) { @@ -95,21 +85,21 @@ func Install(rules SyscallRules) error { return nil } -func defaultAction() (uint32, error) { +func defaultAction() (linux.BPFAction, error) { available, err := isKillProcessAvailable() if err != nil { return 0, err } if available { - return uint32(linux.SECCOMP_RET_KILL_PROCESS), nil + return linux.SECCOMP_RET_KILL_PROCESS, nil } - return uint32(linux.SECCOMP_RET_TRAP), nil + return linux.SECCOMP_RET_TRAP, nil } // RuleSet is a set of rules and associated action. type RuleSet struct { Rules SyscallRules - Action uint32 + Action linux.BPFAction // Vsyscall indicates that a check is made for a function being called // from kernel mappings. This is where the vsyscall page is located @@ -127,7 +117,7 @@ var SyscallName = func(sysno uintptr) string { // BuildProgram builds a BPF program from the given map of actions to matching // SyscallRules. The single generated program covers all provided RuleSets. -func BuildProgram(rules []RuleSet, defaultAction uint32) ([]linux.BPFInstruction, error) { +func BuildProgram(rules []RuleSet, defaultAction linux.BPFAction) ([]linux.BPFInstruction, error) { program := bpf.NewProgramBuilder() // Be paranoid and check that syscall is done in the expected architecture. @@ -147,7 +137,7 @@ func BuildProgram(rules []RuleSet, defaultAction uint32) ([]linux.BPFInstruction if err := program.AddLabel(defaultLabel); err != nil { return nil, err } - program.AddStmt(bpf.Ret|bpf.K, defaultAction) + program.AddStmt(bpf.Ret|bpf.K, uint32(defaultAction)) return program.Instructions() } @@ -217,7 +207,7 @@ func checkArgsLabel(sysno uintptr) string { // not insert a jump to the default action at the end and it is the // responsibility of the caller to insert an appropriate jump after calling // this function. -func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action uint32, ruleSetIdx int, sysno uintptr) error { +func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action linux.BPFAction, ruleSetIdx int, sysno uintptr) error { for ruleidx, rule := range rules { labelled := false for i, arg := range rule { @@ -240,7 +230,7 @@ func addSyscallArgsCheck(p *bpf.ProgramBuilder, rules []Rule, action uint32, rul } // Matched, emit the given action. - p.AddStmt(bpf.Ret|bpf.K, action) + p.AddStmt(bpf.Ret|bpf.K, uint32(action)) // Label the end of the rule if necessary. This is added for // the jumps above when the argument check fails. @@ -319,7 +309,7 @@ func buildBSTProgram(n *node, rules []RuleSet, program *bpf.ProgramBuilder) erro // Emit matchers. if len(rs.Rules[sysno]) == 0 { // This is a blanket action. - program.AddStmt(bpf.Ret|bpf.K, rs.Action) + program.AddStmt(bpf.Ret|bpf.K, uint32(rs.Action)) emitted = true } else { // Add an argument check for these particular diff --git a/pkg/seccomp/seccomp_test.go b/pkg/seccomp/seccomp_test.go index f2b903e42..11ed90eb4 100644 --- a/pkg/seccomp/seccomp_test.go +++ b/pkg/seccomp/seccomp_test.go @@ -72,12 +72,12 @@ func TestBasic(t *testing.T) { data seccompData // want is the expected return value of the BPF program. - want uint32 + want linux.BPFAction } for _, test := range []struct { ruleSets []RuleSet - defaultAction uint32 + defaultAction linux.BPFAction specs []spec }{ { @@ -357,7 +357,7 @@ func TestBasic(t *testing.T) { t.Errorf("%s: bpf.Exec() got error: %v", spec.desc, err) continue } - if got != spec.want { + if got != uint32(spec.want) { t.Errorf("%s: bpd.Exec() = %d, want: %d", spec.desc, got, spec.want) } } @@ -380,9 +380,9 @@ func TestRandom(t *testing.T) { instrs, err := BuildProgram([]RuleSet{ RuleSet{ Rules: syscallRules, - Action: uint32(linux.SECCOMP_RET_ALLOW), + Action: linux.SECCOMP_RET_ALLOW, }, - }, uint32(linux.SECCOMP_RET_TRAP)) + }, linux.SECCOMP_RET_TRAP) if err != nil { t.Fatalf("buildProgram() got error: %v", err) } @@ -397,11 +397,11 @@ func TestRandom(t *testing.T) { t.Errorf("bpf.Exec() got error: %v, for syscall %d", err, i) continue } - want := uint32(linux.SECCOMP_RET_TRAP) + want := linux.SECCOMP_RET_TRAP if _, ok := syscallRules[uintptr(i)]; ok { want = linux.SECCOMP_RET_ALLOW } - if got != want { + if got != uint32(want) { t.Errorf("bpf.Exec() = %d, want: %d, for syscall %d", got, want, i) } } diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index d6dc45bbd..cec179246 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -27,24 +27,6 @@ import ( const maxSyscallFilterInstructions = 1 << 15 -type seccompResult int - -const ( - // seccompResultDeny indicates that a syscall should not be executed. - seccompResultDeny seccompResult = iota - - // seccompResultAllow indicates that a syscall should be executed. - seccompResultAllow - - // seccompResultKill indicates that the task should be killed immediately, - // with the exit status indicating that the task was killed by SIGSYS. - seccompResultKill - - // seccompResultTrace indicates that a ptracer was successfully notified as - // a result of a SECCOMP_RET_TRACE. - seccompResultTrace -) - // seccompData is equivalent to struct seccomp_data, which contains the data // passed to seccomp-bpf filters. type seccompData struct { @@ -83,48 +65,47 @@ func seccompSiginfo(t *Task, errno, sysno int32, ip usermem.Addr) *arch.SignalIn // in because vsyscalls do not use the values in t.Arch().) // // Preconditions: The caller must be running on the task goroutine. -func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip usermem.Addr) seccompResult { - result := t.evaluateSyscallFilters(sysno, args, ip) - switch result & linux.SECCOMP_RET_ACTION { +func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip usermem.Addr) linux.BPFAction { + result := linux.BPFAction(t.evaluateSyscallFilters(sysno, args, ip)) + action := result & linux.SECCOMP_RET_ACTION + switch action { case linux.SECCOMP_RET_TRAP: // "Results in the kernel sending a SIGSYS signal to the triggering // task without executing the system call. ... The SECCOMP_RET_DATA // portion of the return value will be passed as si_errno." - // Documentation/prctl/seccomp_filter.txt - t.SendSignal(seccompSiginfo(t, int32(result&linux.SECCOMP_RET_DATA), sysno, ip)) - return seccompResultDeny + t.SendSignal(seccompSiginfo(t, int32(result.Data()), sysno, ip)) case linux.SECCOMP_RET_ERRNO: // "Results in the lower 16-bits of the return value being passed to // userland as the errno without executing the system call." - t.Arch().SetReturn(-uintptr(result & linux.SECCOMP_RET_DATA)) - return seccompResultDeny + t.Arch().SetReturn(-uintptr(result.Data())) case linux.SECCOMP_RET_TRACE: // "When returned, this value will cause the kernel to attempt to // notify a ptrace()-based tracer prior to executing the system call. // If there is no tracer present, -ENOSYS is returned to userland and // the system call is not executed." - if t.ptraceSeccomp(uint16(result & linux.SECCOMP_RET_DATA)) { - return seccompResultTrace + if !t.ptraceSeccomp(result.Data()) { + // This useless-looking temporary is needed because Go. + tmp := uintptr(syscall.ENOSYS) + t.Arch().SetReturn(-tmp) + return linux.SECCOMP_RET_ERRNO } - // This useless-looking temporary is needed because Go. - tmp := uintptr(syscall.ENOSYS) - t.Arch().SetReturn(-tmp) - return seccompResultDeny case linux.SECCOMP_RET_ALLOW: // "Results in the system call being executed." - return seccompResultAllow case linux.SECCOMP_RET_KILL_THREAD: // "Results in the task exiting immediately without executing the // system call. The exit status of the task will be SIGSYS, not // SIGKILL." - fallthrough - default: // consistent with Linux - return seccompResultKill + + default: + // consistent with Linux + return linux.SECCOMP_RET_KILL_THREAD } + return action } func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip usermem.Addr) uint32 { @@ -155,7 +136,7 @@ func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, i thisRet, err := bpf.Exec(f.([]bpf.Program)[i], input) if err != nil { t.Debugf("seccomp-bpf filter %d returned error: %v", i, err) - thisRet = linux.SECCOMP_RET_KILL_THREAD + thisRet = uint32(linux.SECCOMP_RET_KILL_THREAD) } // "If multiple filters exist, the return value for the evaluation of a // given system call will always use the highest precedent value." - diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index 2a39ebc68..9e43f089a 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -199,16 +199,16 @@ func (t *Task) doSyscall() taskRunState { // is rare), not needed for correctness. if t.syscallFilters.Load() != nil { switch r := t.checkSeccompSyscall(int32(sysno), args, usermem.Addr(t.Arch().IP())); r { - case seccompResultDeny: + case linux.SECCOMP_RET_ERRNO, linux.SECCOMP_RET_TRAP: t.Debugf("Syscall %d: denied by seccomp", sysno) return (*runSyscallExit)(nil) - case seccompResultAllow: + case linux.SECCOMP_RET_ALLOW: // ok - case seccompResultKill: + case linux.SECCOMP_RET_KILL_THREAD: t.Debugf("Syscall %d: killed by seccomp", sysno) t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) return (*runExit)(nil) - case seccompResultTrace: + case linux.SECCOMP_RET_TRACE: t.Debugf("Syscall %d: stopping for PTRACE_EVENT_SECCOMP", sysno) return (*runSyscallAfterPtraceEventSeccomp)(nil) default: @@ -345,14 +345,18 @@ func (t *Task) doVsyscall(addr usermem.Addr, sysno uintptr) taskRunState { args := t.Arch().SyscallArgs() if t.syscallFilters.Load() != nil { switch r := t.checkSeccompSyscall(int32(sysno), args, addr); r { - case seccompResultDeny: + case linux.SECCOMP_RET_ERRNO, linux.SECCOMP_RET_TRAP: t.Debugf("vsyscall %d, caller %x: denied by seccomp", sysno, t.Arch().Value(caller)) return (*runApp)(nil) - case seccompResultAllow: + case linux.SECCOMP_RET_ALLOW: // ok - case seccompResultTrace: + case linux.SECCOMP_RET_TRACE: t.Debugf("vsyscall %d, caller %x: stopping for PTRACE_EVENT_SECCOMP", sysno, t.Arch().Value(caller)) return &runVsyscallAfterPtraceEventSeccomp{addr, sysno, caller} + case linux.SECCOMP_RET_KILL_THREAD: + t.Debugf("vsyscall %d: killed by seccomp", sysno) + t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)}) + return (*runExit)(nil) default: panic(fmt.Sprintf("Unknown seccomp result %d", r)) } diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index 25b8e8cb7..e2aab8135 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -38,7 +38,7 @@ const syscallEvent syscall.Signal = 0x80 // Precondition: the runtime OS thread must be locked. func probeSeccomp() bool { // Create a completely new, destroyable process. - t, err := attachedThread(0, uint32(linux.SECCOMP_RET_ERRNO)) + t, err := attachedThread(0, linux.SECCOMP_RET_ERRNO) if err != nil { panic(fmt.Sprintf("seccomp probe failed: %v", err)) } @@ -112,14 +112,14 @@ func createStub() (*thread, error) { // ptrace emulation check. This simplifies using SYSEMU, since seccomp // will never run for emulation. Seccomp will only run for injected // system calls, and thus we can use RET_KILL as our violation action. - var defaultAction uint32 + var defaultAction linux.BPFAction if probeSeccomp() { log.Infof("Latest seccomp behavior found (kernel >= 4.8 likely)") - defaultAction = uint32(linux.SECCOMP_RET_KILL_THREAD) + defaultAction = linux.SECCOMP_RET_KILL_THREAD } else { // We must rely on SYSEMU behavior; tracing with SYSEMU is broken. log.Infof("Legacy seccomp behavior found (kernel < 4.8 likely)") - defaultAction = uint32(linux.SECCOMP_RET_ALLOW) + defaultAction = linux.SECCOMP_RET_ALLOW } // When creating the new child process, we specify SIGKILL as the @@ -135,7 +135,7 @@ func createStub() (*thread, error) { // attachedThread returns a new attached thread. // // Precondition: the runtime OS thread must be locked. -func attachedThread(flags uintptr, defaultAction uint32) (*thread, error) { +func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, error) { // Create a BPF program that allows only the system calls needed by the // stub and all its children. This is used to create child stubs // (below), so we must include the ability to fork, but otherwise lock @@ -148,11 +148,11 @@ func attachedThread(flags uintptr, defaultAction uint32) (*thread, error) { syscall.SYS_TIME: {}, 309: {}, // SYS_GETCPU. }, - Action: uint32(linux.SECCOMP_RET_TRAP), + Action: linux.SECCOMP_RET_TRAP, Vsyscall: true, }, } - if defaultAction != uint32(linux.SECCOMP_RET_ALLOW) { + if defaultAction != linux.SECCOMP_RET_ALLOW { rules = append(rules, seccomp.RuleSet{ Rules: seccomp.SyscallRules{ syscall.SYS_CLONE: []seccomp.Rule{ @@ -191,7 +191,7 @@ func attachedThread(flags uintptr, defaultAction uint32) (*thread, error) { syscall.SYS_MMAP: {}, syscall.SYS_MUNMAP: {}, }, - Action: uint32(linux.SECCOMP_RET_ALLOW), + Action: linux.SECCOMP_RET_ALLOW, }) } instrs, err := seccomp.BuildProgram(rules, defaultAction) diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index d6ac166a4..ac416b75f 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -215,6 +215,25 @@ TEST(SeccompTest, SeccompAppliesToVsyscall) { << "status " << status; } +TEST(SeccompTest, RetKillVsyscallCausesDeathBySIGSYS) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(IsVsyscallEnabled())); + + pid_t const pid = fork(); + if (pid == 0) { + // Register a signal handler for SIGSYS that we don't expect to be invoked. + RegisterSignalHandler( + SIGSYS, +[](int, siginfo_t*, void*) { _exit(1); }); + ApplySeccompFilter(SYS_time, SECCOMP_RET_KILL); + vsyscall_time(nullptr); // Should result in death. + TEST_CHECK_MSG(false, "Survived invocation of test syscall"); + } + ASSERT_THAT(pid, SyscallSucceeds()); + int status; + ASSERT_THAT(waitpid(pid, &status, 0), SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGSYS) + << "status " << status; +} + TEST(SeccompTest, RetTraceWithoutPtracerReturnsENOSYS) { pid_t const pid = fork(); if (pid == 0) {