Add argument checks to seccomp

This is required to increase protection when running in GKE.

PiperOrigin-RevId: 210635123
Change-Id: Iaaa8be49e73f7a3a90805313885e75894416f0b5
This commit is contained in:
Fabricio Voznika 2018-08-28 17:08:49 -07:00 committed by Shentubot
parent 3b11769c77
commit 30c025f3ef
4 changed files with 400 additions and 75 deletions

View File

@ -22,6 +22,7 @@ go_library(
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm",
"//pkg/sentry/platform/ptrace",
"//pkg/tcpip/link/fdbased",
"@org_golang_x_sys//unix:go_default_library",
],
)

View File

@ -15,72 +15,127 @@
package filter
import (
"os"
"syscall"
"golang.org/x/sys/unix"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/seccomp"
"gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased"
)
// allowedSyscalls is the set of syscalls executed by the Sentry
// to the host OS.
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_ACCEPT: {},
syscall.SYS_ARCH_PRCTL: {},
syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
{seccomp.AllowValue(linux.ARCH_GET_FS)},
{seccomp.AllowValue(linux.ARCH_SET_FS)},
},
syscall.SYS_CLOCK_GETTIME: {},
syscall.SYS_CLONE: {},
syscall.SYS_CLONE: []seccomp.Rule{
{
seccomp.AllowValue(
syscall.CLONE_VM |
syscall.CLONE_FS |
syscall.CLONE_FILES |
syscall.CLONE_SIGHAND |
syscall.CLONE_SYSVSEM |
syscall.CLONE_THREAD),
},
},
syscall.SYS_CLOSE: {},
syscall.SYS_DUP: {},
syscall.SYS_EPOLL_CREATE1: {},
syscall.SYS_EPOLL_CTL: {},
syscall.SYS_EPOLL_PWAIT: {},
syscall.SYS_EPOLL_WAIT: {},
syscall.SYS_EVENTFD2: {},
syscall.SYS_EXIT: {},
syscall.SYS_EXIT_GROUP: {},
syscall.SYS_FALLOCATE: {},
syscall.SYS_FCNTL: {},
syscall.SYS_FSTAT: {},
syscall.SYS_FSYNC: {},
syscall.SYS_FTRUNCATE: {},
syscall.SYS_FUTEX: {},
syscall.SYS_GETDENTS64: {},
syscall.SYS_GETPID: {},
unix.SYS_GETRANDOM: {},
syscall.SYS_GETSOCKOPT: {},
syscall.SYS_GETTID: {},
syscall.SYS_GETTIMEOFDAY: {},
syscall.SYS_LISTEN: {},
syscall.SYS_LSEEK: {},
// TODO: Remove SYS_LSTAT when executable lookup moves
// into the gofer.
syscall.SYS_LSTAT: {},
syscall.SYS_MADVISE: {},
syscall.SYS_MINCORE: {},
syscall.SYS_MMAP: {},
syscall.SYS_MPROTECT: {},
syscall.SYS_MUNMAP: {},
syscall.SYS_NANOSLEEP: {},
syscall.SYS_POLL: {},
syscall.SYS_PREAD64: {},
syscall.SYS_PWRITE64: {},
syscall.SYS_READ: {},
syscall.SYS_READV: {},
syscall.SYS_RECVMSG: {},
syscall.SYS_RESTART_SYSCALL: {},
syscall.SYS_RT_SIGACTION: {},
syscall.SYS_RT_SIGPROCMASK: {},
syscall.SYS_RT_SIGRETURN: {},
syscall.SYS_SCHED_YIELD: {},
syscall.SYS_SENDMSG: {},
syscall.SYS_SETITIMER: {},
syscall.SYS_SHUTDOWN: {},
syscall.SYS_SIGALTSTACK: {},
syscall.SYS_SYNC_FILE_RANGE: {},
syscall.SYS_TGKILL: {},
syscall.SYS_WRITE: {},
syscall.SYS_WRITEV: {},
syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(0),
},
},
syscall.SYS_EPOLL_WAIT: {},
syscall.SYS_EVENTFD2: []seccomp.Rule{
{
seccomp.AllowValue(0),
seccomp.AllowValue(0),
},
},
syscall.SYS_EXIT: {},
syscall.SYS_EXIT_GROUP: {},
syscall.SYS_FALLOCATE: {},
syscall.SYS_FCHMOD: {},
syscall.SYS_FCNTL: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.F_GETFL),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.F_SETFL),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.F_GETFD),
},
},
syscall.SYS_FSTAT: {},
syscall.SYS_FSYNC: {},
syscall.SYS_FTRUNCATE: {},
syscall.SYS_FUTEX: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(0),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(0),
},
},
syscall.SYS_GETDENTS64: {},
syscall.SYS_GETPID: {},
unix.SYS_GETRANDOM: {},
syscall.SYS_GETSOCKOPT: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_DOMAIN),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_TYPE),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_ERROR),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_SNDBUF),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_RCVBUF),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_REUSEADDR),
},
},
syscall.SYS_GETTID: {},
syscall.SYS_GETTIMEOFDAY: {},
// SYS_IOCTL is needed for terminal support, but we only allow
// setting/getting termios and winsize.
syscall.SYS_IOCTL: []seccomp.Rule{
@ -110,6 +165,107 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowAny{}, /* winsize struct */
},
},
syscall.SYS_LSEEK: {},
// TODO: Remove SYS_LSTAT when executable lookup moves
// into the gofer.
syscall.SYS_LSTAT: {},
syscall.SYS_MADVISE: {},
syscall.SYS_MINCORE: {},
syscall.SYS_MMAP: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MAP_SHARED),
},
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MAP_PRIVATE),
},
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS),
},
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK),
},
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE),
},
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.PROT_WRITE | syscall.PROT_READ),
seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED),
},
},
syscall.SYS_MPROTECT: {},
syscall.SYS_MUNMAP: {},
syscall.SYS_NANOSLEEP: {},
syscall.SYS_POLL: {},
syscall.SYS_PREAD64: {},
syscall.SYS_PWRITE64: {},
syscall.SYS_READ: {},
syscall.SYS_READV: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(len(fdbased.BufConfig)),
},
},
syscall.SYS_RECVMSG: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC),
},
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK),
},
},
syscall.SYS_RESTART_SYSCALL: {},
syscall.SYS_RT_SIGACTION: {},
syscall.SYS_RT_SIGPROCMASK: {},
syscall.SYS_RT_SIGRETURN: {},
syscall.SYS_SCHED_YIELD: {},
syscall.SYS_SENDMSG: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL),
},
},
syscall.SYS_SETITIMER: {},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
},
syscall.SYS_SIGALTSTACK: {},
syscall.SYS_SYNC_FILE_RANGE: {},
syscall.SYS_TGKILL: []seccomp.Rule{
{
seccomp.AllowValue(uint64(os.Getpid())),
},
},
syscall.SYS_WRITE: {},
syscall.SYS_WRITEV: []seccomp.Rule{
{
seccomp.AllowAny{},
seccomp.AllowAny{},
seccomp.AllowValue(2),
},
},
}
// whitelistFSFilters returns syscalls made by whitelistFS. Using WhitelistFS
@ -154,42 +310,197 @@ func whitelistFSFilters() seccomp.SyscallRules {
// hostInetFilters contains syscalls that are needed by sentry/socket/hostinet.
//
// Every syscall appears exactly once in the returned map (duplicate constant
// keys in a map literal do not compile). Syscalls with a non-empty rule list
// are limited to the listed argument combinations; syscalls with an empty
// rule list declare no argument checks here.
func hostInetFilters() seccomp.SyscallRules {
	return seccomp.SyscallRules{
		// accept4: only the flags argument is checked.
		syscall.SYS_ACCEPT4: []seccomp.Rule{
			{
				seccomp.AllowAny{},
				seccomp.AllowAny{},
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
			},
		},
		syscall.SYS_BIND:        {},
		syscall.SYS_CONNECT:     {},
		syscall.SYS_GETPEERNAME: {},
		syscall.SYS_GETSOCKNAME: {},
		// getsockopt: any fd, but only the (level, option) pairs below.
		syscall.SYS_GETSOCKOPT: []seccomp.Rule{
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_IPV6),
				seccomp.AllowValue(syscall.IPV6_V6ONLY),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_ERROR),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_KEEPALIVE),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_SNDBUF),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_REUSEADDR),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_TYPE),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_LINGER),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_TCP),
				seccomp.AllowValue(syscall.TCP_NODELAY),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_TCP),
				seccomp.AllowValue(syscall.TCP_INFO),
			},
		},
		// ioctl: only the TIOCOUTQ request is listed.
		syscall.SYS_IOCTL: []seccomp.Rule{
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.TIOCOUTQ),
			},
		},
		syscall.SYS_LISTEN:   {},
		syscall.SYS_READV:    {},
		syscall.SYS_RECVFROM: {},
		syscall.SYS_RECVMSG:  {},
		syscall.SYS_SENDMSG:  {},
		syscall.SYS_SENDTO:   {},
		// setsockopt: any fd and value pointer, but only the (level, option)
		// pairs below, each with a 4-byte option length.
		syscall.SYS_SETSOCKOPT: []seccomp.Rule{
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_IPV6),
				seccomp.AllowValue(syscall.IPV6_V6ONLY),
				seccomp.AllowAny{},
				seccomp.AllowValue(4),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_SNDBUF),
				seccomp.AllowAny{},
				seccomp.AllowValue(4),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_RCVBUF),
				seccomp.AllowAny{},
				seccomp.AllowValue(4),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_SOCKET),
				seccomp.AllowValue(syscall.SO_REUSEADDR),
				seccomp.AllowAny{},
				seccomp.AllowValue(4),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SOL_TCP),
				seccomp.AllowValue(syscall.TCP_NODELAY),
				seccomp.AllowAny{},
				seccomp.AllowValue(4),
			},
		},
		// shutdown: any fd, with one of the three "how" values.
		syscall.SYS_SHUTDOWN: []seccomp.Rule{
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SHUT_RD),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SHUT_WR),
			},
			{
				seccomp.AllowAny{},
				seccomp.AllowValue(syscall.SHUT_RDWR),
			},
		},
		// socket: AF_INET/AF_INET6 stream or datagram sockets created with
		// SOCK_NONBLOCK|SOCK_CLOEXEC and protocol 0.
		syscall.SYS_SOCKET: []seccomp.Rule{
			{
				seccomp.AllowValue(syscall.AF_INET),
				seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
				seccomp.AllowValue(0),
			},
			{
				seccomp.AllowValue(syscall.AF_INET),
				seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
				seccomp.AllowValue(0),
			},
			{
				seccomp.AllowValue(syscall.AF_INET6),
				seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
				seccomp.AllowValue(0),
			},
			{
				seccomp.AllowValue(syscall.AF_INET6),
				seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC),
				seccomp.AllowValue(0),
			},
		},
		syscall.SYS_WRITEV: {},
	}
}
// ptraceFilters returns syscalls made exclusively by the ptrace platform.
//
// Each syscall is listed exactly once (duplicate constant keys in a map
// literal do not compile), and every entry has an empty rule list.
func ptraceFilters() seccomp.SyscallRules {
	return seccomp.SyscallRules{
		unix.SYS_GETCPU:            {},
		unix.SYS_SCHED_SETAFFINITY: {},
		syscall.SYS_PTRACE:         {},
		syscall.SYS_TGKILL:         {},
		syscall.SYS_WAIT4:          {},
	}
}
// kvmFilters lists the host syscalls issued only by the KVM platform. Every
// entry carries an empty rule list.
func kvmFilters() seccomp.SyscallRules {
	// KVM uses syscall -1 to transition to host.
	const hostTransition = 0xffffffffffffffff

	rules := seccomp.SyscallRules{
		hostTransition:               []seccomp.Rule{},
		syscall.SYS_RT_SIGTIMEDWAIT: []seccomp.Rule{},
		syscall.SYS_RT_SIGSUSPEND:   []seccomp.Rule{},
		syscall.SYS_MMAP:            []seccomp.Rule{},
		syscall.SYS_IOCTL:           []seccomp.Rule{},
		syscall.SYS_FUTEX:           []seccomp.Rule{},
		syscall.SYS_ARCH_PRCTL:      []seccomp.Rule{},
	}
	return rules
}
// controlServerFilters returns the syscall rules bound to the file descriptor
// fd: accept and listen are limited to fd itself (listen additionally to a
// backlog of 16), and getsockopt is limited to SOL_SOCKET/SO_PEERCRED on any
// descriptor.
func controlServerFilters(fd int) seccomp.SyscallRules {
	// Both accept and listen match their first argument against the same fd.
	ctrlFD := seccomp.AllowValue(fd)

	acceptRule := seccomp.Rule{ctrlFD}
	listenRule := seccomp.Rule{
		ctrlFD,
		seccomp.AllowValue(16 /* unet.backlog */),
	}
	peerCredRule := seccomp.Rule{
		seccomp.AllowAny{},
		seccomp.AllowValue(syscall.SOL_SOCKET),
		seccomp.AllowValue(syscall.SO_PEERCRED),
	}

	rules := seccomp.SyscallRules{}
	rules[syscall.SYS_ACCEPT] = []seccomp.Rule{acceptRule}
	rules[syscall.SYS_LISTEN] = []seccomp.Rule{listenRule}
	rules[syscall.SYS_GETSOCKOPT] = []seccomp.Rule{peerCredRule}
	return rules
}

View File

@ -27,24 +27,33 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace"
)
// Options are seccomp filter related options. They are passed to Install,
// which uses them to decide which additional syscall rule sets get merged
// into the base allowedSyscalls set.
type Options struct {
// Platform is the platform in use; Install switches on its concrete type
// (e.g. *ptrace.PTrace, *kvm.KVM) to add platform-specific filters.
Platform platform.Platform
// WhitelistFS, when true, makes Install merge whitelistFSFilters() and
// report that direct file access is enabled.
WhitelistFS bool
// HostNetwork, when true, makes Install merge hostInetFilters() and report
// that the filters are less restrictive.
HostNetwork bool
// ControllerFD is the file descriptor handed to controlServerFilters, which
// pins the accept/listen rules to it.
ControllerFD int
}
// Install installs seccomp filters based on the given platform.
func Install(p platform.Platform, whitelistFS, hostNetwork bool) error {
func Install(opt Options) error {
s := allowedSyscalls
s.Merge(controlServerFilters(opt.ControllerFD))
// Set of additional filters used by -race and -msan. Returns empty
// when not enabled.
s.Merge(instrumentationFilters())
if whitelistFS {
if opt.WhitelistFS {
Report("direct file access allows unrestricted file access!")
s.Merge(whitelistFSFilters())
}
if hostNetwork {
if opt.HostNetwork {
Report("host networking enabled: syscall filters less restrictive!")
s.Merge(hostInetFilters())
}
switch p := p.(type) {
switch p := opt.Platform.(type) {
case *ptrace.PTrace:
s.Merge(ptraceFilters())
case *kvm.KVM:

View File

@ -351,9 +351,13 @@ func (l *Loader) run() error {
if l.conf.DisableSeccomp {
filter.Report("syscall filter is DISABLED. Running in less secure mode.")
} else {
whitelistFS := l.conf.FileAccess == FileAccessDirect
hostNet := l.conf.Network == NetworkHost
if err := filter.Install(l.k.Platform, whitelistFS, hostNet); err != nil {
opts := filter.Options{
Platform: l.k.Platform,
WhitelistFS: l.conf.FileAccess == FileAccessDirect,
HostNetwork: l.conf.Network == NetworkHost,
ControllerFD: l.ctrl.srv.FD(),
}
if err := filter.Install(opts); err != nil {
return fmt.Errorf("Failed to install seccomp filters: %v", err)
}
}