From 30c025f3efdf5b599d8fbd4172bb5b856cc269af Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 28 Aug 2018 17:08:49 -0700 Subject: [PATCH] Add argument checks to seccomp This is required to increase protection when running in GKE. PiperOrigin-RevId: 210635123 Change-Id: Iaaa8be49e73f7a3a90805313885e75894416f0b5 --- runsc/boot/filter/BUILD | 1 + runsc/boot/filter/config.go | 447 ++++++++++++++++++++++++++++++------ runsc/boot/filter/filter.go | 17 +- runsc/boot/loader.go | 10 +- 4 files changed, 400 insertions(+), 75 deletions(-) diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index d20605a91..48f2c8024 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -22,6 +22,7 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/platform/kvm", "//pkg/sentry/platform/ptrace", + "//pkg/tcpip/link/fdbased", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 1bec89900..7227ea2b7 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -15,72 +15,127 @@ package filter import ( + "os" "syscall" "golang.org/x/sys/unix" "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/seccomp" + "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased" ) -// allowedSyscalls is the set of syscalls executed by the Sentry -// to the host OS. +// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS. var allowedSyscalls = seccomp.SyscallRules{ - syscall.SYS_ACCEPT: {}, - syscall.SYS_ARCH_PRCTL: {}, + syscall.SYS_ARCH_PRCTL: []seccomp.Rule{ + {seccomp.AllowValue(linux.ARCH_GET_FS)}, + {seccomp.AllowValue(linux.ARCH_SET_FS)}, + }, syscall.SYS_CLOCK_GETTIME: {}, - syscall.SYS_CLONE: {}, + syscall.SYS_CLONE: []seccomp.Rule{ + { + seccomp.AllowValue( + syscall.CLONE_VM | + syscall.CLONE_FS | + syscall.CLONE_FILES | + syscall.CLONE_SIGHAND | + syscall.CLONE_SYSVSEM | + syscall.CLONE_THREAD), + }, + }, syscall.SYS_CLOSE: {}, syscall.SYS_DUP: {}, syscall.SYS_EPOLL_CREATE1: {}, syscall.SYS_EPOLL_CTL: {}, - syscall.SYS_EPOLL_PWAIT: {}, - syscall.SYS_EPOLL_WAIT: {}, - syscall.SYS_EVENTFD2: {}, - syscall.SYS_EXIT: {}, - syscall.SYS_EXIT_GROUP: {}, - syscall.SYS_FALLOCATE: {}, - syscall.SYS_FCNTL: {}, - syscall.SYS_FSTAT: {}, - syscall.SYS_FSYNC: {}, - syscall.SYS_FTRUNCATE: {}, - syscall.SYS_FUTEX: {}, - syscall.SYS_GETDENTS64: {}, - syscall.SYS_GETPID: {}, - unix.SYS_GETRANDOM: {}, - syscall.SYS_GETSOCKOPT: {}, - syscall.SYS_GETTID: {}, - syscall.SYS_GETTIMEOFDAY: {}, - syscall.SYS_LISTEN: {}, - syscall.SYS_LSEEK: {}, - // TODO: Remove SYS_LSTAT when executable lookup moves - // into the gofer. - syscall.SYS_LSTAT: {}, - syscall.SYS_MADVISE: {}, - syscall.SYS_MINCORE: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_MPROTECT: {}, - syscall.SYS_MUNMAP: {}, - syscall.SYS_NANOSLEEP: {}, - syscall.SYS_POLL: {}, - syscall.SYS_PREAD64: {}, - syscall.SYS_PWRITE64: {}, - syscall.SYS_READ: {}, - syscall.SYS_READV: {}, - syscall.SYS_RECVMSG: {}, - syscall.SYS_RESTART_SYSCALL: {}, - syscall.SYS_RT_SIGACTION: {}, - syscall.SYS_RT_SIGPROCMASK: {}, - syscall.SYS_RT_SIGRETURN: {}, - syscall.SYS_SCHED_YIELD: {}, - syscall.SYS_SENDMSG: {}, - syscall.SYS_SETITIMER: {}, - syscall.SYS_SHUTDOWN: {}, - syscall.SYS_SIGALTSTACK: {}, - syscall.SYS_SYNC_FILE_RANGE: {}, - syscall.SYS_TGKILL: {}, - syscall.SYS_WRITE: {}, - syscall.SYS_WRITEV: {}, - + syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(0), + }, + }, + syscall.SYS_EPOLL_WAIT: {}, + syscall.SYS_EVENTFD2: []seccomp.Rule{ + { + seccomp.AllowValue(0), + seccomp.AllowValue(0), + }, + }, + syscall.SYS_EXIT: {}, + syscall.SYS_EXIT_GROUP: {}, + syscall.SYS_FALLOCATE: {}, + syscall.SYS_FCHMOD: {}, + syscall.SYS_FCNTL: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.F_GETFL), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.F_SETFL), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.F_GETFD), + }, + }, + syscall.SYS_FSTAT: {}, + syscall.SYS_FSYNC: {}, + syscall.SYS_FTRUNCATE: {}, + syscall.SYS_FUTEX: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG), + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(0), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG), + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(0), + }, + }, + syscall.SYS_GETDENTS64: {}, + syscall.SYS_GETPID: {}, + unix.SYS_GETRANDOM: {}, + syscall.SYS_GETSOCKOPT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_DOMAIN), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_TYPE), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_ERROR), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_SNDBUF), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_RCVBUF), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_REUSEADDR), + }, + }, + syscall.SYS_GETTID: {}, + syscall.SYS_GETTIMEOFDAY: {}, // SYS_IOCTL is needed for terminal support, but we only allow // setting/getting termios and winsize. syscall.SYS_IOCTL: []seccomp.Rule{ @@ -110,6 +165,107 @@ var allowedSyscalls = seccomp.SyscallRules{ seccomp.AllowAny{}, /* winsize struct */ }, }, + syscall.SYS_LSEEK: {}, + // TODO: Remove SYS_LSTAT when executable lookup moves + // into the gofer. + syscall.SYS_LSTAT: {}, + syscall.SYS_MADVISE: {}, + syscall.SYS_MINCORE: {}, + syscall.SYS_MMAP: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MAP_SHARED), + }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MAP_PRIVATE), + }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS), + }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_STACK), + }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_NORESERVE), + }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.PROT_WRITE | syscall.PROT_READ), + seccomp.AllowValue(syscall.MAP_PRIVATE | syscall.MAP_ANONYMOUS | syscall.MAP_FIXED), + }, + }, + syscall.SYS_MPROTECT: {}, + syscall.SYS_MUNMAP: {}, + syscall.SYS_NANOSLEEP: {}, + syscall.SYS_POLL: {}, + syscall.SYS_PREAD64: {}, + syscall.SYS_PWRITE64: {}, + syscall.SYS_READ: {}, + syscall.SYS_READV: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(len(fdbased.BufConfig)), + }, + }, + syscall.SYS_RECVMSG: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC), + }, + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC | syscall.MSG_PEEK), + }, + }, + syscall.SYS_RESTART_SYSCALL: {}, + syscall.SYS_RT_SIGACTION: {}, + syscall.SYS_RT_SIGPROCMASK: {}, + syscall.SYS_RT_SIGRETURN: {}, + syscall.SYS_SCHED_YIELD: {}, + syscall.SYS_SENDMSG: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.MSG_DONTWAIT | syscall.MSG_NOSIGNAL), + }, + }, + syscall.SYS_SETITIMER: {}, + syscall.SYS_SHUTDOWN: []seccomp.Rule{ + {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, + }, + syscall.SYS_SIGALTSTACK: {}, + syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_TGKILL: []seccomp.Rule{ + { + seccomp.AllowValue(uint64(os.Getpid())), + }, + }, + syscall.SYS_WRITE: {}, + syscall.SYS_WRITEV: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(2), + }, + }, } // whitelistFSFilters returns syscalls made by whitelistFS. Using WhitelistFS @@ -154,42 +310,197 @@ func whitelistFSFilters() seccomp.SyscallRules { // hostInetFilters contains syscalls that are needed by sentry/socket/hostinet. func hostInetFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ - syscall.SYS_ACCEPT4: {}, + syscall.SYS_ACCEPT4: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + }, + }, syscall.SYS_BIND: {}, syscall.SYS_CONNECT: {}, syscall.SYS_GETPEERNAME: {}, syscall.SYS_GETSOCKNAME: {}, - syscall.SYS_GETSOCKOPT: {}, - syscall.SYS_IOCTL: {}, - syscall.SYS_LISTEN: {}, - syscall.SYS_READV: {}, - syscall.SYS_RECVFROM: {}, - syscall.SYS_RECVMSG: {}, - syscall.SYS_SENDMSG: {}, - syscall.SYS_SENDTO: {}, - syscall.SYS_SETSOCKOPT: {}, - syscall.SYS_SHUTDOWN: {}, - syscall.SYS_SOCKET: {}, - syscall.SYS_WRITEV: {}, + syscall.SYS_GETSOCKOPT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_IPV6), + seccomp.AllowValue(syscall.IPV6_V6ONLY), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_ERROR), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_KEEPALIVE), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_SNDBUF), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_REUSEADDR), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_TYPE), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_LINGER), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_TCP), + seccomp.AllowValue(syscall.TCP_NODELAY), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_TCP), + seccomp.AllowValue(syscall.TCP_INFO), + }, + }, + syscall.SYS_IOCTL: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.TIOCOUTQ), + }, + }, + syscall.SYS_LISTEN: {}, + syscall.SYS_READV: {}, + syscall.SYS_RECVFROM: {}, + syscall.SYS_RECVMSG: {}, + syscall.SYS_SENDMSG: {}, + syscall.SYS_SENDTO: {}, + syscall.SYS_SETSOCKOPT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_IPV6), + seccomp.AllowValue(syscall.IPV6_V6ONLY), + seccomp.AllowAny{}, + seccomp.AllowValue(4), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_SNDBUF), + seccomp.AllowAny{}, + seccomp.AllowValue(4), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_RCVBUF), + seccomp.AllowAny{}, + seccomp.AllowValue(4), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_REUSEADDR), + seccomp.AllowAny{}, + seccomp.AllowValue(4), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_TCP), + seccomp.AllowValue(syscall.TCP_NODELAY), + seccomp.AllowAny{}, + seccomp.AllowValue(4), + }, + }, + syscall.SYS_SHUTDOWN: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SHUT_RD), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SHUT_WR), + }, + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SHUT_RDWR), + }, + }, + syscall.SYS_SOCKET: []seccomp.Rule{ + { + seccomp.AllowValue(syscall.AF_INET), + seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.AllowValue(0), + }, + { + seccomp.AllowValue(syscall.AF_INET), + seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.AllowValue(0), + }, + { + seccomp.AllowValue(syscall.AF_INET6), + seccomp.AllowValue(syscall.SOCK_STREAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.AllowValue(0), + }, + { + seccomp.AllowValue(syscall.AF_INET6), + seccomp.AllowValue(syscall.SOCK_DGRAM | syscall.SOCK_NONBLOCK | syscall.SOCK_CLOEXEC), + seccomp.AllowValue(0), + }, + }, + syscall.SYS_WRITEV: {}, } } // ptraceFilters returns syscalls made exclusively by the ptrace platform. func ptraceFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ - syscall.SYS_PTRACE: {}, - syscall.SYS_WAIT4: {}, unix.SYS_GETCPU: {}, unix.SYS_SCHED_SETAFFINITY: {}, + syscall.SYS_PTRACE: {}, + syscall.SYS_TGKILL: {}, + syscall.SYS_WAIT4: {}, } } // kvmFilters returns syscalls made exclusively by the KVM platform. func kvmFilters() seccomp.SyscallRules { return seccomp.SyscallRules{ + syscall.SYS_ARCH_PRCTL: {}, + syscall.SYS_FUTEX: {}, syscall.SYS_IOCTL: {}, + syscall.SYS_MMAP: {}, syscall.SYS_RT_SIGSUSPEND: {}, syscall.SYS_RT_SIGTIMEDWAIT: {}, 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. } } + +func controlServerFilters(fd int) seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_ACCEPT: []seccomp.Rule{ + { + seccomp.AllowValue(fd), + }, + }, + syscall.SYS_LISTEN: []seccomp.Rule{ + { + seccomp.AllowValue(fd), + seccomp.AllowValue(16 /* unet.backlog */), + }, + }, + syscall.SYS_GETSOCKOPT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.SOL_SOCKET), + seccomp.AllowValue(syscall.SO_PEERCRED), + }, + }, + } +} diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index c57bbd2e5..56d30f2a0 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -27,24 +27,33 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ptrace" ) +// Options are seccomp filter related options. +type Options struct { + Platform platform.Platform + WhitelistFS bool + HostNetwork bool + ControllerFD int +} + // Install installs seccomp filters for based on the given platform. -func Install(p platform.Platform, whitelistFS, hostNetwork bool) error { +func Install(opt Options) error { s := allowedSyscalls + s.Merge(controlServerFilters(opt.ControllerFD)) // Set of additional filters used by -race and -msan. Returns empty // when not enabled. s.Merge(instrumentationFilters()) - if whitelistFS { + if opt.WhitelistFS { Report("direct file access allows unrestricted file access!") s.Merge(whitelistFSFilters()) } - if hostNetwork { + if opt.HostNetwork { Report("host networking enabled: syscall filters less restrictive!") s.Merge(hostInetFilters()) } - switch p := p.(type) { + switch p := opt.Platform.(type) { case *ptrace.PTrace: s.Merge(ptraceFilters()) case *kvm.KVM: diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 0ad830a6b..74d0c2534 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -351,9 +351,13 @@ func (l *Loader) run() error { if l.conf.DisableSeccomp { filter.Report("syscall filter is DISABLED. Running in less secure mode.") } else { - whitelistFS := l.conf.FileAccess == FileAccessDirect - hostNet := l.conf.Network == NetworkHost - if err := filter.Install(l.k.Platform, whitelistFS, hostNet); err != nil { + opts := filter.Options{ + Platform: l.k.Platform, + WhitelistFS: l.conf.FileAccess == FileAccessDirect, + HostNetwork: l.conf.Network == NetworkHost, + ControllerFD: l.ctrl.srv.FD(), + } + if err := filter.Install(opts); err != nil { return fmt.Errorf("Failed to install seccomp filters: %v", err) } }