diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go index 0428282dd..391cfaa1c 100644 --- a/pkg/abi/linux/prctl.go +++ b/pkg/abi/linux/prctl.go @@ -155,3 +155,10 @@ const ( ARCH_GET_GS = 0x1004 ARCH_SET_CPUID = 0x1012 ) + +// Flags for prctl(PR_SET_DUMPABLE), defined in include/linux/sched/coredump.h. +const ( + SUID_DUMP_DISABLE = 0 + SUID_DUMP_USER = 1 + SUID_DUMP_ROOT = 2 +) diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index 379569823..986bc0a45 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -21,11 +21,14 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) // taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr -// method to return the task as the owner. +// method to return either the task or root as the owner, depending on the +// task's dumpability. // // +stateify savable type taskOwnedInodeOps struct { @@ -41,9 +44,42 @@ func (i *taskOwnedInodeOps) UnstableAttr(ctx context.Context, inode *fs.Inode) ( if err != nil { return fs.UnstableAttr{}, err } - // Set the task owner as the file owner. + + // By default, set the task owner as the file owner. creds := i.t.Credentials() uattr.Owner = fs.FileOwner{creds.EffectiveKUID, creds.EffectiveKGID} + + // Linux doesn't apply dumpability adjustments to world + // readable/executable directories so that applications can stat + // /proc/PID to determine the effective UID of a process. See + // fs/proc/base.c:task_dump_owner. + if fs.IsDir(inode.StableAttr) && uattr.Perms == fs.FilePermsFromMode(0555) { + return uattr, nil + } + + // If the task is not dumpable, then root (in the namespace preferred) + // owns the file. + var m *mm.MemoryManager + i.t.WithMuLocked(func(t *kernel.Task) { + m = t.MemoryManager() + }) + + if m == nil { + uattr.Owner.UID = auth.RootKUID + uattr.Owner.GID = auth.RootKGID + } else if m.Dumpability() != mm.UserDumpable { + if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() { + uattr.Owner.UID = kuid + } else { + uattr.Owner.UID = auth.RootKUID + } + if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() { + uattr.Owner.GID = kgid + } else { + uattr.Owner.GID = auth.RootKGID + } + } + return uattr, nil } diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 77e03d349..21a965f90 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -96,7 +96,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks boo contents["cgroup"] = newCGroupInode(t, msrc, p.cgroupControllers) } - // TODO(b/31916171): Set EUID/EGID based on dumpability. + // N.B. taskOwnedInodeOps enforces dumpability-based ownership. d := &taskDir{ Dir: *ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)), t: t, @@ -667,6 +667,21 @@ func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { return newProcInode(c, msrc, fs.SpecialFile, t) } +// Check implements fs.InodeOperations.Check. +func (c *comm) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool { + // This file can always be read or written by members of the same + // thread group. See fs/proc/base.c:proc_tid_comm_permission. + // + // N.B. This check is currently a no-op as we don't yet support writing + // and this file is world-readable anyways. + t := kernel.TaskFromContext(ctx) + if t != nil && t.ThreadGroup() == c.t.ThreadGroup() && !p.Execute { + return true + } + + return fs.ContextCanAccessFile(ctx, inode, p) +} + // GetFile implements fs.InodeOperations.GetFile. func (c *comm) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { return fs.NewFile(ctx, dirent, flags, &commFile{t: c.t}), nil diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 4423e7efd..193447b17 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -19,6 +19,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserror" ) @@ -92,6 +93,14 @@ const ( // ptrace(2), subsection "Ptrace access mode checking". If attach is true, it // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access // mode PTRACE_MODE_READ. +// +// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a +// racing setuid(2) may change traceability). This may pose a risk when a task +// changes from traceable to not traceable. This is only problematic across +// execve, where privileges may increase. +// +// We currently do not implement privileged executables (set-user/group-ID bits +// and file capabilities), so that case is not reachable. func (t *Task) CanTrace(target *Task, attach bool) bool { // "1. If the calling thread and the target thread are in the same thread // group, access is always allowed." - ptrace(2) @@ -162,7 +171,13 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { if cgid := callerCreds.RealKGID; cgid != targetCreds.RealKGID || cgid != targetCreds.EffectiveKGID || cgid != targetCreds.SavedKGID { return false } - // TODO(b/31916171): dumpability check + var targetMM *mm.MemoryManager + target.WithMuLocked(func(t *Task) { + targetMM = t.MemoryManager() + }) + if targetMM != nil && targetMM.Dumpability() != mm.UserDumpable { + return false + } if callerCreds.UserNamespace != targetCreds.UserNamespace { return false } diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index 5d1425d5c..35d5cb90c 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -68,6 +68,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" "gvisor.googlesource.com/gvisor/pkg/syserror" ) @@ -198,6 +199,12 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState { return flags.CloseOnExec }) + // NOTE(b/30815691): We currently do not implement privileged + // executables (set-user/group-ID bits and file capabilities). This + // allows us to unconditionally enable user dumpability on the new mm. + // See fs/exec.c:setup_new_exec. + r.tc.MemoryManager.SetDumpability(mm.UserDumpable) + // Switch to the new process. t.MemoryManager().Deactivate() t.mu.Lock() diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go index 17f08729a..ec95f78d0 100644 --- a/pkg/sentry/kernel/task_identity.go +++ b/pkg/sentry/kernel/task_identity.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" "gvisor.googlesource.com/gvisor/pkg/syserror" ) @@ -206,8 +207,17 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) { // (filesystem UIDs aren't implemented, nor are any of the capabilities in // question) - // Not documented, but compare Linux's kernel/cred.c:commit_creds(). if oldE != newE { + // "[dumpability] is reset to the current value contained in + // the file /proc/sys/fs/suid_dumpable (which by default has + // the value 0), in the following circumstances: The process's + // effective user or group ID is changed." - prctl(2) + // + // (suid_dumpable isn't implemented, so we just use the + // default. + t.MemoryManager().SetDumpability(mm.NotDumpable) + + // Not documented, but compare Linux's kernel/cred.c:commit_creds(). t.parentDeathSignal = 0 } } @@ -303,8 +313,18 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) { t.creds = t.creds.Fork() // See doc for creds. t.creds.RealKGID, t.creds.EffectiveKGID, t.creds.SavedKGID = newR, newE, newS - // Not documented, but compare Linux's kernel/cred.c:commit_creds(). if oldE != newE { + // "[dumpability] is reset to the current value contained in + // the file /proc/sys/fs/suid_dumpable (which by default has + // the value 0), in the following circumstances: The process's + // effective user or group ID is changed." - prctl(2) + // + // (suid_dumpable isn't implemented, so we just use the + // default. + t.MemoryManager().SetDumpability(mm.NotDumpable) + + // Not documented, but compare Linux's + // kernel/cred.c:commit_creds(). t.parentDeathSignal = 0 } } diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go index 7a65a62a2..7646d5ab2 100644 --- a/pkg/sentry/mm/lifecycle.go +++ b/pkg/sentry/mm/lifecycle.go @@ -37,6 +37,7 @@ func NewMemoryManager(p platform.Platform, mfp pgalloc.MemoryFileProvider) *Memo privateRefs: &privateRefs{}, users: 1, auxv: arch.Auxv{}, + dumpability: UserDumpable, aioManager: aioManager{contexts: make(map[uint64]*AIOContext)}, } } @@ -79,8 +80,9 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { envv: mm.envv, auxv: append(arch.Auxv(nil), mm.auxv...), // IncRef'd below, once we know that there isn't an error. - executable: mm.executable, - aioManager: aioManager{contexts: make(map[uint64]*AIOContext)}, + executable: mm.executable, + dumpability: mm.dumpability, + aioManager: aioManager{contexts: make(map[uint64]*AIOContext)}, } // Copy vmas. diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go index 9768e51f1..c218006ee 100644 --- a/pkg/sentry/mm/metadata.go +++ b/pkg/sentry/mm/metadata.go @@ -20,6 +20,36 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) +// Dumpability describes if and how core dumps should be created. +type Dumpability int + +const ( + // NotDumpable indicates that core dumps should never be created. + NotDumpable Dumpability = iota + + // UserDumpable indicates that core dumps should be created, owned by + // the current user. + UserDumpable + + // RootDumpable indicates that core dumps should be created, owned by + // root. + RootDumpable +) + +// Dumpability returns the dumpability. +func (mm *MemoryManager) Dumpability() Dumpability { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + return mm.dumpability +} + +// SetDumpability sets the dumpability. +func (mm *MemoryManager) SetDumpability(d Dumpability) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.dumpability = d +} + // ArgvStart returns the start of the application argument vector. // // There is no guarantee that this value is sensible w.r.t. ArgvEnd. diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index eb6defa2b..0a026ff8c 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -219,6 +219,12 @@ type MemoryManager struct { // executable is protected by metadataMu. executable *fs.Dirent + // dumpability describes if and how this MemoryManager may be dumped to + // userspace. + // + // dumpability is protected by metadataMu. + dumpability Dumpability + // aioManager keeps track of AIOContexts used for async IOs. AIOManager // must be cloned when CLONE_VM is used. aioManager aioManager diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 117ae1a0e..1b7e5616b 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -15,6 +15,7 @@ package linux import ( + "fmt" "syscall" "gvisor.googlesource.com/gvisor/pkg/abi/linux" @@ -23,6 +24,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" ) // Prctl implements linux syscall prctl(2). @@ -44,6 +46,33 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall _, err := t.CopyOut(args[1].Pointer(), int32(t.ParentDeathSignal())) return 0, nil, err + case linux.PR_GET_DUMPABLE: + d := t.MemoryManager().Dumpability() + switch d { + case mm.NotDumpable: + return linux.SUID_DUMP_DISABLE, nil, nil + case mm.UserDumpable: + return linux.SUID_DUMP_USER, nil, nil + case mm.RootDumpable: + return linux.SUID_DUMP_ROOT, nil, nil + default: + panic(fmt.Sprintf("Unknown dumpability %v", d)) + } + + case linux.PR_SET_DUMPABLE: + var d mm.Dumpability + switch args[1].Int() { + case linux.SUID_DUMP_DISABLE: + d = mm.NotDumpable + case linux.SUID_DUMP_USER: + d = mm.UserDumpable + default: + // N.B. Userspace may not pass SUID_DUMP_ROOT. + return 0, nil, syscall.EINVAL + } + t.MemoryManager().SetDumpability(d) + return 0, nil, nil + case linux.PR_GET_KEEPCAPS: if t.Credentials().KeepCaps { return 1, nil, nil @@ -171,9 +200,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return 0, nil, t.DropBoundingCapability(cp) - case linux.PR_GET_DUMPABLE, - linux.PR_SET_DUMPABLE, - linux.PR_GET_TIMING, + case linux.PR_GET_TIMING, linux.PR_SET_TIMING, linux.PR_GET_TSC, linux.PR_SET_TSC, diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ba9fd6d1f..7633ab162 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1317,6 +1317,7 @@ cc_binary( linkstatic = 1, deps = [ "//test/util:capability_util", + "//test/util:cleanup", "//test/util:multiprocess_util", "//test/util:posix_error", "//test/util:test_util", diff --git a/test/syscalls/linux/prctl.cc b/test/syscalls/linux/prctl.cc index bce42dc74..bd1779557 100644 --- a/test/syscalls/linux/prctl.cc +++ b/test/syscalls/linux/prctl.cc @@ -17,10 +17,12 @@ #include #include #include + #include #include "gtest/gtest.h" #include "test/util/capability_util.h" +#include "test/util/cleanup.h" #include "test/util/multiprocess_util.h" #include "test/util/posix_error.h" #include "test/util/test_util.h" @@ -35,6 +37,16 @@ namespace testing { namespace { +#ifndef SUID_DUMP_DISABLE +#define SUID_DUMP_DISABLE 0 +#endif /* SUID_DUMP_DISABLE */ +#ifndef SUID_DUMP_USER +#define SUID_DUMP_USER 1 +#endif /* SUID_DUMP_USER */ +#ifndef SUID_DUMP_ROOT +#define SUID_DUMP_ROOT 2 +#endif /* SUID_DUMP_ROOT */ + TEST(PrctlTest, NameInitialized) { const size_t name_length = 20; char name[name_length] = {}; @@ -178,6 +190,28 @@ TEST(PrctlTest, InvalidPrSetMM) { ASSERT_THAT(prctl(PR_SET_MM, 0, 0, 0, 0), SyscallFailsWithErrno(EPERM)); } +// Sanity check that dumpability is remembered. +TEST(PrctlTest, SetGetDumpability) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); + EXPECT_THAT(prctl(PR_GET_DUMPABLE), + SyscallSucceedsWithValue(SUID_DUMP_DISABLE)); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); + EXPECT_THAT(prctl(PR_GET_DUMPABLE), SyscallSucceedsWithValue(SUID_DUMP_USER)); +} + +// SUID_DUMP_ROOT cannot be set via PR_SET_DUMPABLE. +TEST(PrctlTest, RootDumpability) { + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_ROOT), + SyscallFailsWithErrno(EINVAL)); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index ede6fb860..924b98e3a 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -69,9 +69,11 @@ // way to get it tested on both gVisor, PTrace and Linux. using ::testing::AllOf; +using ::testing::AnyOf; using ::testing::ContainerEq; using ::testing::Contains; using ::testing::ContainsRegex; +using ::testing::Eq; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsSupersetOf; @@ -86,6 +88,16 @@ namespace gvisor { namespace testing { namespace { +#ifndef SUID_DUMP_DISABLE +#define SUID_DUMP_DISABLE 0 +#endif /* SUID_DUMP_DISABLE */ +#ifndef SUID_DUMP_USER +#define SUID_DUMP_USER 1 +#endif /* SUID_DUMP_USER */ +#ifndef SUID_DUMP_ROOT +#define SUID_DUMP_ROOT 2 +#endif /* SUID_DUMP_ROOT */ + // O_LARGEFILE as defined by Linux. glibc tries to be clever by setting it to 0 // because "it isn't needed", even though Linux can return it via F_GETFL. constexpr int kOLargeFile = 00100000; @@ -1896,6 +1908,51 @@ void CheckDuplicatesRecursively(std::string path) { TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); } +// Most /proc/PID files are owned by the task user with SUID_DUMP_USER. +TEST(ProcPid, UserDumpableOwner) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds()); + + // This applies to the task directory itself and files inside. + struct stat st; + ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); + + ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); +} + +// /proc/PID files are owned by root with SUID_DUMP_DISABLE. +TEST(ProcPid, RootDumpableOwner) { + int before; + ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds()); + auto cleanup = Cleanup([before] { + ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds()); + }); + + EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds()); + + // This *does not* applies to the task directory itself (or other 0555 + // directories), but does to files inside. + struct stat st; + ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds()); + EXPECT_EQ(st.st_uid, geteuid()); + EXPECT_EQ(st.st_gid, getegid()); + + // This file is owned by root. Also allow nobody in case this test is running + // in a userns without root mapped. + ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds()); + EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534))); + EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534))); +} + } // namespace } // namespace testing } // namespace gvisor