Automated rollback of changelist 225861605
PiperOrigin-RevId: 226224230
Change-Id: Id24c7d3733722fd41d5fe74ef64e0ce8c68f0b12
parent ff7178a4d1
commit 86c9bd2547
@@ -60,7 +60,7 @@ const (
 	DefaultNofileHardLimit = 4096
 
 	// DefaultMemlockLimit is called MLOCK_LIMIT in Linux.
-	DefaultMemlockLimit = 64 * 1024
+	DefaultMemlockLimit = 64 * 1094
 
 	// DefaultMsgqueueLimit is called MQ_BYTES_MAX in Linux.
 	DefaultMsgqueueLimit = 819200

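Worth flagging: the restored constant is not 64 KiB. Linux's MLOCK_LIMIT default is 64 KiB, i.e. 64 * 1024 bytes, so the restored 64 * 1094 reads like a transposition of 1024. A quick arithmetic check (plain Go, not part of this change):

package main

import "fmt"

func main() {
	// 64 KiB, the value the rolled-back change had used.
	fmt.Println(64 * 1024) // 65536
	// The value this rollback restores: not a whole number of KiB.
	fmt.Println(64*1094, (64*1094)%1024) // 70016 384
}
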
@@ -49,18 +49,6 @@ const (
 	MREMAP_FIXED = 1 << 1
 )
 
-// Flags for mlock2(2).
-const (
-	MLOCK_ONFAULT = 0x01
-)
-
-// Flags for mlockall(2).
-const (
-	MCL_CURRENT = 1
-	MCL_FUTURE  = 2
-	MCL_ONFAULT = 4
-)
-
 // Advice for madvise(2).
 const (
 	MADV_NORMAL = 0

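For reference, the removed constants mirror Linux's UAPI values. A minimal sketch of the flag validation the deleted Mlock2 handler performed further down in this diff (mlock2FlagsValid is a hypothetical helper, not part of the change):

package main

import "fmt"

// Values matching the removed block above (Linux UAPI).
const (
	MLOCK_ONFAULT = 0x01

	MCL_CURRENT = 1
	MCL_FUTURE  = 2
	MCL_ONFAULT = 4
)

// mlock2FlagsValid: mlock2(2) accepts only MLOCK_ONFAULT, so any other
// set bit means EINVAL.
func mlock2FlagsValid(flags int32) bool {
	return flags&^MLOCK_ONFAULT == 0
}

func main() {
	fmt.Println(mlock2FlagsValid(MLOCK_ONFAULT))        // true
	fmt.Println(mlock2FlagsValid(0x02))                 // false
	fmt.Println(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) // 7
}
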
@@ -33,7 +33,7 @@ const (
 	Rss
 	ProcessCount
 	NumberOfFiles
-	MemoryLocked
+	MemoryPagesLocked
 	AS
 	Locks
 	SignalsPending

@@ -30,7 +30,7 @@ var FromLinuxResource = map[int]LimitType{
 	linux.RLIMIT_RSS:        Rss,
 	linux.RLIMIT_NPROC:      ProcessCount,
 	linux.RLIMIT_NOFILE:     NumberOfFiles,
-	linux.RLIMIT_MEMLOCK:    MemoryLocked,
+	linux.RLIMIT_MEMLOCK:    MemoryPagesLocked,
 	linux.RLIMIT_AS:         AS,
 	linux.RLIMIT_LOCKS:      Locks,
 	linux.RLIMIT_SIGPENDING: SignalsPending,

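Both hunks above key the same rlimit; only the Go-side name reverts. A self-contained sketch of the translation this table performs (cut down to two entries; 7 and 8 are Linux's RLIMIT_NOFILE and RLIMIT_MEMLOCK numbers on x86-64):

package main

import "fmt"

// LimitType mirrors the enum in the hunk above (abridged).
type LimitType int

const (
	NumberOfFiles LimitType = iota
	MemoryPagesLocked
)

// A cut-down FromLinuxResource: Linux rlimit numbers to sentry limit types.
var fromLinuxResource = map[int]LimitType{
	7: NumberOfFiles,
	8: MemoryPagesLocked,
}

func main() {
	if lt, ok := fromLinuxResource[8]; ok {
		fmt.Println("RLIMIT_MEMLOCK maps to", lt)
	}
}
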
@@ -243,40 +243,6 @@ type MappingIdentity interface {
 	Msync(ctx context.Context, mr MappableRange) error
 }
 
-// MLockMode specifies the memory locking behavior of a memory mapping.
-type MLockMode int
-
-// Note that the ordering of MLockModes is significant; see
-// mm.MemoryManager.defMLockMode.
-const (
-	// MLockNone specifies that a mapping has no memory locking behavior.
-	//
-	// This must be the zero value for MLockMode.
-	MLockNone MLockMode = iota
-
-	// MLockEager specifies that a mapping is memory-locked, as by mlock() or
-	// similar. Pages in the mapping should be made, and kept, resident in
-	// physical memory as soon as possible.
-	//
-	// As of this writing, MLockEager does not cause memory-locking to be
-	// requested from the host; it only affects the sentry's memory management
-	// behavior.
-	//
-	// MLockEager is analogous to Linux's VM_LOCKED.
-	MLockEager
-
-	// MLockLazy specifies that a mapping is memory-locked, as by mlock() or
-	// similar. Pages in the mapping should be kept resident in physical memory
-	// once they have been made resident due to e.g. a page fault.
-	//
-	// As of this writing, MLockLazy does not cause memory-locking to be
-	// requested from the host; in fact, it has virtually no effect, except for
-	// interactions between mlocked pages and other syscalls.
-	//
-	// MLockLazy is analogous to Linux's VM_LOCKED | VM_LOCKONFAULT.
-	MLockLazy
-)
-
 // MMapOpts specifies a request to create a memory mapping.
 type MMapOpts struct {
 	// Length is the length of the mapping.

@@ -337,9 +303,6 @@ type MMapOpts struct {
 	// mapping (see platform.AddressSpace.MapFile).
 	Precommit bool
 
-	// MLockMode specifies the memory locking behavior of the mapping.
-	MLockMode MLockMode
-
 	// Hint is the name used for the mapping in /proc/[pid]/maps. If Hint is
 	// empty, MappingIdentity.MappedName() will be used instead.
 	//

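The removed "ordering is significant" note matters because MMap (later in this diff) promoted a request's mode to the process default when the default was stronger. A self-contained sketch of that promotion, assuming the three modes from the removed block:

package main

import "fmt"

// The three modes from the removed block; iota makes MLockNone the zero value.
type MLockMode int

const (
	MLockNone MLockMode = iota
	MLockEager
	MLockLazy
)

// effectiveMode mirrors the promotion the removed MMap code performed: a new
// mapping uses whichever of the requested mode or the mlockall(MCL_FUTURE)
// default is greater, which is why the constant ordering is significant.
func effectiveMode(requested, def MLockMode) MLockMode {
	if requested < def {
		return def
	}
	return requested
}

func main() {
	fmt.Println(effectiveMode(MLockNone, MLockEager) == MLockEager) // true
}
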
@@ -106,7 +106,6 @@ go_library(
         "//pkg/sentry/context",
         "//pkg/sentry/fs",
         "//pkg/sentry/fs/proc/seqfile",
-        "//pkg/sentry/kernel/auth",
         "//pkg/sentry/kernel/futex",
         "//pkg/sentry/kernel/shm",
         "//pkg/sentry/limits",

@@ -149,7 +149,7 @@ func (mm *MemoryManager) Deactivate() {
 // for all addresses in ar should be precommitted.
 //
 // Preconditions: mm.activeMu must be locked. mm.as != nil. ar.Length() != 0.
-// ar must be page-aligned. pseg == mm.pmas.LowerBoundSegment(ar.Start).
+// ar must be page-aligned. pseg.Range().Contains(ar.Start).
 func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, precommit bool) error {
 	// By default, map entire pmas at a time, under the assumption that there
 	// is no cost to mapping more of a pma than necessary.

@@ -173,9 +173,7 @@ func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, pre
 		}
 	}
 
-	// Since this checks ar.End and not mapAR.End, we will never map a pma that
-	// is not required.
-	for pseg.Ok() && pseg.Start() < ar.End {
+	for {
 		pma := pseg.ValuePtr()
 		pmaAR := pseg.Range()
 		pmaMapAR := pmaAR.Intersect(mapAR)

@@ -186,9 +184,13 @@ func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, pre
 		if err := pma.file.MapInto(mm.as, pmaMapAR.Start, pseg.fileRangeOf(pmaMapAR), perms, precommit); err != nil {
 			return err
 		}
+		// Since this checks ar.End and not mapAR.End, we will never map a pma
+		// that is not required.
+		if ar.End <= pmaAR.End {
+			return nil
+		}
 		pseg = pseg.NextSegment()
 	}
-	return nil
 }
 
 // unmapASLocked removes all AddressSpace mappings for addresses in ar.

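Both loop shapes are visible above. A toy model of why the restored for {} form still terminates: it returns as soon as the current segment covers ar.End, assuming segments are contiguous over the mapped range (this is a sketch, not the pma iterator API):

package main

import "fmt"

// A toy segment list standing in for the pma set; each segment covers
// [start, end).
type segment struct{ start, end uint64 }

// mapRange mirrors the restored loop: visit each segment intersecting ar and
// stop once the segment containing ar.End-1 has been handled.
func mapRange(segs []segment, arStart, arEnd uint64) []segment {
	var mapped []segment
	for _, s := range segs {
		if s.end <= arStart || s.start >= arEnd {
			continue
		}
		mapped = append(mapped, s)
		// Checking arEnd (not a possibly larger map-ahead range) means we
		// never touch a segment that isn't required.
		if arEnd <= s.end {
			break
		}
	}
	return mapped
}

func main() {
	segs := []segment{{0, 4096}, {4096, 8192}, {8192, 16384}}
	fmt.Println(mapRange(segs, 0, 8192)) // [{0 4096} {4096 8192}]
}
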
@@ -22,7 +22,6 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
-	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
 )

@@ -64,12 +63,8 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
 		layout:      mm.layout,
 		privateRefs: mm.privateRefs,
 		users:       1,
-		brk:         mm.brk,
 		usageAS:     mm.usageAS,
-		// "The child does not inherit its parent's memory locks (mlock(2),
-		// mlockall(2))." - fork(2). So lockedAS is 0 and defMLockMode is
-		// MLockNone, both of which are zero values. vma.mlockMode is reset
-		// when copied below.
+		brk:         mm.brk,
 		captureInvalidations: true,
 		argv:        mm.argv,
 		envv:        mm.envv,

@@ -82,7 +77,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
 	// Copy vmas.
 	dstvgap := mm2.vmas.FirstGap()
 	for srcvseg := mm.vmas.FirstSegment(); srcvseg.Ok(); srcvseg = srcvseg.NextSegment() {
-		vma := srcvseg.Value() // makes a copy of the vma
+		vma := srcvseg.ValuePtr()
 		vmaAR := srcvseg.Range()
 		// Inform the Mappable, if any, of the new mapping.
 		if vma.mappable != nil {

@@ -94,8 +89,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
 		if vma.id != nil {
 			vma.id.IncRef()
 		}
-		vma.mlockMode = memmap.MLockNone
-		dstvgap = mm2.vmas.Insert(dstvgap, vmaAR, vma).NextGap()
+		dstvgap = mm2.vmas.Insert(dstvgap, vmaAR, *vma).NextGap()
 		// We don't need to update mm2.usageAS since we copied it from mm
 		// above.
 	}

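The rolled-back Fork copied each vma by value precisely so it could clear mlockMode on the child, per fork(2): memory locks are not inherited. A minimal sketch of that copy-then-reset pattern (toy types, not the sentry's):

package main

import "fmt"

type mlockMode int

const mlockNone mlockMode = 0

type vma struct {
	name      string
	mlockMode mlockMode
}

func main() {
	parent := vma{name: "[heap]", mlockMode: 2 /* locked */}

	// Copy by value, then reset the lock mode, leaving the parent untouched.
	// This mirrors the removed `vma := srcvseg.Value()` plus
	// `vma.mlockMode = memmap.MLockNone` lines above.
	child := parent
	child.mlockMode = mlockNone

	fmt.Println(parent.mlockMode, child.mlockMode) // 2 0
}
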
@@ -95,6 +95,11 @@ type MemoryManager struct {
 	// vmas is protected by mappingMu.
 	vmas vmaSet
 
+	// usageAS is vmas.Span(), cached to accelerate RLIMIT_AS checks.
+	//
+	// usageAS is protected by mappingMu.
+	usageAS uint64
+
 	// brk is the mm's brk, which is manipulated using the brk(2) system call.
 	// The brk is initially set up by the loader which maps an executable
 	// binary into the mm.

@@ -102,23 +107,6 @@ type MemoryManager struct {
 	// brk is protected by mappingMu.
 	brk usermem.AddrRange
 
-	// usageAS is vmas.Span(), cached to accelerate RLIMIT_AS checks.
-	//
-	// usageAS is protected by mappingMu.
-	usageAS uint64
-
-	// lockedAS is the combined size in bytes of all vmas with vma.mlockMode !=
-	// memmap.MLockNone.
-	//
-	// lockedAS is protected by mappingMu.
-	lockedAS uint64
-
-	// New VMAs created by MMap use whichever of memmap.MMapOpts.MLockMode or
-	// defMLockMode is greater.
-	//
-	// defMLockMode is protected by mappingMu.
-	defMLockMode memmap.MLockMode
-
 	// activeMu is loosely analogous to Linux's struct
 	// mm_struct::page_table_lock.
 	activeMu ssync.DowngradableRWMutex `state:"nosave"`

@@ -264,8 +252,6 @@ type vma struct {
 	// metag, none of which we currently support.
 	growsDown bool `state:"manual"`
 
-	mlockMode memmap.MLockMode
-
 	// If id is not nil, it controls the lifecycle of mappable and provides vma
 	// metadata shown in /proc/[pid]/maps, and the vma holds a reference.
 	id memmap.MappingIdentity

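The removed lockedAS counter was maintained on every vma mode transition (see the MLock and MLockAll hunks below). A toy model of that bookkeeping, under the assumption that only transitions into or out of a locked mode change the total:

package main

import "fmt"

// applyModeChange mirrors how the removed code maintained lockedAS: it grows
// when a vma transitions into a locked mode and shrinks when it transitions
// out, so re-locking an already-locked range is a no-op.
func applyModeChange(lockedAS, length uint64, prevLocked, nowLocked bool) uint64 {
	switch {
	case nowLocked && !prevLocked:
		lockedAS += length
	case !nowLocked && prevLocked:
		lockedAS -= length
	}
	return lockedAS
}

func main() {
	as := applyModeChange(0, 4096, false, true) // mlock one page
	as = applyModeChange(as, 4096, true, true)  // mlock again: unchanged
	fmt.Println(as)                             // 4096
}
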
@@ -20,7 +20,6 @@ import (
 
 	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
-	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"

@@ -129,24 +128,16 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme
 
 	// Get the new vma.
 	mm.mappingMu.Lock()
-	if opts.MLockMode < mm.defMLockMode {
-		opts.MLockMode = mm.defMLockMode
-	}
 	vseg, ar, err := mm.createVMALocked(ctx, opts)
 	if err != nil {
 		mm.mappingMu.Unlock()
 		return 0, err
 	}
 
-	// TODO: In Linux, VM_LOCKONFAULT (which may be set on the new
-	// vma by mlockall(MCL_FUTURE|MCL_ONFAULT) => mm_struct::def_flags) appears
-	// to effectively disable MAP_POPULATE by unsetting FOLL_POPULATE in
-	// mm/util.c:vm_mmap_pgoff() => mm/gup.c:__mm_populate() =>
-	// populate_vma_page_range(). Confirm this behavior.
 	switch {
-	case opts.Precommit || opts.MLockMode == memmap.MLockEager:
+	case opts.Precommit:
 		// Get pmas and map with precommit as requested.
-		mm.populateVMAAndUnlock(ctx, vseg, ar, true)
+		mm.populateAndUnlock(ctx, vseg, ar, true)
 
 	case opts.Mappable == nil && length <= privateAllocUnit:
 		// NOTE: Get pmas and map eagerly in the hope

@@ -155,7 +146,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme
 		// memmap.Mappable.Translate is unknown; and only for small mappings,
 		// to avoid needing to allocate large amounts of memory that we may
 		// subsequently need to checkpoint.
-		mm.populateVMAAndUnlock(ctx, vseg, ar, false)
+		mm.populateAndUnlock(ctx, vseg, ar, false)
 
 	default:
 		mm.mappingMu.Unlock()

@@ -164,29 +155,31 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme
 	return ar.Start, nil
 }
 
-// populateVMA obtains pmas for addresses in ar in the given vma, and maps them
 // into mm.as if it is active.
-// Preconditions: mm.mappingMu must be locked for writing.
 //
+// Preconditions: mm.mappingMu must be locked. vseg.Range().IsSupersetOf(ar).
-func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
+// Postconditions: mm.mappingMu will be unlocked.
+func (mm *MemoryManager) populateAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
 	if !vseg.ValuePtr().effectivePerms.Any() {
 		// Linux doesn't populate inaccessible pages. See
 		// mm/gup.c:populate_vma_page_range.
+		mm.mappingMu.Unlock()
 		return
 	}
 
 	mm.activeMu.Lock()
 	// Can't defer mm.activeMu.Unlock(); see below.
 
-	// Even if we get new pmas, we can't actually map them if we don't have an
+	// Even if we get a new pma, we can't actually map it if we don't have an
 	// AddressSpace.
 	if mm.as == nil {
 		mm.activeMu.Unlock()
+		mm.mappingMu.Unlock()
 		return
 	}
 
 	// Ensure that we have usable pmas.
 	mm.mappingMu.DowngradeLock()
 	pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{})
 	mm.mappingMu.RUnlock()
 	if err != nil {
 		// mm/util.c:vm_mmap_pgoff() ignores the error, if any, from
 		// mm/gup.c:mm_populate(). If it matters, we'll get it again when

@@ -204,45 +197,6 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u
 	mm.activeMu.RUnlock()
 }
 
-// populateVMAAndUnlock is equivalent to populateVMA, but also unconditionally
-// unlocks mm.mappingMu. In cases where populateVMAAndUnlock is usable, it is
-// preferable to populateVMA since it unlocks mm.mappingMu before performing
-// expensive operations that don't require it to be locked.
-//
-// Preconditions: mm.mappingMu must be locked for writing.
-// vseg.Range().IsSupersetOf(ar).
-//
-// Postconditions: mm.mappingMu will be unlocked.
-func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
-	// See populateVMA above for commentary.
-	if !vseg.ValuePtr().effectivePerms.Any() {
-		mm.mappingMu.Unlock()
-		return
-	}
-
-	mm.activeMu.Lock()
-
-	if mm.as == nil {
-		mm.activeMu.Unlock()
-		mm.mappingMu.Unlock()
-		return
-	}
-
-	// mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it
-	// isn't needed at all for mapASLocked.
-	mm.mappingMu.DowngradeLock()
-	pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{})
-	mm.mappingMu.RUnlock()
-	if err != nil {
-		mm.activeMu.Unlock()
-		return
-	}
-
-	mm.activeMu.DowngradeLock()
-	mm.mapASLocked(pseg, ar, precommit)
-	mm.activeMu.RUnlock()
-}
-
 // MapStack allocates the initial process stack.
 func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error) {
 	// maxStackSize is the maximum supported process stack size in bytes.

@@ -282,7 +236,6 @@ func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error
 		MaxPerms:  usermem.AnyAccess,
 		Private:   true,
 		GrowsDown: true,
-		MLockMode: mm.defMLockMode,
 		Hint:      "[stack]",
 	})
 	return ar, err

@@ -381,19 +334,6 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
 	// occupies at least part of the destination. Thus the NoMove case always
 	// fails and the MayMove case always falls back to copying.
 
-	if vma := vseg.ValuePtr(); newSize > oldSize && vma.mlockMode != memmap.MLockNone {
-		// Check against RLIMIT_MEMLOCK. Unlike mmap, mlock, and mlockall,
-		// mremap in Linux does not check mm/mlock.c:can_do_mlock() and
-		// therefore does not return EPERM if RLIMIT_MEMLOCK is 0 and
-		// !CAP_IPC_LOCK.
-		mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
-		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
-			if newLockedAS := mm.lockedAS - oldSize + newSize; newLockedAS > mlockLimit {
-				return 0, syserror.EAGAIN
-			}
-		}
-	}
-
 	if opts.Move != MRemapMustMove {
 		// Handle no-ops and in-place shrinking. These cases don't care if
 		// [oldAddr, oldEnd) maps to a single vma, or is even mapped at all

@@ -420,7 +360,7 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
 		if vma.mappable != nil {
 			newOffset = vseg.mappableRange().End
 		}
-		vseg, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
+		_, _, err := mm.createVMALocked(ctx, memmap.MMapOpts{
 			Length:          newSize - oldSize,
 			MappingIdentity: vma.id,
 			Mappable:        vma.mappable,

@@ -431,13 +371,9 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
 			MaxPerms:  vma.maxPerms,
 			Private:   vma.private,
 			GrowsDown: vma.growsDown,
-			MLockMode: vma.mlockMode,
 			Hint:      vma.hint,
 		})
 		if err == nil {
-			if vma.mlockMode == memmap.MLockEager {
-				mm.populateVMA(ctx, vseg, ar, true)
-			}
 			return oldAddr, nil
 		}
 		// In-place growth failed. In the MRemapMayMove case, fall through to

@@ -526,14 +462,8 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
 		if vma.id != nil {
 			vma.id.IncRef()
 		}
-		vseg := mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma)
+		mm.vmas.Add(newAR, vma)
 		mm.usageAS += uint64(newAR.Length())
-		if vma.mlockMode != memmap.MLockNone {
-			mm.lockedAS += uint64(newAR.Length())
-			if vma.mlockMode == memmap.MLockEager {
-				mm.populateVMA(ctx, vseg, newAR, true)
-			}
-		}
 		return newAR.Start, nil
 	}
 

@@ -555,11 +485,8 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
 	vseg = mm.vmas.Isolate(vseg, oldAR)
 	vma := vseg.Value()
 	mm.vmas.Remove(vseg)
-	vseg = mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma)
+	mm.vmas.Add(newAR, vma)
 	mm.usageAS = mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length())
-	if vma.mlockMode != memmap.MLockNone {
-		mm.lockedAS = mm.lockedAS - uint64(oldAR.Length()) + uint64(newAR.Length())
-	}
 
 	// Move pmas. This is technically optional for non-private pmas, which
 	// could just go through memmap.Mappable.Translate again, but it's required

@@ -574,10 +501,6 @@ func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSi
 		vma.mappable.RemoveMapping(ctx, mm, oldAR, vma.off, vma.isMappableAsWritable())
 	}
 
-	if vma.mlockMode == memmap.MLockEager {
-		mm.populateVMA(ctx, vseg, newAR, true)
-	}
-
 	return newAR.Start, nil
 }
 

@@ -688,10 +611,9 @@ func (mm *MemoryManager) BrkSetup(ctx context.Context, addr usermem.Addr) {
 // error on failure.
 func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Addr, error) {
 	mm.mappingMu.Lock()
-	// Can't defer mm.mappingMu.Unlock(); see below.
+	defer mm.mappingMu.Unlock()
 
 	if addr < mm.brk.Start {
-		mm.mappingMu.Unlock()
 		return mm.brk.End, syserror.EINVAL
 	}
 

@@ -701,24 +623,21 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Ad
 	// heap + data + bss. The segment sizes need to be plumbed from the
 	// loader package to fully enforce RLIMIT_DATA.
 	if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur {
-		mm.mappingMu.Unlock()
 		return mm.brk.End, syserror.ENOMEM
 	}
 
 	oldbrkpg, _ := mm.brk.End.RoundUp()
 	newbrkpg, ok := addr.RoundUp()
 	if !ok {
-		mm.mappingMu.Unlock()
 		return mm.brk.End, syserror.EFAULT
 	}
 
 	switch {
 	case newbrkpg < oldbrkpg:
 		mm.unmapLocked(ctx, usermem.AddrRange{newbrkpg, oldbrkpg})
-		mm.mappingMu.Unlock()
 
 	case oldbrkpg < newbrkpg:
-		vseg, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
+		_, _, err := mm.createVMALocked(ctx, memmap.MMapOpts{
 			Length: uint64(newbrkpg - oldbrkpg),
 			Addr:   oldbrkpg,
 			Fixed:  true,

@@ -727,221 +646,17 @@ func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Ad
 			Perms:    usermem.ReadWrite,
 			MaxPerms: usermem.AnyAccess,
 			Private:  true,
-			// Linux: mm/mmap.c:sys_brk() => do_brk_flags() includes
-			// mm->def_flags.
-			MLockMode: mm.defMLockMode,
 			Hint:     "[heap]",
 		})
 		if err != nil {
-			mm.mappingMu.Unlock()
 			return mm.brk.End, err
 		}
-		if mm.defMLockMode == memmap.MLockEager {
-			mm.populateVMAAndUnlock(ctx, vseg, ar, true)
-		} else {
-			mm.mappingMu.Unlock()
-		}
-
-	default:
-		// Nothing to do.
-		mm.mappingMu.Unlock()
 	}
 
 	mm.brk.End = addr
 	return addr, nil
 }
-
-// MLock implements the semantics of Linux's mlock()/mlock2()/munlock(),
-// depending on mode.
-func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length uint64, mode memmap.MLockMode) error {
-	// Linux allows this to overflow.
-	la, _ := usermem.Addr(length + addr.PageOffset()).RoundUp()
-	ar, ok := addr.RoundDown().ToRange(uint64(la))
-	if !ok {
-		return syserror.EINVAL
-	}
-
-	mm.mappingMu.Lock()
-	// Can't defer mm.mappingMu.Unlock(); see below.
-
-	if mode != memmap.MLockNone {
-		// Check against RLIMIT_MEMLOCK.
-		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
-			mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
-			if mlockLimit == 0 {
-				mm.mappingMu.Unlock()
-				return syserror.EPERM
-			}
-			if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit {
-				mm.mappingMu.Unlock()
-				return syserror.ENOMEM
-			}
-		}
-	}
-
-	// Check this after RLIMIT_MEMLOCK for consistency with Linux.
-	if ar.Length() == 0 {
-		mm.mappingMu.Unlock()
-		return nil
-	}
-
-	// Apply the new mlock mode to vmas.
-	var unmapped bool
-	vseg := mm.vmas.FindSegment(ar.Start)
-	for {
-		if !vseg.Ok() {
-			unmapped = true
-			break
-		}
-		vseg = mm.vmas.Isolate(vseg, ar)
-		vma := vseg.ValuePtr()
-		prevMode := vma.mlockMode
-		vma.mlockMode = mode
-		if mode != memmap.MLockNone && prevMode == memmap.MLockNone {
-			mm.lockedAS += uint64(vseg.Range().Length())
-		} else if mode == memmap.MLockNone && prevMode != memmap.MLockNone {
-			mm.lockedAS -= uint64(vseg.Range().Length())
-		}
-		if ar.End <= vseg.End() {
-			break
-		}
-		vseg, _ = vseg.NextNonEmpty()
-	}
-	mm.vmas.MergeRange(ar)
-	mm.vmas.MergeAdjacent(ar)
-	if unmapped {
-		mm.mappingMu.Unlock()
-		return syserror.ENOMEM
-	}
-
-	if mode == memmap.MLockEager {
-		// Ensure that we have usable pmas. Since we didn't return ENOMEM
-		// above, ar must be fully covered by vmas, so we can just use
-		// NextSegment below.
-		mm.activeMu.Lock()
-		mm.mappingMu.DowngradeLock()
-		for vseg := mm.vmas.FindSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
-			if !vseg.ValuePtr().effectivePerms.Any() {
-				// Linux: mm/gup.c:__get_user_pages() returns EFAULT in this
-				// case, which is converted to ENOMEM by mlock.
-				mm.activeMu.Unlock()
-				mm.mappingMu.RUnlock()
-				return syserror.ENOMEM
-			}
-			_, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), pmaOpts{})
-			if err != nil {
-				mm.activeMu.Unlock()
-				mm.mappingMu.RUnlock()
-				// Linux: mm/mlock.c:__mlock_posix_error_return()
-				if err == syserror.EFAULT {
-					return syserror.ENOMEM
-				}
-				if err == syserror.ENOMEM {
-					return syserror.EAGAIN
-				}
-				return err
-			}
-		}
-
-		// Map pmas into the active AddressSpace, if we have one.
-		mm.mappingMu.RUnlock()
-		if mm.as != nil {
-			mm.activeMu.DowngradeLock()
-			err := mm.mapASLocked(mm.pmas.LowerBoundSegment(ar.Start), ar, true /* precommit */)
-			mm.activeMu.RUnlock()
-			if err != nil {
-				return err
-			}
-		} else {
-			mm.activeMu.Unlock()
-		}
-	} else {
-		mm.mappingMu.Unlock()
-	}
-
-	return nil
-}
-
-// MLockAllOpts holds options to MLockAll.
-type MLockAllOpts struct {
-	// If Current is true, change the memory-locking behavior of all mappings
-	// to Mode. If Future is true, upgrade the memory-locking behavior of all
-	// future mappings to Mode. At least one of Current or Future must be true.
-	Current bool
-	Future  bool
-	Mode    memmap.MLockMode
-}
-
-// MLockAll implements the semantics of Linux's mlockall()/munlockall(),
-// depending on opts.
-func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error {
-	if !opts.Current && !opts.Future {
-		return syserror.EINVAL
-	}
-
-	mm.mappingMu.Lock()
-	// Can't defer mm.mappingMu.Unlock(); see below.
-
-	if opts.Current {
-		if opts.Mode != memmap.MLockNone {
-			// Check against RLIMIT_MEMLOCK.
-			if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
-				mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
-				if mlockLimit == 0 {
-					mm.mappingMu.Unlock()
-					return syserror.EPERM
-				}
-				if uint64(mm.vmas.Span()) > mlockLimit {
-					mm.mappingMu.Unlock()
-					return syserror.ENOMEM
-				}
-			}
-		}
-		for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
-			vma := vseg.ValuePtr()
-			prevMode := vma.mlockMode
-			vma.mlockMode = opts.Mode
-			if opts.Mode != memmap.MLockNone && prevMode == memmap.MLockNone {
-				mm.lockedAS += uint64(vseg.Range().Length())
-			} else if opts.Mode == memmap.MLockNone && prevMode != memmap.MLockNone {
-				mm.lockedAS -= uint64(vseg.Range().Length())
-			}
-		}
-	}
-
-	if opts.Future {
-		mm.defMLockMode = opts.Mode
-	}
-
-	if opts.Current && opts.Mode == memmap.MLockEager {
-		// Linux: mm/mlock.c:sys_mlockall() => include/linux/mm.h:mm_populate()
-		// ignores the return value of __mm_populate(), so all errors below are
-		// ignored.
-		//
-		// Try to get usable pmas.
-		mm.activeMu.Lock()
-		mm.mappingMu.DowngradeLock()
-		for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
-			if vseg.ValuePtr().effectivePerms.Any() {
-				mm.getPMAsLocked(ctx, vseg, vseg.Range(), pmaOpts{})
-			}
-		}
-
-		// Map all pmas into the active AddressSpace, if we have one.
-		mm.mappingMu.RUnlock()
-		if mm.as != nil {
-			mm.activeMu.DowngradeLock()
-			mm.mapASLocked(mm.pmas.FirstSegment(), mm.applicationAddrRange(), true /* precommit */)
-			mm.activeMu.RUnlock()
-		} else {
-			mm.activeMu.Unlock()
-		}
-	} else {
-		mm.mappingMu.Unlock()
-	}
-	return nil
-}
 
 // Decommit implements the semantics of Linux's madvise(MADV_DONTNEED).
 func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
 	ar, ok := addr.ToRange(length)

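The removed MLock body encodes Linux's error ordering: without CAP_IPC_LOCK, a zero RLIMIT_MEMLOCK yields EPERM, an over-limit request yields ENOMEM, and only after the limit checks is a zero-length request accepted as a no-op. A self-contained sketch of that decision ladder (errors as strings for brevity; not the sentry's API):

package main

import "fmt"

// checkMlock mirrors the order of checks in the removed MLock above.
func checkMlock(length, newLockedAS, limit uint64, capIPCLock bool) string {
	if !capIPCLock {
		if limit == 0 {
			return "EPERM"
		}
		if newLockedAS > limit {
			return "ENOMEM"
		}
	}
	if length == 0 {
		return "ok (no-op)"
	}
	return "ok"
}

func main() {
	fmt.Println(checkMlock(0, 0, 0, false))           // EPERM even for length 0
	fmt.Println(checkMlock(8192, 8192, 4096, false))  // ENOMEM
	fmt.Println(checkMlock(4096, 4096, 65536, false)) // ok
}
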
@@ -965,25 +680,22 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
 	// ensures that Decommit immediately reduces host memory usage.
 	var didUnmapAS bool
 	pseg := mm.pmas.LowerBoundSegment(ar.Start)
+	vseg := mm.vmas.LowerBoundSegment(ar.Start)
 	mem := mm.p.Memory()
-	for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
-		vma := vseg.ValuePtr()
-		if vma.mlockMode != memmap.MLockNone {
-			return syserror.EINVAL
-		}
-		vsegAR := vseg.Range().Intersect(ar)
-		// pseg should already correspond to either this vma or a later one,
-		// since there can't be a pma without a corresponding vma.
-		if checkInvariants {
-			if pseg.Ok() && pseg.End() <= vsegAR.Start {
-				panic(fmt.Sprintf("pma %v precedes vma %v", pseg.Range(), vsegAR))
-			}
-		}
-		for pseg.Ok() && pseg.Start() < vsegAR.End {
+	for pseg.Ok() && pseg.Start() < ar.End {
 		pma := pseg.ValuePtr()
 		if pma.private && !mm.isPMACopyOnWriteLocked(pseg) {
 			psegAR := pseg.Range().Intersect(ar)
-			if vsegAR.IsSupersetOf(psegAR) && vma.mappable == nil {
+			vseg = vseg.seekNextLowerBound(psegAR.Start)
+			if checkInvariants {
+				if !vseg.Ok() {
+					panic(fmt.Sprintf("no vma after %#x", psegAR.Start))
+				}
+				if psegAR.Start < vseg.Start() {
+					panic(fmt.Sprintf("no vma in [%#x, %#x)", psegAR.Start, vseg.Start()))
+				}
+			}
+			if vseg.Range().IsSupersetOf(psegAR) && vseg.ValuePtr().mappable == nil {
 				if err := mem.Decommit(pseg.fileRangeOf(psegAR)); err == nil {
 					pseg = pseg.NextSegment()
 					continue

@@ -992,7 +704,7 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
 				// invalidation case below.
 			}
 		}
-		pseg = mm.pmas.Isolate(pseg, vsegAR)
+		pseg = mm.pmas.Isolate(pseg, ar)
 		pma = pseg.ValuePtr()
 		if !didUnmapAS {
 			// Unmap all of ar, not just pseg.Range(), to minimize host

@@ -1006,9 +718,9 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
 		}
 		pma.file.DecRef(pseg.fileRange())
 		mm.removeRSSLocked(pseg.Range())
 
 		pseg = mm.pmas.Remove(pseg).NextSegment()
 	}
-	}
+
 	// "If there are some parts of the specified address space that are not
 	// mapped, the Linux version of madvise() ignores them and applies the call

@@ -1020,28 +732,9 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
 	return nil
 }
 
-// MSyncOpts holds options to MSync.
-type MSyncOpts struct {
-	// Sync has the semantics of MS_SYNC.
-	Sync bool
-
-	// Invalidate has the semantics of MS_INVALIDATE.
-	Invalidate bool
-}
-
-// MSync implements the semantics of Linux's msync().
-func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length uint64, opts MSyncOpts) error {
-	if addr != addr.RoundDown() {
-		return syserror.EINVAL
-	}
-	if length == 0 {
-		return nil
-	}
-	la, ok := usermem.Addr(length).RoundUp()
-	if !ok {
-		return syserror.ENOMEM
-	}
-	ar, ok := addr.ToRange(uint64(la))
+// Sync implements the semantics of Linux's msync(MS_SYNC).
+func (mm *MemoryManager) Sync(ctx context.Context, addr usermem.Addr, length uint64) error {
+	ar, ok := addr.ToRange(length)
 	if !ok {
 		return syserror.ENOMEM
 	}

@@ -1066,14 +759,10 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length ui
 		}
 		lastEnd = vseg.End()
 		vma := vseg.ValuePtr()
-		if opts.Invalidate && vma.mlockMode != memmap.MLockNone {
-			mm.mappingMu.RUnlock()
-			return syserror.EBUSY
-		}
 		// It's only possible to have dirtied the Mappable through a shared
 		// mapping. Don't check if the mapping is writable, because mprotect
 		// may have changed this, and also because Linux doesn't.
-		if id := vma.id; opts.Sync && id != nil && vma.mappable != nil && !vma.private {
+		if id := vma.id; id != nil && vma.mappable != nil && !vma.private {
 			// We can't call memmap.MappingIdentity.Msync while holding
 			// mm.mappingMu since it may take fs locks that precede it in the
 			// lock order.

@@ -17,10 +17,8 @@ package mm
 import (
 	"fmt"
 
-	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
-	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"

@@ -55,23 +53,6 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
 		return vmaIterator{}, usermem.AddrRange{}, syserror.ENOMEM
 	}
 
-	if opts.MLockMode != memmap.MLockNone {
-		// Check against RLIMIT_MEMLOCK.
-		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
-			mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
-			if mlockLimit == 0 {
-				return vmaIterator{}, usermem.AddrRange{}, syserror.EPERM
-			}
-			newLockedAS := mm.lockedAS + opts.Length
-			if opts.Unmap {
-				newLockedAS -= mm.mlockedBytesRangeLocked(ar)
-			}
-			if newLockedAS > mlockLimit {
-				return vmaIterator{}, usermem.AddrRange{}, syserror.EAGAIN
-			}
-		}
-	}
-
 	// Remove overwritten mappings. This ordering is consistent with Linux:
 	// compare Linux's mm/mmap.c:mmap_region() => do_munmap(),
 	// file->f_op->mmap().

@@ -104,14 +85,10 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
 		maxPerms:  opts.MaxPerms,
 		private:   opts.Private,
 		growsDown: opts.GrowsDown,
-		mlockMode: opts.MLockMode,
 		id:        opts.MappingIdentity,
 		hint:      opts.Hint,
 	})
 	mm.usageAS += opts.Length
-	if opts.MLockMode != memmap.MLockNone {
-		mm.lockedAS += opts.Length
-	}
 
 	return vseg, ar, nil
 }

@@ -224,17 +201,6 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo
 	return 0, syserror.ENOMEM
 }
 
-// Preconditions: mm.mappingMu must be locked.
-func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 {
-	var total uint64
-	for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
-		if vseg.ValuePtr().mlockMode != memmap.MLockNone {
-			total += uint64(vseg.Range().Intersect(ar).Length())
-		}
-	}
-	return total
-}
-
 // getVMAsLocked ensures that vmas exist for all addresses in ar, and support
 // access of type (at, ignorePermissions). It returns:
 //

@@ -372,9 +338,6 @@ func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRa
 			vma.id.DecRef()
 		}
 		mm.usageAS -= uint64(vmaAR.Length())
-		if vma.mlockMode != memmap.MLockNone {
-			mm.lockedAS -= uint64(vmaAR.Length())
-		}
 		vgap = mm.vmas.Remove(vseg)
 		vseg = vgap.NextSegment()
 	}

@@ -405,7 +368,6 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa
 		vma1.maxPerms != vma2.maxPerms ||
 		vma1.private != vma2.private ||
 		vma1.growsDown != vma2.growsDown ||
-		vma1.mlockMode != vma2.mlockMode ||
 		vma1.id != vma2.id ||
 		vma1.hint != vma2.hint {
 		return vma{}, false

@@ -197,10 +197,10 @@ var AMD64 = &kernel.SyscallTable{
 		146: SchedGetPriorityMax,
 		147: SchedGetPriorityMin,
 		148: syscalls.ErrorWithEvent(syscall.EPERM), // SchedRrGetInterval,
-		149: Mlock,
-		150: Munlock,
-		151: Mlockall,
-		152: Munlockall,
+		149: syscalls.Error(nil), // Mlock, TODO
+		150: syscalls.Error(nil), // Munlock, TODO
+		151: syscalls.Error(nil), // Mlockall, TODO
+		152: syscalls.Error(nil), // Munlockall, TODO
 		153: syscalls.CapError(linux.CAP_SYS_TTY_CONFIG), // Vhangup,
 		154: syscalls.Error(syscall.EPERM), // ModifyLdt,
 		155: syscalls.Error(syscall.EPERM), // PivotRoot,

@@ -373,9 +373,8 @@ var AMD64 = &kernel.SyscallTable{
 		// 322: Execveat, TODO
 		// 323: Userfaultfd, TODO
 		// 324: Membarrier, TODO
-		325: Mlock2,
-		// Syscalls after 325 are "backports" from versions of Linux after 4.4.
-		// 326: CopyFileRange,
+		// Syscalls after 325 are backports from 4.6.
+		325: syscalls.Error(nil), // Mlock2, TODO
 		327: Preadv2,
 		328: Pwritev2,
 	},

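syscalls.Error(nil) makes a syscall a silent success: the handler returns 0 with a nil error, so the stubbed mlock family reports success without locking anything. A toy model of the difference between a real entry and such a stub (names here are illustrative, not the kernel package's API):

package main

import "fmt"

// handler is a toy stand-in for a syscall table entry.
type handler func() (uintptr, error)

// errorStub mimics syscalls.Error(err): always return rv 0 and the given
// error, so errorStub(nil) is a no-op that reports success.
func errorStub(err error) handler {
	return func() (uintptr, error) { return 0, err }
}

func main() {
	table := map[int]handler{
		149: errorStub(nil), // Mlock, stubbed out by this rollback
	}
	rv, err := table[149]()
	fmt.Println(rv, err) // 0 <nil>
}
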
@@ -69,9 +69,6 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
 		GrowsDown: linux.MAP_GROWSDOWN&flags != 0,
 		Precommit: linux.MAP_POPULATE&flags != 0,
 	}
-	if linux.MAP_LOCKED&flags != 0 {
-		opts.MLockMode = memmap.MLockEager
-	}
 	defer func() {
 		if opts.MappingIdentity != nil {
 			opts.MappingIdentity.DecRef()

@@ -387,6 +384,16 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	length := args[1].SizeT()
 	flags := args[2].Int()
 
+	if addr != addr.RoundDown() {
+		return 0, nil, syserror.EINVAL
+	}
+	if length == 0 {
+		return 0, nil, nil
+	}
+	la, ok := usermem.Addr(length).RoundUp()
+	if !ok {
+		return 0, nil, syserror.ENOMEM
+	}
 	// "The flags argument should specify exactly one of MS_ASYNC and MS_SYNC,
 	// and may additionally include the MS_INVALIDATE bit. ... However, Linux
 	// permits a call to msync() that specifies neither of these flags, with

@@ -399,72 +406,39 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	if sync && flags&linux.MS_ASYNC != 0 {
 		return 0, nil, syserror.EINVAL
 	}
-	err := t.MemoryManager().MSync(t, addr, uint64(length), mm.MSyncOpts{
-		Sync:       sync,
-		Invalidate: flags&linux.MS_INVALIDATE != 0,
-	})
-	// MSync calls fsync, the same interrupt conversion rules apply, see
+
+	// MS_INVALIDATE "asks to invalidate other mappings of the same file (so
+	// that they can be updated with the fresh values just written)". This is a
+	// no-op given that shared memory exists. However, MS_INVALIDATE can also
+	// be used to detect mlocks: "EBUSY: MS_INVALIDATE was specified in flags,
+	// and a memory lock exists for the specified address range." Given that
+	// mlock is stubbed out, it's unsafe to pass MS_INVALIDATE silently since
+	// some user program could be using it for synchronization.
+	if flags&linux.MS_INVALIDATE != 0 {
+		return 0, nil, syserror.EINVAL
+	}
+	// MS_SYNC "requests an update and waits for it to complete."
+	if sync {
+		err := t.MemoryManager().Sync(t, addr, uint64(la))
+		// Sync calls fsync, the same interrupt conversion rules apply, see
 		// mm/msync.c, fsync POSIX.1-2008.
 		return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+	}
 
-// Mlock implements linux syscall mlock(2).
-func Mlock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
-	addr := args[0].Pointer()
-	length := args[1].SizeT()
-
-	return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), memmap.MLockEager)
-}
-
-// Mlock2 implements linux syscall mlock2(2).
-func Mlock2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
-	addr := args[0].Pointer()
-	length := args[1].SizeT()
-	flags := args[2].Int()
-
-	if flags&^(linux.MLOCK_ONFAULT) != 0 {
-		return 0, nil, syserror.EINVAL
-	}
-
-	mode := memmap.MLockEager
-	if flags&linux.MLOCK_ONFAULT != 0 {
-		mode = memmap.MLockLazy
-	}
-	return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), mode)
-}
-
-// Munlock implements linux syscall munlock(2).
-func Munlock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
-	addr := args[0].Pointer()
-	length := args[1].SizeT()
-
-	return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), memmap.MLockNone)
-}
-
-// Mlockall implements linux syscall mlockall(2).
-func Mlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
-	flags := args[0].Int()
-
-	if flags&^(linux.MCL_CURRENT|linux.MCL_FUTURE|linux.MCL_ONFAULT) != 0 {
-		return 0, nil, syserror.EINVAL
-	}
-
-	mode := memmap.MLockEager
-	if flags&linux.MCL_ONFAULT != 0 {
-		mode = memmap.MLockLazy
-	}
-	return 0, nil, t.MemoryManager().MLockAll(t, mm.MLockAllOpts{
-		Current: flags&linux.MCL_CURRENT != 0,
-		Future:  flags&linux.MCL_FUTURE != 0,
-		Mode:    mode,
-	})
-}
-
-// Munlockall implements linux syscall munlockall(2).
-func Munlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
-	return 0, nil, t.MemoryManager().MLockAll(t, mm.MLockAllOpts{
-		Current: true,
-		Future:  true,
-		Mode:    memmap.MLockNone,
-	})
+	// MS_ASYNC "specifies that an update be scheduled, but the call returns
+	// immediately". As long as dirty pages are tracked and eventually written
+	// back, this is a no-op. (Correspondingly: "Since Linux 2.6.19, MS_ASYNC
+	// is in fact a no-op, since the kernel properly tracks dirty pages and
+	// flushes them to storage as necessary.")
+	//
+	// However: "ENOMEM: The indicated memory (or part of it) was not mapped."
+	// This applies even for MS_ASYNC.
+	ar, ok := addr.ToRange(uint64(la))
+	if !ok {
+		return 0, nil, syserror.ENOMEM
+	}
+	mapped := t.MemoryManager().VirtualMemorySizeRange(ar)
+	if mapped != uint64(la) {
+		return 0, nil, syserror.ENOMEM
+	}
+	return 0, nil, nil
 }

@@ -90,7 +90,6 @@ var setableLimits = map[limits.LimitType]struct{}{
 	limits.CPU:      {},
 	limits.Data:     {},
 	limits.FileSize: {},
-	limits.MemoryLocked: {},
 	limits.Stack:    {},
 	// These are not enforced, but we include them here to avoid returning
 	// EPERM, since some apps expect them to succeed.

@@ -29,7 +29,7 @@ var fromLinuxResource = map[string]limits.LimitType{
 	"RLIMIT_DATA":       limits.Data,
 	"RLIMIT_FSIZE":      limits.FileSize,
 	"RLIMIT_LOCKS":      limits.Locks,
-	"RLIMIT_MEMLOCK":    limits.MemoryLocked,
+	"RLIMIT_MEMLOCK":    limits.MemoryPagesLocked,
 	"RLIMIT_MSGQUEUE":   limits.MessageQueueBytes,
 	"RLIMIT_NICE":       limits.Nice,
 	"RLIMIT_NOFILE":     limits.NumberOfFiles,

@@ -55,7 +55,7 @@ func createLimitSet(spec *specs.Spec) (*limits.LimitSet, error) {
 	ls.SetUnchecked(limits.Data, limits.Limit{Cur: limits.Infinity, Max: limits.Infinity})
 	ls.SetUnchecked(limits.FileSize, limits.Limit{Cur: limits.Infinity, Max: limits.Infinity})
 	ls.SetUnchecked(limits.Locks, limits.Limit{Cur: limits.Infinity, Max: limits.Infinity})
-	ls.SetUnchecked(limits.MemoryLocked, limits.Limit{Cur: 65536, Max: 65536})
+	ls.SetUnchecked(limits.MemoryPagesLocked, limits.Limit{Cur: 65536, Max: 65536})
 	ls.SetUnchecked(limits.MessageQueueBytes, limits.Limit{Cur: 819200, Max: 819200})
 	ls.SetUnchecked(limits.Nice, limits.Limit{Cur: 0, Max: 0})
 	ls.SetUnchecked(limits.NumberOfFiles, limits.Limit{Cur: 1048576, Max: 1048576})

@@ -1019,21 +1019,6 @@ cc_binary(
     ],
 )
 
-cc_binary(
-    name = "mlock_test",
-    testonly = 1,
-    srcs = ["mlock.cc"],
-    linkstatic = 1,
-    deps = [
-        "//test/util:capability_util",
-        "//test/util:cleanup",
-        "//test/util:memory_util",
-        "//test/util:multiprocess_util",
-        "//test/util:test_util",
-        "@com_google_googletest//:gtest",
-    ],
-)
-
 cc_binary(
     name = "mmap_test",
     testonly = 1,

@@ -1,344 +0,0 @@
-// Copyright 2018 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <errno.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/resource.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-#include "test/util/capability_util.h"
-#include "test/util/cleanup.h"
-#include "test/util/memory_util.h"
-#include "test/util/multiprocess_util.h"
-#include "test/util/test_util.h"
-
-using ::testing::_;
-
-namespace gvisor {
-namespace testing {
-
-namespace {
-
-PosixErrorOr<bool> CanMlock() {
-  struct rlimit rlim;
-  if (getrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
-    return PosixError(errno, "getrlimit(RLIMIT_MEMLOCK)");
-  }
-  if (rlim.rlim_cur != 0) {
-    return true;
-  }
-  return HaveCapability(CAP_IPC_LOCK);
-}
-
-// Returns true if the page containing addr is mlocked.
-bool IsPageMlocked(uintptr_t addr) {
-  // This relies on msync(MS_INVALIDATE) interacting correctly with mlocked
-  // pages, which is tested for by the MsyncInvalidate case below.
-  int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)),
-                       kPageSize, MS_ASYNC | MS_INVALIDATE);
-  if (rv == 0) {
-    return false;
-  }
-  // This uses TEST_PCHECK_MSG since it's used in subprocesses.
-  TEST_PCHECK_MSG(errno == EBUSY, "msync failed with unexpected errno");
-  return true;
-}
-
-PosixErrorOr<Cleanup> ScopedSetSoftRlimit(int resource, rlim_t newval) {
-  struct rlimit old_rlim;
-  if (getrlimit(resource, &old_rlim) != 0) {
-    return PosixError(errno, "getrlimit failed");
-  }
-  struct rlimit new_rlim = old_rlim;
-  new_rlim.rlim_cur = newval;
-  if (setrlimit(resource, &new_rlim) != 0) {
-    return PosixError(errno, "setrlimit failed");
-  }
-  return Cleanup([resource, old_rlim] {
-    TEST_PCHECK(setrlimit(resource, &old_rlim) == 0);
-  });
-}
-
-TEST(MlockTest, Basic) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-}
-
-TEST(MlockTest, ProtNone) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping =
-      ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()),
-              SyscallFailsWithErrno(ENOMEM));
-  // ENOMEM is returned because mlock can't populate the page, but it's still
-  // considered locked.
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-}
-
-TEST(MlockTest, MadviseDontneed) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_THAT(madvise(mapping.ptr(), mapping.len(), MADV_DONTNEED),
-              SyscallFailsWithErrno(EINVAL));
-}
-
-TEST(MlockTest, MsyncInvalidate) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_ASYNC | MS_INVALIDATE),
-              SyscallFailsWithErrno(EBUSY));
-  EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_SYNC | MS_INVALIDATE),
-              SyscallFailsWithErrno(EBUSY));
-}
-
-TEST(MlockTest, Fork) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-  EXPECT_THAT(
-      InForkedProcess([&] { TEST_CHECK(!IsPageMlocked(mapping.addr())); }),
-      IsPosixErrorOkAndHolds(0));
-}
-
-TEST(MlockTest, RlimitMemlockZero) {
-  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
-    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
-  }
-  Cleanup reset_rlimit =
-      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()),
-              SyscallFailsWithErrno(EPERM));
-}
-
-TEST(MlockTest, RlimitMemlockInsufficient) {
-  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
-    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
-  }
-  Cleanup reset_rlimit =
-      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()),
-              SyscallFailsWithErrno(ENOMEM));
-}
-
-TEST(MunlockTest, Basic) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-}
-
-TEST(MunlockTest, NotLocked) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-}
-
-// There is currently no test for mlockall(MCL_CURRENT) because the default
-// RLIMIT_MEMLOCK of 64 KB is insufficient to actually invoke
-// mlockall(MCL_CURRENT).
-
-TEST(MlockallTest, Future) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-
-  // Run this test in a separate (single-threaded) subprocess to ensure that a
-  // background thread doesn't try to mmap a large amount of memory, fail due
-  // to hitting RLIMIT_MEMLOCK, and explode the process violently.
-  EXPECT_THAT(InForkedProcess([] {
-                auto const mapping =
-                    MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)
-                        .ValueOrDie();
-                TEST_CHECK(!IsPageMlocked(mapping.addr()));
-                TEST_PCHECK(mlockall(MCL_FUTURE) == 0);
-                // Ensure that mlockall(MCL_FUTURE) is turned off before the end
-                // of the test, as otherwise mmaps may fail unexpectedly.
-                Cleanup do_munlockall([] { TEST_PCHECK(munlockall() == 0); });
-                auto const mapping2 = ASSERT_NO_ERRNO_AND_VALUE(
-                    MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-                TEST_CHECK(IsPageMlocked(mapping2.addr()));
-                // Fire munlockall() and check that it disables
-                // mlockall(MCL_FUTURE).
-                do_munlockall.Release()();
-                auto const mapping3 = ASSERT_NO_ERRNO_AND_VALUE(
-                    MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-                TEST_CHECK(!IsPageMlocked(mapping2.addr()));
-              }),
-              IsPosixErrorOkAndHolds(0));
-}
-
-TEST(MunlockallTest, Basic) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(munlockall(), SyscallSucceeds());
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-}
-
-#ifndef SYS_mlock2
-#ifdef __x86_64__
-#define SYS_mlock2 325
-#endif
-#endif
-
-#ifndef MLOCK_ONFAULT
-#define MLOCK_ONFAULT 0x01  // Linux: include/uapi/asm-generic/mman-common.h
-#endif
-
-#ifdef SYS_mlock2
-
-int mlock2(void const* addr, size_t len, int flags) {
-  return syscall(SYS_mlock2, addr, len, flags);
-}
-
-TEST(Mlock2Test, NoFlags) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), 0), SyscallSucceeds());
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-}
-
-TEST(Mlock2Test, MlockOnfault) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-  ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), MLOCK_ONFAULT),
-              SyscallSucceeds());
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-}
-
-TEST(Mlock2Test, UnknownFlags) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
-  EXPECT_THAT(mlock2(mapping.ptr(), mapping.len(), ~0),
-              SyscallFailsWithErrno(EINVAL));
-}
-
-#endif  // defined(SYS_mlock2)
-
-TEST(MapLockedTest, Basic) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto const mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-  EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds());
-  EXPECT_FALSE(IsPageMlocked(mapping.addr()));
-}
-
-TEST(MapLockedTest, RlimitMemlockZero) {
-  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
-    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
-  }
-  Cleanup reset_rlimit =
-      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0));
-  EXPECT_THAT(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED),
-      PosixErrorIs(EPERM, _));
-}
-
-TEST(MapLockedTest, RlimitMemlockInsufficient) {
-  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
-    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
-  }
-  Cleanup reset_rlimit =
-      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize));
-  EXPECT_THAT(
-      MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED),
-      PosixErrorIs(EAGAIN, _));
-}
-
-TEST(MremapLockedTest, Basic) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-
-  void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(),
-                      MREMAP_MAYMOVE, nullptr);
-  if (addr == MAP_FAILED) {
-    FAIL() << "mremap failed: " << errno << " (" << strerror(errno) << ")";
-  }
-  mapping.release();
-  mapping.reset(addr, 2 * mapping.len());
-  EXPECT_TRUE(IsPageMlocked(reinterpret_cast<uintptr_t>(addr)));
-}
-
-TEST(MremapLockedTest, RlimitMemlockZero) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-
-  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
-    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
-  }
-  Cleanup reset_rlimit =
-      ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0));
-  void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(),
-                      MREMAP_MAYMOVE, nullptr);
-  EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN)
-      << "addr = " << addr << ", errno = " << errno;
-}
-
-TEST(MremapLockedTest, RlimitMemlockInsufficient) {
-  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock()));
-  auto mapping = ASSERT_NO_ERRNO_AND_VALUE(
-      MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED));
-  EXPECT_TRUE(IsPageMlocked(mapping.addr()));
-
-  if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) {
-    ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false));
-  }
-  Cleanup reset_rlimit = ASSERT_NO_ERRNO_AND_VALUE(
-      ScopedSetSoftRlimit(RLIMIT_MEMLOCK, mapping.len()));
-  void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(),
-                      MREMAP_MAYMOVE, nullptr);
-  EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN)
-      << "addr = " << addr << ", errno = " << errno;
-}
-
-}  // namespace
-
-}  // namespace testing
-}  // namespace gvisor

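The deleted test's IsPageMlocked trick, msync(MS_ASYNC | MS_INVALIDATE) failing with EBUSY on locked pages, can be reproduced outside the test suite. A sketch using golang.org/x/sys/unix, assuming a Linux host and a nonzero RLIMIT_MEMLOCK (or CAP_IPC_LOCK):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	page, err := unix.Mmap(-1, 0, 4096, unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		panic(err)
	}
	if err := unix.Mlock(page); err != nil {
		panic(err) // needs CAP_IPC_LOCK or a nonzero RLIMIT_MEMLOCK
	}
	// MS_INVALIDATE on an mlocked page fails with EBUSY; the deleted
	// IsPageMlocked helper used exactly this probe.
	err = unix.Msync(page, unix.MS_ASYNC|unix.MS_INVALIDATE)
	fmt.Println(err == unix.EBUSY) // true while the page is locked
}
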
@@ -43,13 +43,14 @@ class MsyncParameterizedTest : public ::testing::TestWithParam<MsyncTestParam> {
  protected:
   int msync_flags() const { return std::get<0>(GetParam()); }
 
-  PosixErrorOr<Mapping> GetMapping() const { return std::get<1>(GetParam())(); }
+  PosixErrorOr<Mapping> GetMapping() const {
+    auto rv = std::get<1>(GetParam())();
+    return rv;
+  }
 };
 
-// All valid msync(2) flag combinations, not including MS_INVALIDATE. ("Linux
-// permits a call to msync() that specifies neither [MS_SYNC or MS_ASYNC], with
-// semantics that are (currently) equivalent to specifying MS_ASYNC." -
-// msync(2))
+// All valid msync(2) flag combinations (not including MS_INVALIDATE, which
+// gVisor doesn't implement).
 constexpr std::initializer_list<int> kMsyncFlags = {MS_SYNC, MS_ASYNC, 0};
 
 // Returns functions that return mappings that should be successfully

@@ -133,15 +134,6 @@ TEST_P(MsyncFullParamTest, UnalignedAddressFails) {
                 SyscallFailsWithErrno(EINVAL));
 }
 
-TEST_P(MsyncFullParamTest, InvalidateUnlockedSucceeds) {
-  auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping());
-  EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags() | MS_INVALIDATE),
-              SyscallSucceeds());
-}
-
-// The test for MS_INVALIDATE on mlocked pages is in mlock.cc since it requires
-// probing for mlock support.
-
 INSTANTIATE_TEST_CASE_P(
     All, MsyncFullParamTest,
     ::testing::Combine(::testing::ValuesIn(kMsyncFlags),