// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mm

import (
	"fmt"
	mrand "math/rand"

	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
	"gvisor.googlesource.com/gvisor/pkg/syserror"
)

// HandleUserFault handles an application page fault. sp is the faulting
// application thread's stack pointer.
//
// Preconditions: mm.as != nil.
func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr, at usermem.AccessType, sp usermem.Addr) error {
	ar, ok := addr.RoundDown().ToRange(usermem.PageSize)
	if !ok {
		return syserror.EFAULT
	}

	// Don't bother trying existingPMAsLocked; in most cases, if we did have
	// existing pmas, we wouldn't have faulted.

	// Ensure that we have a usable vma. Here and below, since we are only
	// asking for a single page, there is no possibility of partial success,
	// and any error is immediately fatal.
	mm.mappingMu.RLock()
	vseg, _, err := mm.getVMAsLocked(ctx, ar, at, false)
	if err != nil {
		mm.mappingMu.RUnlock()
		return err
	}

	// Ensure that we have a usable pma.
	mm.activeMu.Lock()
	pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{
		breakCOW: at.Write,
	})
	mm.mappingMu.RUnlock()
	if err != nil {
		mm.activeMu.Unlock()
		return err
	}

	// Downgrade to a read-lock on activeMu since we don't need to mutate pmas
	// anymore.
	mm.activeMu.DowngradeLock()

	// Map the faulted page into the active AddressSpace.
	err = mm.mapASLocked(pseg, ar, false)
	mm.activeMu.RUnlock()
	return err
}
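
// Illustrative sketch (not part of the original source): a platform fault
// handler might resolve a user page fault roughly as below; faultAddr, at, sp,
// and the signal-delivery step are hypothetical names standing in for the
// caller's state.
//
//	if err := mm.HandleUserFault(ctx, faultAddr, at, sp); err != nil {
//		// Unresolvable fault: the caller would typically translate err
//		// into a SIGSEGV or SIGBUS for the faulting thread.
//	}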

// MMap establishes a memory mapping.
func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error) {
	if opts.Length == 0 {
		return 0, syserror.EINVAL
	}
	length, ok := usermem.Addr(opts.Length).RoundUp()
	if !ok {
		return 0, syserror.ENOMEM
	}
	opts.Length = uint64(length)

	if opts.Mappable != nil {
		// Offset must be aligned.
		if usermem.Addr(opts.Offset).RoundDown() != usermem.Addr(opts.Offset) {
			return 0, syserror.EINVAL
		}
		// Offset + length must not overflow.
		if end := opts.Offset + opts.Length; end < opts.Offset {
			return 0, syserror.ENOMEM
		}
	} else {
		opts.Offset = 0
		if !opts.Private {
			if opts.MappingIdentity != nil {
				return 0, syserror.EINVAL
			}
			m, err := NewSharedAnonMappable(opts.Length, platform.FromContext(ctx))
			if err != nil {
				return 0, err
			}
			opts.MappingIdentity = m
			opts.Mappable = m
		}
	}

	if opts.Addr.RoundDown() != opts.Addr {
		// MAP_FIXED requires addr to be page-aligned; non-fixed mappings
		// don't.
		if opts.Fixed {
			return 0, syserror.EINVAL
		}
		opts.Addr = opts.Addr.RoundDown()
	}

	if !opts.MaxPerms.SupersetOf(opts.Perms) {
		return 0, syserror.EACCES
	}
	if opts.Unmap && !opts.Fixed {
		return 0, syserror.EINVAL
	}
	if opts.GrowsDown && opts.Mappable != nil {
		return 0, syserror.EINVAL
	}

	// Get the new vma.
	mm.mappingMu.Lock()
	if opts.MLockMode < mm.defMLockMode {
		opts.MLockMode = mm.defMLockMode
	}
	vseg, ar, err := mm.createVMALocked(ctx, opts)
	if err != nil {
		mm.mappingMu.Unlock()
		return 0, err
	}

	// TODO: In Linux, VM_LOCKONFAULT (which may be set on the new
	// vma by mlockall(MCL_FUTURE|MCL_ONFAULT) => mm_struct::def_flags) appears
	// to effectively disable MAP_POPULATE by unsetting FOLL_POPULATE in
	// mm/util.c:vm_mmap_pgoff() => mm/gup.c:__mm_populate() =>
	// populate_vma_page_range(). Confirm this behavior.
	switch {
	case opts.Precommit || opts.MLockMode == memmap.MLockEager:
		// Get pmas and map with precommit as requested.
		mm.populateVMAAndUnlock(ctx, vseg, ar, true)

	case opts.Mappable == nil && length <= privateAllocUnit:
		// NOTE: Get pmas and map eagerly in the hope
		// that doing so will save on future page faults. We only do this for
		// anonymous mappings, since otherwise the cost of
		// memmap.Mappable.Translate is unknown; and only for small mappings,
		// to avoid needing to allocate large amounts of memory that we may
		// subsequently need to checkpoint.
		mm.populateVMAAndUnlock(ctx, vseg, ar, false)

	default:
		mm.mappingMu.Unlock()
	}

	return ar.Start, nil
}
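
// Illustrative sketch (not part of the original source): a caller creating a
// private anonymous mapping (MAP_PRIVATE|MAP_ANONYMOUS) might fill in
// memmap.MMapOpts as below. The field names appear elsewhere in this file; the
// surrounding syscall plumbing and variable names are assumptions.
//
//	addr, err := mm.MMap(ctx, memmap.MMapOpts{
//		Length:   length,   // rounded up to a page multiple by MMap
//		Addr:     hintAddr, // 0 lets MMap pick a location
//		Fixed:    false,    // not MAP_FIXED
//		Private:  true,     // anonymous memory: Mappable stays nil
//		Perms:    usermem.ReadWrite,
//		MaxPerms: usermem.AnyAccess,
//	})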

// populateVMA obtains pmas for addresses in ar in the given vma, and maps them
// into mm.as if it is active.
//
// Preconditions: mm.mappingMu must be locked. vseg.Range().IsSupersetOf(ar).
func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
	if !vseg.ValuePtr().effectivePerms.Any() {
		// Linux doesn't populate inaccessible pages. See
		// mm/gup.c:populate_vma_page_range.
		return
	}

	mm.activeMu.Lock()
	// Can't defer mm.activeMu.Unlock(); see below.

	// Even if we get new pmas, we can't actually map them if we don't have an
	// AddressSpace.
	if mm.as == nil {
		mm.activeMu.Unlock()
		return
	}

	// Ensure that we have usable pmas.
	pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{})
	if err != nil {
		// mm/util.c:vm_mmap_pgoff() ignores the error, if any, from
		// mm/gup.c:mm_populate(). If it matters, we'll get it again when
		// userspace actually tries to use the failing page.
		mm.activeMu.Unlock()
		return
	}

	// Downgrade to a read-lock on activeMu since we don't need to mutate pmas
	// anymore.
	mm.activeMu.DowngradeLock()

	// As above, errors are silently ignored.
	mm.mapASLocked(pseg, ar, precommit)
	mm.activeMu.RUnlock()
}

// populateVMAAndUnlock is equivalent to populateVMA, but also unconditionally
// unlocks mm.mappingMu. In cases where populateVMAAndUnlock is usable, it is
// preferable to populateVMA since it unlocks mm.mappingMu before performing
// expensive operations that don't require it to be locked.
//
// Preconditions: mm.mappingMu must be locked for writing.
// vseg.Range().IsSupersetOf(ar).
//
// Postconditions: mm.mappingMu will be unlocked.
func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) {
	// See populateVMA above for commentary.
	if !vseg.ValuePtr().effectivePerms.Any() {
		mm.mappingMu.Unlock()
		return
	}

	mm.activeMu.Lock()

	if mm.as == nil {
		mm.activeMu.Unlock()
		mm.mappingMu.Unlock()
		return
	}

	// mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it
	// isn't needed at all for mapASLocked.
	mm.mappingMu.DowngradeLock()
	pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{})
	mm.mappingMu.RUnlock()
	if err != nil {
		mm.activeMu.Unlock()
		return
	}

	mm.activeMu.DowngradeLock()
	mm.mapASLocked(pseg, ar, precommit)
	mm.activeMu.RUnlock()
}

// MapStack allocates the initial process stack.
func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error) {
	// maxStackSize is the maximum supported process stack size in bytes.
	//
	// This limit exists because stack growing isn't implemented, so the entire
	// process stack must be mapped up-front.
	const maxStackSize = 128 << 20

	stackSize := limits.FromContext(ctx).Get(limits.Stack)
	r, ok := usermem.Addr(stackSize.Cur).RoundUp()
	sz := uint64(r)
	if !ok {
		// RLIM_INFINITY rounds up to 0.
		sz = linux.DefaultStackSoftLimit
	} else if sz > maxStackSize {
		ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize)
		sz = maxStackSize
	} else if sz == 0 {
		return usermem.AddrRange{}, syserror.ENOMEM
	}
	szaddr := usermem.Addr(sz)
	ctx.Debugf("Allocating stack with size of %v bytes", sz)

	// Determine the stack's desired location. Unlike Linux, address
	// randomization can't be disabled.
	stackEnd := mm.layout.MaxAddr - usermem.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown()
	if stackEnd < szaddr {
		return usermem.AddrRange{}, syserror.ENOMEM
	}
	stackStart := stackEnd - szaddr
	mm.mappingMu.Lock()
	defer mm.mappingMu.Unlock()
	_, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
		Length:    sz,
		Addr:      stackStart,
		Perms:     usermem.ReadWrite,
		MaxPerms:  usermem.AnyAccess,
		Private:   true,
		GrowsDown: true,
		MLockMode: mm.defMLockMode,
		Hint:      "[stack]",
	})
	return ar, err
}

// MUnmap implements the semantics of Linux's munmap(2).
func (mm *MemoryManager) MUnmap(ctx context.Context, addr usermem.Addr, length uint64) error {
	if addr != addr.RoundDown() {
		return syserror.EINVAL
	}
	if length == 0 {
		return syserror.EINVAL
	}
	la, ok := usermem.Addr(length).RoundUp()
	if !ok {
		return syserror.EINVAL
	}
	ar, ok := addr.ToRange(uint64(la))
	if !ok {
		return syserror.EINVAL
	}

	mm.mappingMu.Lock()
	defer mm.mappingMu.Unlock()
	mm.unmapLocked(ctx, ar)
	return nil
}

// MRemapOpts specifies options to MRemap.
type MRemapOpts struct {
	// Move controls whether MRemap moves the remapped mapping to a new address.
	Move MRemapMoveMode

	// NewAddr is the new address for the remapping. NewAddr is ignored unless
	// Move is MRemapMustMove.
	NewAddr usermem.Addr
}

// MRemapMoveMode controls MRemap's moving behavior.
type MRemapMoveMode int

const (
	// MRemapNoMove prevents MRemap from moving the remapped mapping.
	MRemapNoMove MRemapMoveMode = iota

	// MRemapMayMove allows MRemap to move the remapped mapping.
	MRemapMayMove

	// MRemapMustMove requires MRemap to move the remapped mapping to
	// MRemapOpts.NewAddr, replacing any existing mappings in the remapped
	// range.
	MRemapMustMove
)
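
// Illustrative sketch (not part of the original source): a mremap(2) handler
// might translate MREMAP_* flags into MRemapOpts roughly as below. The
// MREMAP_MAYMOVE and MREMAP_FIXED constants are assumed to be provided by the
// abi/linux package; the rest of the syscall plumbing is omitted.
//
//	move := MRemapNoMove
//	if flags&linux.MREMAP_FIXED != 0 {
//		move = MRemapMustMove // MRemapOpts.NewAddr is honored only in this mode
//	} else if flags&linux.MREMAP_MAYMOVE != 0 {
//		move = MRemapMayMove
//	}
//	newAddr, err := mm.MRemap(ctx, oldAddr, oldSize, newSize, MRemapOpts{
//		Move:    move,
//		NewAddr: requestedAddr,
//	})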

// MRemap implements the semantics of Linux's mremap(2).
func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (usermem.Addr, error) {
	// "Note that old_address has to be page aligned." - mremap(2)
	if oldAddr.RoundDown() != oldAddr {
		return 0, syserror.EINVAL
	}

	// Linux treats an old_size that rounds up to 0 as 0, which is otherwise a
	// valid size. However, new_size can't be 0 after rounding.
	oldSizeAddr, _ := usermem.Addr(oldSize).RoundUp()
	oldSize = uint64(oldSizeAddr)
	newSizeAddr, ok := usermem.Addr(newSize).RoundUp()
	if !ok || newSizeAddr == 0 {
		return 0, syserror.EINVAL
	}
	newSize = uint64(newSizeAddr)

	oldEnd, ok := oldAddr.AddLength(oldSize)
	if !ok {
		return 0, syserror.EINVAL
	}

	mm.mappingMu.Lock()
	defer mm.mappingMu.Unlock()

	// All cases require that a vma exists at oldAddr.
	vseg := mm.vmas.FindSegment(oldAddr)
	if !vseg.Ok() {
		return 0, syserror.EFAULT
	}

	// Behavior matrix:
	//
	// Move     | oldSize = 0 | oldSize < newSize | oldSize = newSize | oldSize > newSize
	// ---------+-------------+-------------------+-------------------+------------------
	// NoMove   | ENOMEM [1]  | Grow in-place     | No-op             | Shrink in-place
	// MayMove  | Copy [1]    | Grow in-place or  | No-op             | Shrink in-place
	//          |             |   move            |                   |
	// MustMove | Copy        | Move and grow     | Move              | Shrink and move
	//
	// [1] In-place growth is impossible because the vma at oldAddr already
	// occupies at least part of the destination. Thus the NoMove case always
	// fails and the MayMove case always falls back to copying.

	if vma := vseg.ValuePtr(); newSize > oldSize && vma.mlockMode != memmap.MLockNone {
		// Check against RLIMIT_MEMLOCK. Unlike mmap, mlock, and mlockall,
		// mremap in Linux does not check mm/mlock.c:can_do_mlock() and
		// therefore does not return EPERM if RLIMIT_MEMLOCK is 0 and
		// !CAP_IPC_LOCK.
		mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
			if newLockedAS := mm.lockedAS - oldSize + newSize; newLockedAS > mlockLimit {
				return 0, syserror.EAGAIN
			}
		}
	}

	if opts.Move != MRemapMustMove {
		// Handle no-ops and in-place shrinking. These cases don't care if
		// [oldAddr, oldEnd) maps to a single vma, or is even mapped at all
		// (aside from oldAddr).
		if newSize <= oldSize {
			if newSize < oldSize {
				// If oldAddr+oldSize didn't overflow, oldAddr+newSize can't
				// either.
				newEnd := oldAddr + usermem.Addr(newSize)
				mm.unmapLocked(ctx, usermem.AddrRange{newEnd, oldEnd})
			}
			return oldAddr, nil
		}

		// Handle in-place growing.

		// Check that oldEnd maps to the same vma as oldAddr.
		if vseg.End() < oldEnd {
			return 0, syserror.EFAULT
		}
		// "Grow" the existing vma by creating a new mergeable one.
		vma := vseg.ValuePtr()
		var newOffset uint64
		if vma.mappable != nil {
			newOffset = vseg.mappableRange().End
		}
		vseg, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
			Length:          newSize - oldSize,
			MappingIdentity: vma.id,
			Mappable:        vma.mappable,
			Offset:          newOffset,
			Addr:            oldEnd,
			Fixed:           true,
			Perms:           vma.realPerms,
			MaxPerms:        vma.maxPerms,
			Private:         vma.private,
			GrowsDown:       vma.growsDown,
			MLockMode:       vma.mlockMode,
			Hint:            vma.hint,
		})
		if err == nil {
			if vma.mlockMode == memmap.MLockEager {
				mm.populateVMA(ctx, vseg, ar, true)
			}
			return oldAddr, nil
		}
		// In-place growth failed. In the MRemapMayMove case, fall through to
		// copying/moving below.
		if opts.Move == MRemapNoMove {
			return 0, err
		}
	}

	// Find a location for the new mapping.
	var newAR usermem.AddrRange
	switch opts.Move {
	case MRemapMayMove:
		newAddr, err := mm.findAvailableLocked(newSize, findAvailableOpts{})
		if err != nil {
			return 0, err
		}
		newAR, _ = newAddr.ToRange(newSize)

	case MRemapMustMove:
		newAddr := opts.NewAddr
		if newAddr.RoundDown() != newAddr {
			return 0, syserror.EINVAL
		}
		var ok bool
		newAR, ok = newAddr.ToRange(newSize)
		if !ok {
			return 0, syserror.EINVAL
		}
		if (usermem.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) {
			return 0, syserror.EINVAL
		}

		// Unmap any mappings at the destination.
		mm.unmapLocked(ctx, newAR)

		// If the sizes specify shrinking, unmap everything between the new and
		// old sizes at the source. Unmapping before the following checks is
		// correct: compare Linux's mm/mremap.c:mremap_to() => do_munmap(),
		// vma_to_resize().
		if newSize < oldSize {
			oldNewEnd := oldAddr + usermem.Addr(newSize)
			mm.unmapLocked(ctx, usermem.AddrRange{oldNewEnd, oldEnd})
			oldEnd = oldNewEnd
		}

		// unmapLocked may have invalidated vseg; look it up again.
		vseg = mm.vmas.FindSegment(oldAddr)
	}

	oldAR := usermem.AddrRange{oldAddr, oldEnd}

	// Check that oldEnd maps to the same vma as oldAddr.
	if vseg.End() < oldEnd {
		return 0, syserror.EFAULT
	}

	// Check against RLIMIT_AS.
	newUsageAS := mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length())
	if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS {
		return 0, syserror.ENOMEM
	}

	if vma := vseg.ValuePtr(); vma.mappable != nil {
		// Check that offset+length does not overflow.
		if vma.off+uint64(newAR.Length()) < vma.off {
			return 0, syserror.EINVAL
		}
		// Inform the Mappable, if any, of the new mapping.
		if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.isMappableAsWritable()); err != nil {
			return 0, err
		}
	}

	if oldSize == 0 {
		// Handle copying.
		//
		// We can't use createVMALocked because it calls Mappable.AddMapping,
		// whereas we've already called Mappable.CopyMapping (which is
		// consistent with Linux). Call vseg.Value() (rather than
		// vseg.ValuePtr()) to make a copy of the vma.
		vma := vseg.Value()
		if vma.mappable != nil {
			vma.off = vseg.mappableOffsetAt(oldAR.Start)
		}
		if vma.id != nil {
			vma.id.IncRef()
		}
		vseg := mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma)
		mm.usageAS += uint64(newAR.Length())
		if vma.mlockMode != memmap.MLockNone {
			mm.lockedAS += uint64(newAR.Length())
			if vma.mlockMode == memmap.MLockEager {
				mm.populateVMA(ctx, vseg, newAR, true)
			}
		}
		return newAR.Start, nil
	}

	// Handle moving.
	//
	// Remove the existing vma before inserting the new one to minimize
	// iterator invalidation. We do this directly (instead of calling
	// removeVMAsLocked) because:
	//
	// 1. We can't drop the reference on vma.id, which will be transferred to
	// the new vma.
	//
	// 2. We can't call vma.mappable.RemoveMapping, because pmas are still at
	// oldAR, so calling RemoveMapping could cause us to miss an invalidation
	// overlapping oldAR.
	//
	// Call vseg.Value() (rather than vseg.ValuePtr()) to make a copy of the
	// vma.
	vseg = mm.vmas.Isolate(vseg, oldAR)
	vma := vseg.Value()
	mm.vmas.Remove(vseg)
	vseg = mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma)
	mm.usageAS = mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length())
	if vma.mlockMode != memmap.MLockNone {
		mm.lockedAS = mm.lockedAS - uint64(oldAR.Length()) + uint64(newAR.Length())
	}

	// Move pmas. This is technically optional for non-private pmas, which
	// could just go through memmap.Mappable.Translate again, but it's required
	// for private pmas.
	mm.activeMu.Lock()
	mm.movePMAsLocked(oldAR, newAR)
	mm.activeMu.Unlock()

	// Now that pmas have been moved to newAR, we can notify vma.mappable that
	// oldAR is no longer mapped.
	if vma.mappable != nil {
		vma.mappable.RemoveMapping(ctx, mm, oldAR, vma.off, vma.isMappableAsWritable())
	}

	if vma.mlockMode == memmap.MLockEager {
		mm.populateVMA(ctx, vseg, newAR, true)
	}

	return newAR.Start, nil
}

// MProtect implements the semantics of Linux's mprotect(2).
func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms usermem.AccessType, growsDown bool) error {
	if addr.RoundDown() != addr {
		return syserror.EINVAL
	}
	if length == 0 {
		return nil
	}
	rlength, ok := usermem.Addr(length).RoundUp()
	if !ok {
		return syserror.ENOMEM
	}
	ar, ok := addr.ToRange(uint64(rlength))
	if !ok {
		return syserror.ENOMEM
	}
	effectivePerms := realPerms.Effective()

	mm.mappingMu.Lock()
	defer mm.mappingMu.Unlock()
	// Non-growsDown mprotect requires that all of ar is mapped, and stops at
	// the first non-empty gap. growsDown mprotect requires that the first vma
	// be growsDown, but does not require it to extend all the way to ar.Start;
	// vmas after the first must be contiguous but need not be growsDown, like
	// the non-growsDown case.
	vseg := mm.vmas.LowerBoundSegment(ar.Start)
	if !vseg.Ok() {
		return syserror.ENOMEM
	}
	if growsDown {
		if !vseg.ValuePtr().growsDown {
			return syserror.EINVAL
		}
		if ar.End <= vseg.Start() {
			return syserror.ENOMEM
		}
		ar.Start = vseg.Start()
	} else {
		if ar.Start < vseg.Start() {
			return syserror.ENOMEM
		}
	}

	mm.activeMu.Lock()
	defer mm.activeMu.Unlock()
	defer func() {
		mm.vmas.MergeRange(ar)
		mm.vmas.MergeAdjacent(ar)
		mm.pmas.MergeRange(ar)
		mm.pmas.MergeAdjacent(ar)
	}()
	pseg := mm.pmas.LowerBoundSegment(ar.Start)
	var didUnmapAS bool
	for {
		// Check for permission validity before splitting vmas, for consistency
		// with Linux.
		if !vseg.ValuePtr().maxPerms.SupersetOf(effectivePerms) {
			return syserror.EACCES
		}
		vseg = mm.vmas.Isolate(vseg, ar)

		// Update vma permissions.
		vma := vseg.ValuePtr()
		vma.realPerms = realPerms
		vma.effectivePerms = effectivePerms

		// Propagate vma permission changes to pmas.
		for pseg.Ok() && pseg.Start() < vseg.End() {
			if pseg.Range().Overlaps(vseg.Range()) {
				pseg = mm.pmas.Isolate(pseg, vseg.Range())
				if !effectivePerms.SupersetOf(pseg.ValuePtr().vmaEffectivePerms) && !didUnmapAS {
					// Unmap all of ar, not just vseg.Range(), to minimize host
					// syscalls.
					mm.unmapASLocked(ar)
					didUnmapAS = true
				}
				pseg.ValuePtr().vmaEffectivePerms = effectivePerms
			}
			pseg = pseg.NextSegment()
		}

		// Continue to the next vma.
		if ar.End <= vseg.End() {
			return nil
		}
		vseg, _ = vseg.NextNonEmpty()
		if !vseg.Ok() {
			return syserror.ENOMEM
		}
	}
}
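
// Illustrative sketch (not part of the original source): an mprotect(2)
// handler might decode PROT_* bits into a usermem.AccessType and call MProtect
// as below. The PROT_* constants are assumed to be provided by the abi/linux
// package.
//
//	perms := usermem.AccessType{
//		Read:    prot&linux.PROT_READ != 0,
//		Write:   prot&linux.PROT_WRITE != 0,
//		Execute: prot&linux.PROT_EXEC != 0,
//	}
//	return mm.MProtect(addr, length, perms, prot&linux.PROT_GROWSDOWN != 0)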

// BrkSetup sets mm's brk address to addr and its brk size to 0.
func (mm *MemoryManager) BrkSetup(ctx context.Context, addr usermem.Addr) {
	mm.mappingMu.Lock()
	defer mm.mappingMu.Unlock()
	// Unmap the existing brk.
	if mm.brk.Length() != 0 {
		mm.unmapLocked(ctx, mm.brk)
	}
	mm.brk = usermem.AddrRange{addr, addr}
}

// Brk implements the semantics of Linux's brk(2), except that it returns an
// error on failure.
func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Addr, error) {
	mm.mappingMu.Lock()
	// Can't defer mm.mappingMu.Unlock(); see below.

	if addr < mm.brk.Start {
		mm.mappingMu.Unlock()
		return mm.brk.End, syserror.EINVAL
	}

	// TODO: This enforces RLIMIT_DATA, but is slightly more
	// permissive than the usual data limit. In particular, this only
	// limits the size of the heap; a true RLIMIT_DATA limits the size of
	// heap + data + bss. The segment sizes need to be plumbed from the
	// loader package to fully enforce RLIMIT_DATA.
	if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur {
		mm.mappingMu.Unlock()
		return mm.brk.End, syserror.ENOMEM
	}

	oldbrkpg, _ := mm.brk.End.RoundUp()
	newbrkpg, ok := addr.RoundUp()
	if !ok {
		mm.mappingMu.Unlock()
		return mm.brk.End, syserror.EFAULT
	}

	switch {
	case newbrkpg < oldbrkpg:
		mm.unmapLocked(ctx, usermem.AddrRange{newbrkpg, oldbrkpg})
		mm.mappingMu.Unlock()

	case oldbrkpg < newbrkpg:
		vseg, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
			Length: uint64(newbrkpg - oldbrkpg),
			Addr:   oldbrkpg,
			Fixed:  true,
			// Compare Linux's
			// arch/x86/include/asm/page_types.h:VM_DATA_DEFAULT_FLAGS.
			Perms:    usermem.ReadWrite,
			MaxPerms: usermem.AnyAccess,
			Private:  true,
			// Linux: mm/mmap.c:sys_brk() => do_brk_flags() includes
			// mm->def_flags.
			MLockMode: mm.defMLockMode,
			Hint:      "[heap]",
		})
		if err != nil {
			mm.mappingMu.Unlock()
			return mm.brk.End, err
		}
		if mm.defMLockMode == memmap.MLockEager {
			mm.populateVMAAndUnlock(ctx, vseg, ar, true)
		} else {
			mm.mappingMu.Unlock()
		}

	default:
		// Nothing to do.
		mm.mappingMu.Unlock()
	}

	mm.brk.End = addr
	return addr, nil
}
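
// Illustrative sketch (not part of the original source): since Brk returns the
// (possibly unchanged) program break even on failure, a brk(2) handler can
// return whatever address comes back to userspace, matching Linux's behavior
// of reporting the current break instead of an error. The surrounding syscall
// plumbing is assumed.
//
//	newBrk, _ := mm.Brk(ctx, requestedBrk)
//	return uintptr(newBrk), nil // success and failure look the same to userspace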

// MLock implements the semantics of Linux's mlock()/mlock2()/munlock(),
// depending on mode.
func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length uint64, mode memmap.MLockMode) error {
	// Linux allows this to overflow.
	la, _ := usermem.Addr(length + addr.PageOffset()).RoundUp()
	ar, ok := addr.RoundDown().ToRange(uint64(la))
	if !ok {
		return syserror.EINVAL
	}

	mm.mappingMu.Lock()
	// Can't defer mm.mappingMu.Unlock(); see below.

	if mode != memmap.MLockNone {
		// Check against RLIMIT_MEMLOCK.
		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
			mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
			if mlockLimit == 0 {
				mm.mappingMu.Unlock()
				return syserror.EPERM
			}
			if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit {
				mm.mappingMu.Unlock()
				return syserror.ENOMEM
			}
		}
	}

	// Check this after RLIMIT_MEMLOCK for consistency with Linux.
	if ar.Length() == 0 {
		mm.mappingMu.Unlock()
		return nil
	}

	// Apply the new mlock mode to vmas.
	var unmapped bool
	vseg := mm.vmas.FindSegment(ar.Start)
	for {
		if !vseg.Ok() {
			unmapped = true
			break
		}
		vseg = mm.vmas.Isolate(vseg, ar)
		vma := vseg.ValuePtr()
		prevMode := vma.mlockMode
		vma.mlockMode = mode
		if mode != memmap.MLockNone && prevMode == memmap.MLockNone {
			mm.lockedAS += uint64(vseg.Range().Length())
		} else if mode == memmap.MLockNone && prevMode != memmap.MLockNone {
			mm.lockedAS -= uint64(vseg.Range().Length())
		}
		if ar.End <= vseg.End() {
			break
		}
		vseg, _ = vseg.NextNonEmpty()
	}
	mm.vmas.MergeRange(ar)
	mm.vmas.MergeAdjacent(ar)
	if unmapped {
		mm.mappingMu.Unlock()
		return syserror.ENOMEM
	}

	if mode == memmap.MLockEager {
		// Ensure that we have usable pmas. Since we didn't return ENOMEM
		// above, ar must be fully covered by vmas, so we can just use
		// NextSegment below.
		mm.activeMu.Lock()
		mm.mappingMu.DowngradeLock()
		for vseg := mm.vmas.FindSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
			if !vseg.ValuePtr().effectivePerms.Any() {
				// Linux: mm/gup.c:__get_user_pages() returns EFAULT in this
				// case, which is converted to ENOMEM by mlock.
				mm.activeMu.Unlock()
				mm.mappingMu.RUnlock()
				return syserror.ENOMEM
			}
			_, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), pmaOpts{})
			if err != nil {
				mm.activeMu.Unlock()
				mm.mappingMu.RUnlock()
				// Linux: mm/mlock.c:__mlock_posix_error_return()
				if err == syserror.EFAULT {
					return syserror.ENOMEM
				}
				if err == syserror.ENOMEM {
					return syserror.EAGAIN
				}
				return err
			}
		}

		// Map pmas into the active AddressSpace, if we have one.
		mm.mappingMu.RUnlock()
		if mm.as != nil {
			mm.activeMu.DowngradeLock()
			err := mm.mapASLocked(mm.pmas.LowerBoundSegment(ar.Start), ar, true /* precommit */)
			mm.activeMu.RUnlock()
			if err != nil {
				return err
			}
		} else {
			mm.activeMu.Unlock()
		}
	} else {
		mm.mappingMu.Unlock()
	}

	return nil
}
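
// Illustrative sketch (not part of the original source): mlock(2) and
// munlock(2) handlers might call MLock as below. Only memmap.MLockEager and
// memmap.MLockNone appear in this file; mlock2(MLOCK_ONFAULT) would select a
// lazy mode, assuming the memmap package defines one.
//
//	// mlock(2): lock and populate the range eagerly.
//	err := mm.MLock(ctx, addr, length, memmap.MLockEager)
//
//	// munlock(2): clear the mlock mode on the same range.
//	err = mm.MLock(ctx, addr, length, memmap.MLockNone)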

// MLockAllOpts holds options to MLockAll.
type MLockAllOpts struct {
	// If Current is true, change the memory-locking behavior of all mappings
	// to Mode. If Future is true, upgrade the memory-locking behavior of all
	// future mappings to Mode. At least one of Current or Future must be true.
	Current bool
	Future  bool
	Mode    memmap.MLockMode
}
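
// Illustrative sketch (not part of the original source): an mlockall(2)
// handler might decode MCL_* flags into MLockAllOpts as below. The MCL_*
// constants are assumed to be provided by the abi/linux package, and
// MCL_ONFAULT would select a lazy mode rather than memmap.MLockEager.
//
//	return mm.MLockAll(ctx, MLockAllOpts{
//		Current: flags&linux.MCL_CURRENT != 0,
//		Future:  flags&linux.MCL_FUTURE != 0,
//		Mode:    memmap.MLockEager,
//	})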

// MLockAll implements the semantics of Linux's mlockall()/munlockall(),
// depending on opts.
func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error {
	if !opts.Current && !opts.Future {
		return syserror.EINVAL
	}

	mm.mappingMu.Lock()
	// Can't defer mm.mappingMu.Unlock(); see below.

	if opts.Current {
		if opts.Mode != memmap.MLockNone {
			// Check against RLIMIT_MEMLOCK.
			if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
				mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
				if mlockLimit == 0 {
					mm.mappingMu.Unlock()
					return syserror.EPERM
				}
				if uint64(mm.vmas.Span()) > mlockLimit {
					mm.mappingMu.Unlock()
					return syserror.ENOMEM
				}
			}
		}
		for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
			vma := vseg.ValuePtr()
			prevMode := vma.mlockMode
			vma.mlockMode = opts.Mode
			if opts.Mode != memmap.MLockNone && prevMode == memmap.MLockNone {
				mm.lockedAS += uint64(vseg.Range().Length())
			} else if opts.Mode == memmap.MLockNone && prevMode != memmap.MLockNone {
				mm.lockedAS -= uint64(vseg.Range().Length())
			}
		}
	}

	if opts.Future {
		mm.defMLockMode = opts.Mode
	}

	if opts.Current && opts.Mode == memmap.MLockEager {
		// Linux: mm/mlock.c:sys_mlockall() => include/linux/mm.h:mm_populate()
		// ignores the return value of __mm_populate(), so all errors below are
		// ignored.
		//
		// Try to get usable pmas.
		mm.activeMu.Lock()
		mm.mappingMu.DowngradeLock()
		for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() {
			if vseg.ValuePtr().effectivePerms.Any() {
				mm.getPMAsLocked(ctx, vseg, vseg.Range(), pmaOpts{})
			}
		}

		// Map all pmas into the active AddressSpace, if we have one.
		mm.mappingMu.RUnlock()
		if mm.as != nil {
			mm.activeMu.DowngradeLock()
			mm.mapASLocked(mm.pmas.FirstSegment(), mm.applicationAddrRange(), true /* precommit */)
			mm.activeMu.RUnlock()
		} else {
			mm.activeMu.Unlock()
		}
	} else {
		mm.mappingMu.Unlock()
	}
	return nil
}

// Decommit implements the semantics of Linux's madvise(MADV_DONTNEED).
func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
	ar, ok := addr.ToRange(length)
	if !ok {
		return syserror.EINVAL
	}

	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	mm.activeMu.Lock()
	defer mm.activeMu.Unlock()

	// Linux's mm/madvise.c:madvise_dontneed() => mm/memory.c:zap_page_range()
	// is analogous to our mm.invalidateLocked(ar, true, true). We inline this
	// here, with the special case that we synchronously decommit
	// uniquely-owned (non-copy-on-write) pages for private anonymous vma,
	// which is the common case for MADV_DONTNEED. Invalidating these pmas, and
	// allowing them to be reallocated when touched again, increases pma
	// fragmentation, which may significantly reduce performance for
	// non-vectored I/O implementations. Also, decommitting synchronously
	// ensures that Decommit immediately reduces host memory usage.
	var didUnmapAS bool
	pseg := mm.pmas.LowerBoundSegment(ar.Start)
	mem := mm.p.Memory()
	for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
		vma := vseg.ValuePtr()
		if vma.mlockMode != memmap.MLockNone {
			return syserror.EINVAL
		}
		vsegAR := vseg.Range().Intersect(ar)
		// pseg should already correspond to either this vma or a later one,
		// since there can't be a pma without a corresponding vma.
		if checkInvariants {
			if pseg.Ok() && pseg.End() <= vsegAR.Start {
				panic(fmt.Sprintf("pma %v precedes vma %v", pseg.Range(), vsegAR))
			}
		}
		for pseg.Ok() && pseg.Start() < vsegAR.End {
			pma := pseg.ValuePtr()
			if pma.private && !mm.isPMACopyOnWriteLocked(pseg) {
				psegAR := pseg.Range().Intersect(ar)
				if vsegAR.IsSupersetOf(psegAR) && vma.mappable == nil {
					if err := mem.Decommit(pseg.fileRangeOf(psegAR)); err == nil {
						pseg = pseg.NextSegment()
						continue
					}
					// If an error occurs, fall through to the general
					// invalidation case below.
				}
			}
			pseg = mm.pmas.Isolate(pseg, vsegAR)
			pma = pseg.ValuePtr()
			if !didUnmapAS {
				// Unmap all of ar, not just pseg.Range(), to minimize host
				// syscalls. AddressSpace mappings must be removed before
				// mm.decPrivateRef().
				mm.unmapASLocked(ar)
				didUnmapAS = true
			}
			if pma.private {
				mm.decPrivateRef(pseg.fileRange())
			}
			pma.file.DecRef(pseg.fileRange())
			mm.removeRSSLocked(pseg.Range())
			pseg = mm.pmas.Remove(pseg).NextSegment()
		}
	}

	// "If there are some parts of the specified address space that are not
	// mapped, the Linux version of madvise() ignores them and applies the call
	// to the rest (but returns ENOMEM from the system call, as it should)." -
	// madvise(2)
	if mm.vmas.SpanRange(ar) != ar.Length() {
		return syserror.ENOMEM
	}
	return nil
}

// MSyncOpts holds options to MSync.
type MSyncOpts struct {
	// Sync has the semantics of MS_SYNC.
	Sync bool

	// Invalidate has the semantics of MS_INVALIDATE.
	Invalidate bool
}
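
// Illustrative sketch (not part of the original source): an msync(2) handler
// might decode MS_* flags into MSyncOpts as below; the MS_SYNC and
// MS_INVALIDATE constants are assumed to be provided by the abi/linux package.
// MS_ASYNC requires no additional work here, so it does not map onto a field.
//
//	return mm.MSync(ctx, addr, length, MSyncOpts{
//		Sync:       flags&linux.MS_SYNC != 0,
//		Invalidate: flags&linux.MS_INVALIDATE != 0,
//	})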

// MSync implements the semantics of Linux's msync().
func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length uint64, opts MSyncOpts) error {
	if addr != addr.RoundDown() {
		return syserror.EINVAL
	}
	if length == 0 {
		return nil
	}
	la, ok := usermem.Addr(length).RoundUp()
	if !ok {
		return syserror.ENOMEM
	}
	ar, ok := addr.ToRange(uint64(la))
	if !ok {
		return syserror.ENOMEM
	}

	mm.mappingMu.RLock()
	// Can't defer mm.mappingMu.RUnlock(); see below.
	vseg := mm.vmas.LowerBoundSegment(ar.Start)
	if !vseg.Ok() {
		mm.mappingMu.RUnlock()
		return syserror.ENOMEM
	}
	var unmapped bool
	lastEnd := ar.Start
	for {
		if !vseg.Ok() {
			mm.mappingMu.RUnlock()
			unmapped = true
			break
		}
		if lastEnd < vseg.Start() {
			unmapped = true
		}
		lastEnd = vseg.End()
		vma := vseg.ValuePtr()
		if opts.Invalidate && vma.mlockMode != memmap.MLockNone {
			mm.mappingMu.RUnlock()
			return syserror.EBUSY
		}
		// It's only possible to have dirtied the Mappable through a shared
		// mapping. Don't check if the mapping is writable, because mprotect
		// may have changed this, and also because Linux doesn't.
		if id := vma.id; opts.Sync && id != nil && vma.mappable != nil && !vma.private {
			// We can't call memmap.MappingIdentity.Msync while holding
			// mm.mappingMu since it may take fs locks that precede it in the
			// lock order.
			id.IncRef()
			mr := vseg.mappableRangeOf(vseg.Range().Intersect(ar))
			mm.mappingMu.RUnlock()
			err := id.Msync(ctx, mr)
			id.DecRef()
			if err != nil {
				return err
			}
			if lastEnd >= ar.End {
				break
			}
			mm.mappingMu.RLock()
			vseg = mm.vmas.LowerBoundSegment(lastEnd)
		} else {
			if lastEnd >= ar.End {
				mm.mappingMu.RUnlock()
				break
			}
			vseg = vseg.NextSegment()
		}
	}

	if unmapped {
		return syserror.ENOMEM
	}
	return nil
}

// GetSharedFutexKey is used by kernel.Task.GetSharedKey.
func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr usermem.Addr) (futex.Key, error) {
	ar, ok := addr.ToRange(4) // sizeof(int32).
	if !ok {
		return futex.Key{}, syserror.EFAULT
	}

	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	vseg, _, err := mm.getVMAsLocked(ctx, ar, usermem.Read, false)
	if err != nil {
		return futex.Key{}, err
	}
	vma := vseg.ValuePtr()

	if vma.private {
		return futex.Key{
			Kind:   futex.KindSharedPrivate,
			Offset: uint64(addr),
		}, nil
	}

	if vma.id != nil {
		vma.id.IncRef()
	}
	return futex.Key{
		Kind:            futex.KindSharedMappable,
		Mappable:        vma.mappable,
		MappingIdentity: vma.id,
		Offset:          vseg.mappableOffsetAt(addr),
	}, nil
}

// VirtualMemorySize returns the combined length in bytes of all mappings in
// mm.
func (mm *MemoryManager) VirtualMemorySize() uint64 {
	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	return uint64(mm.usageAS)
}

// VirtualMemorySizeRange returns the combined length in bytes of all mappings
// in ar in mm.
func (mm *MemoryManager) VirtualMemorySizeRange(ar usermem.AddrRange) uint64 {
	mm.mappingMu.RLock()
	defer mm.mappingMu.RUnlock()
	return uint64(mm.vmas.SpanRange(ar))
}

// ResidentSetSize returns the value advertised as mm's RSS in bytes.
func (mm *MemoryManager) ResidentSetSize() uint64 {
	mm.activeMu.RLock()
	defer mm.activeMu.RUnlock()
	return uint64(mm.curRSS)
}

// MaxResidentSetSize returns the value advertised as mm's max RSS in bytes.
func (mm *MemoryManager) MaxResidentSetSize() uint64 {
	mm.activeMu.RLock()
	defer mm.activeMu.RUnlock()
	return uint64(mm.maxRSS)
}