
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kvm
import (
"fmt"
"sort"
"syscall"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
const (
	// reservedMemory is a chunk of physical memory reserved starting at
	// physical address zero. There are some special pages in this region,
	// so we just call the whole thing off. This is 4GB (0x100000000).
	//
	// Other architectures may define this to be zero.
	reservedMemory = 0x100000000
)
// region describes a contiguous range of virtual addresses.
type region struct {
	virtual uintptr
	length  uintptr
}

// physicalRegion is a virtual region together with the physical offset it
// is mapped to; see physicalRegions below.
type physicalRegion struct {
	region
	physical uintptr
}
// physicalRegions contains a list of available physical regions.
//
// The physical value used in physicalRegions is a number indicating the
// physical offset, aligned appropriately and starting above reservedMemory.
//
// It is populated once by physicalInit.
var physicalRegions []physicalRegion
// fillAddressSpace fills the host address space with PROT_NONE mappings
// until the remaining usable virtual address space is no larger than the
// physical address space, so that every physical address can later be
// assigned a unique virtual address (see computePhysicalRegions).
//
// The excluded regions (both pre-existing exclusions and the PROT_NONE
// fillers created here) are returned, sorted by virtual address.
func fillAddressSpace() (excludedRegions []region) {
	// We can cut vSize in half, because the kernel will be using the top
	// half and we ignore it while constructing mappings. It's as if we've
	// already excluded half the possible addresses.
	vSize := uintptr(1) << ring0.VirtualAddressBits()
	vSize = vSize >> 1

	// We exclude reservedMemory below from our physical memory size, so it
	// needs to be dropped here as well. Otherwise, we could end up with
	// physical addresses that are beyond what is mapped.
	pSize := uintptr(1) << ring0.PhysicalAddressBits()
	pSize -= reservedMemory

	// Sanity check: we cannot pack a larger physical space into a smaller
	// virtual one.
	if vSize < pSize {
		panic(fmt.Sprintf("vSize (%x) < pSize (%x)", vSize, pSize))
	}

	// Add specifically excluded regions; see excludeVirtualRegion.
	applyVirtualRegions(func(vr virtualRegion) {
		if excludeVirtualRegion(vr) {
			excludedRegions = append(excludedRegions, vr.region)
			vSize -= vr.length
			log.Infof("excluded: virtual [%x,%x)", vr.virtual, vr.virtual+vr.length)
		}
	})

	// Calculate the required space and fill it.
	//
	// Note carefully that we add faultBlockSize to required up front, and
	// on each iteration of the loop below (i.e. each new physical region
	// we define), we add faultBlockSize again. This is done because the
	// computation of physical regions will ensure proper alignments with
	// faultBlockSize, potentially causing up to faultBlockSize bytes in
	// internal fragmentation for each physical region. So we need to
	// account for this properly during allocation.
	requiredAddr, ok := usermem.Addr(vSize - pSize + faultBlockSize).RoundUp()
	if !ok {
		panic(fmt.Sprintf(
			"overflow for vSize (%x) - pSize (%x) + faultBlockSize (%x)",
			vSize, pSize, faultBlockSize))
	}
	required := uintptr(requiredAddr)
	current := required // Attempted mmap size.
	for filled := uintptr(0); filled < required && current > 0; {
		// Reserve address space with an anonymous PROT_NONE mapping;
		// MAP_NORESERVE means no backing is committed.
		addr, _, errno := syscall.RawSyscall6(
			syscall.SYS_MMAP,
			0, // Suggested address.
			current,
			syscall.PROT_NONE,
			syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE|syscall.MAP_NORESERVE,
			0, 0)
		if errno != 0 {
			// Attempt half the size; overflow not possible.
			currentAddr, _ := usermem.Addr(current >> 1).RoundUp()
			current = uintptr(currentAddr)
			continue
		}
		// We filled a block.
		filled += current
		excludedRegions = append(excludedRegions, region{
			virtual: addr,
			length:  current,
		})
		// See comment above: each new region may lose up to
		// faultBlockSize bytes to alignment, so grow the target.
		if filled != required {
			required += faultBlockSize
		}
	}
	if current == 0 {
		// The mmap size was halved down to zero without satisfying
		// required; the address space cannot be filled.
		panic("filling address space failed")
	}
	// Sort by virtual address so callers can walk the gaps between
	// excluded regions in order.
	sort.Slice(excludedRegions, func(i, j int) bool {
		return excludedRegions[i].virtual < excludedRegions[j].virtual
	})
	for _, r := range excludedRegions {
		log.Infof("region: virtual [%x,%x)", r.virtual, r.virtual+r.length)
	}
	return excludedRegions
}
// computePhysicalRegions computes physical regions: the virtual gaps
// between the excludedRegions (which arrive sorted by virtual address; see
// fillAddressSpace) are assigned increasing physical offsets, starting
// above reservedMemory.
func computePhysicalRegions(excludedRegions []region) (physicalRegions []physicalRegion) {
	// physical is the next physical offset to hand out; it only grows.
	physical := uintptr(reservedMemory)

	// addValidRegion appends [virtual, virtual+length) as a physical
	// region, after skipping the zero page, trimming anything beyond the
	// maximum user address, and aligning physical with virtual modulo
	// faultBlockSize.
	addValidRegion := func(virtual, length uintptr) {
		if length == 0 {
			return
		}
		// Never use the zero page; skip past it.
		if virtual == 0 {
			virtual += usermem.PageSize
			length -= usermem.PageSize
		}
		// Trim the tail that extends beyond the maximum user address.
		if end := virtual + length; end > ring0.MaximumUserAddress {
			length -= (end - ring0.MaximumUserAddress)
		}
		if length == 0 {
			return
		}
		// Round physical up to the same alignment as the virtual
		// address (with respect to faultBlockSize).
		if offset := virtual &^ faultBlockMask; physical&^faultBlockMask != offset {
			if newPhysical := (physical & faultBlockMask) + offset; newPhysical > physical {
				physical = newPhysical // Round up by only a little bit.
			} else {
				physical = ((physical + faultBlockSize) & faultBlockMask) + offset
			}
		}
		physicalRegions = append(physicalRegions, physicalRegion{
			region: region{
				virtual: virtual,
				length:  length,
			},
			physical: physical,
		})
		physical += length
	}

	// Walk the gaps between excluded regions, then the tail up to the
	// maximum user address.
	lastExcludedEnd := uintptr(0)
	for _, r := range excludedRegions {
		addValidRegion(lastExcludedEnd, r.virtual-lastExcludedEnd)
		lastExcludedEnd = r.virtual + r.length
	}
	addValidRegion(lastExcludedEnd, ring0.MaximumUserAddress-lastExcludedEnd)

	// Dump all physical regions.
	for _, r := range physicalRegions {
		log.Infof("physicalRegion: virtual [%x,%x) => physical [%x,%x)",
			r.virtual, r.virtual+r.length, r.physical, r.physical+r.length)
	}
	return physicalRegions
}
// physicalInit initializes physical address mappings.
func physicalInit() {
physicalRegions = computePhysicalRegions(fillAddressSpace())
}
// applyPhysicalRegions applies the given function on physical regions.
//
// Iteration continues as long as true is returned. The return value is the
// return from the last call to fn, or true if there are no entries.
//
// Precondition: physicalInit must have been called.
func applyPhysicalRegions(fn func(pr physicalRegion) bool) bool {
	for i := range physicalRegions {
		if !fn(physicalRegions[i]) {
			return false
		}
	}
	return true
}
// translateToPhysical translates the given virtual address, returning the
// corresponding physical address and the number of bytes remaining in the
// containing region. ok is false if no region contains the address.
//
// Precondition: physicalInit must have been called.
//
//go:nosplit
func translateToPhysical(virtual uintptr) (physical uintptr, length uintptr, ok bool) {
	for _, pr := range physicalRegions {
		// Skip regions that do not contain the address.
		if virtual < pr.virtual || virtual >= pr.virtual+pr.length {
			continue
		}
		offset := virtual - pr.virtual
		return pr.physical + offset, pr.length - offset, true
	}
	return 0, 0, false
}