224 lines
6.9 KiB
Go
224 lines
6.9 KiB
Go
// Copyright 2018 Google Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package kvm
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
"syscall"
|
|
|
|
"gvisor.googlesource.com/gvisor/pkg/log"
|
|
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
|
|
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
|
|
)
|
|
|
|
// reservedMemory is the size of the chunk of physical memory, starting at
// physical address zero, that is never handed out. Some special pages live
// in this range, so the entire range is treated as off limits.
//
// Other architectures may define this to be zero.
const reservedMemory = 0x100000000
|
|
|
|
// region describes the half-open virtual address range
// [virtual, virtual+length).
type region struct {
	// virtual is the first address of the range.
	virtual uintptr

	// length is the size of the range in bytes.
	length uintptr
}
|
|
|
|
type physicalRegion struct {
|
|
region
|
|
physical uintptr
|
|
}
|
|
|
|
// physicalRegions contains a list of available physical regions.
|
|
//
|
|
// The physical value used in physicalRegions is a number indicating the
|
|
// physical offset, aligned appropriately and starting above reservedMemory.
|
|
var physicalRegions []physicalRegion
|
|
|
|
// fillAddressSpace fills the host address space with PROT_NONE mappings until
|
|
// the number of available bits until we have a host address space size that is
|
|
// equal to the physical address space.
|
|
//
|
|
// The excluded regions are returned.
|
|
func fillAddressSpace() (excludedRegions []region) {
|
|
// We can cut vSize in half, because the kernel will be using the top
|
|
// half and we ignore it while constructing mappings. It's as if we've
|
|
// already excluded half the possible addresses.
|
|
vSize := uintptr(1) << ring0.VirtualAddressBits()
|
|
vSize = vSize >> 1
|
|
|
|
// We exclude reservedMemory below from our physical memory size, so it
|
|
// needs to be dropped here as well. Otherwise, we could end up with
|
|
// physical addresses that are beyond what is mapped.
|
|
pSize := uintptr(1) << ring0.PhysicalAddressBits()
|
|
pSize -= reservedMemory
|
|
|
|
// Sanity check.
|
|
if vSize < pSize {
|
|
panic(fmt.Sprintf("vSize (%x) < pSize (%x)", vSize, pSize))
|
|
}
|
|
|
|
// Add specifically excluded regions; see excludeVirtualRegion.
|
|
applyVirtualRegions(func(vr virtualRegion) {
|
|
if excludeVirtualRegion(vr) {
|
|
excludedRegions = append(excludedRegions, vr.region)
|
|
vSize -= vr.length
|
|
log.Infof("excluded: virtual [%x,%x)", vr.virtual, vr.virtual+vr.length)
|
|
}
|
|
})
|
|
|
|
// Calculate the required space and fill it.
|
|
//
|
|
// Note carefully that we add faultBlockSize to required up front, and
|
|
// on each iteration of the loop below (i.e. each new physical region
|
|
// we define), we add faultBlockSize again. This is done because the
|
|
// computation of physical regions will ensure proper alignments with
|
|
// faultBlockSize, potentially causing up to faultBlockSize bytes in
|
|
// internal fragmentation for each physical region. So we need to
|
|
// account for this properly during allocation.
|
|
requiredAddr, ok := usermem.Addr(vSize - pSize + faultBlockSize).RoundUp()
|
|
if !ok {
|
|
panic(fmt.Sprintf(
|
|
"overflow for vSize (%x) - pSize (%x) + faultBlockSize (%x)",
|
|
vSize, pSize, faultBlockSize))
|
|
}
|
|
required := uintptr(requiredAddr)
|
|
current := required // Attempted mmap size.
|
|
for filled := uintptr(0); filled < required && current > 0; {
|
|
addr, _, errno := syscall.RawSyscall6(
|
|
syscall.SYS_MMAP,
|
|
0, // Suggested address.
|
|
current,
|
|
syscall.PROT_NONE,
|
|
syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE|syscall.MAP_NORESERVE,
|
|
0, 0)
|
|
if errno != 0 {
|
|
// Attempt half the size; overflow not possible.
|
|
currentAddr, _ := usermem.Addr(current >> 1).RoundUp()
|
|
current = uintptr(currentAddr)
|
|
continue
|
|
}
|
|
// We filled a block.
|
|
filled += current
|
|
excludedRegions = append(excludedRegions, region{
|
|
virtual: addr,
|
|
length: current,
|
|
})
|
|
// See comment above.
|
|
if filled != required {
|
|
required += faultBlockSize
|
|
}
|
|
}
|
|
if current == 0 {
|
|
panic("filling address space failed")
|
|
}
|
|
sort.Slice(excludedRegions, func(i, j int) bool {
|
|
return excludedRegions[i].virtual < excludedRegions[j].virtual
|
|
})
|
|
for _, r := range excludedRegions {
|
|
log.Infof("region: virtual [%x,%x)", r.virtual, r.virtual+r.length)
|
|
}
|
|
return excludedRegions
|
|
}
|
|
|
|
// computePhysicalRegions computes physical regions.
|
|
func computePhysicalRegions(excludedRegions []region) (physicalRegions []physicalRegion) {
|
|
physical := uintptr(reservedMemory)
|
|
addValidRegion := func(virtual, length uintptr) {
|
|
if length == 0 {
|
|
return
|
|
}
|
|
if virtual == 0 {
|
|
virtual += usermem.PageSize
|
|
length -= usermem.PageSize
|
|
}
|
|
if end := virtual + length; end > ring0.MaximumUserAddress {
|
|
length -= (end - ring0.MaximumUserAddress)
|
|
}
|
|
if length == 0 {
|
|
return
|
|
}
|
|
// Round physical up to the same alignment as the virtual
|
|
// address (with respect to faultBlockSize).
|
|
if offset := virtual &^ faultBlockMask; physical&^faultBlockMask != offset {
|
|
if newPhysical := (physical & faultBlockMask) + offset; newPhysical > physical {
|
|
physical = newPhysical // Round up by only a little bit.
|
|
} else {
|
|
physical = ((physical + faultBlockSize) & faultBlockMask) + offset
|
|
}
|
|
}
|
|
physicalRegions = append(physicalRegions, physicalRegion{
|
|
region: region{
|
|
virtual: virtual,
|
|
length: length,
|
|
},
|
|
physical: physical,
|
|
})
|
|
physical += length
|
|
}
|
|
lastExcludedEnd := uintptr(0)
|
|
for _, r := range excludedRegions {
|
|
addValidRegion(lastExcludedEnd, r.virtual-lastExcludedEnd)
|
|
lastExcludedEnd = r.virtual + r.length
|
|
}
|
|
addValidRegion(lastExcludedEnd, ring0.MaximumUserAddress-lastExcludedEnd)
|
|
|
|
// Dump our all physical regions.
|
|
for _, r := range physicalRegions {
|
|
log.Infof("physicalRegion: virtual [%x,%x) => physical [%x,%x)",
|
|
r.virtual, r.virtual+r.length, r.physical, r.physical+r.length)
|
|
}
|
|
return physicalRegions
|
|
}
|
|
|
|
// physicalInit initializes physical address mappings.
|
|
func physicalInit() {
|
|
physicalRegions = computePhysicalRegions(fillAddressSpace())
|
|
}
|
|
|
|
// applyPhysicalRegions applies the given function on physical regions.
|
|
//
|
|
// Iteration continues as long as true is returned. The return value is the
|
|
// return from the last call to fn, or true if there are no entries.
|
|
//
|
|
// Precondition: physicalInit must have been called.
|
|
func applyPhysicalRegions(fn func(pr physicalRegion) bool) bool {
|
|
for _, pr := range physicalRegions {
|
|
if !fn(pr) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// translateToPhysical translates the given virtual address.
|
|
//
|
|
// Precondition: physicalInit must have been called.
|
|
//
|
|
//go:nosplit
|
|
func translateToPhysical(virtual uintptr) (physical uintptr, length uintptr, ok bool) {
|
|
for _, pr := range physicalRegions {
|
|
if pr.virtual <= virtual && virtual < pr.virtual+pr.length {
|
|
physical = pr.physical + (virtual - pr.virtual)
|
|
length = pr.length - (virtual - pr.virtual)
|
|
ok = true
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|