gvisor/pkg/sentry/platform/ring0/pagetables/walker_amd64.go

308 lines
8.8 KiB
Go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build amd64

package pagetables
// Visitor is the callback interface that a Walker drives while it traverses
// page tables. Besides receiving the entries found by the walk, it tells the
// walker how to treat missing entries (requiresAlloc) and existing super
// pages that are only partially covered by the range (requiresSplit).
type Visitor interface {
	// visit is called on each PTE.
	//
	// start is the address the walk has reached when the entry is visited.
	// align is the size of the region covered by the entry minus one; the
	// walker passes pteSize-1, pmdSize-1 or pudSize-1 depending on the
	// level of the entry.
	visit(start uintptr, pte *PTE, align uintptr)

	// requiresAlloc indicates that new entries should be allocated within
	// the walked range.
	requiresAlloc() bool

	// requiresSplit indicates that entries in the given range should be
	// split if they are huge or jumbo pages.
	requiresSplit() bool
}
// Walker walks page tables, handing the entries it finds to its visitor.
type Walker struct {
	// pageTables are the tables to walk.
	pageTables *PageTables

	// visitor receives every visited entry and decides, through
	// requiresAlloc and requiresSplit, whether the walk allocates missing
	// tables and splits super pages.
	visitor Visitor
}
// iterateRange visits the page table entries covering [start, end), clipping
// the range to the canonical parts of the address space before handing each
// part to iterateRangeCanonical.
//
// When the visitor requires allocation, Set _must_ be called on every PTE it
// is given. Super pages are the exception: if the visitor does not install a
// valid super page (huge or jumbo) when offered one, the walk descends to
// individual entries instead.
//
// The walk prefers super pages wherever they fit; the visitor can tell which
// kind of entry it received from the range passed to the callback.
//
// With requiresAlloc the visited range has no gaps. Without it, entries that
// are not present are skipped, so the iteration will usually be full of gaps.
//
// Prefer Map, Unmap, etc. over calling this function directly when possible.
//
// Precondition: start must be page-aligned; otherwise the function panics.
//
// Precondition: start must not be greater than end; otherwise the function
// panics.
//
// Precondition: If requiresAlloc is true, then start and end should not span
// non-canonical ranges. If they do, a panic will result.
//
//go:nosplit
func (w *Walker) iterateRange(start, end uintptr) {
	if start%pteSize != 0 {
		panic("unaligned start")
	}
	if end < start {
		panic("start > end")
	}

	// Classify where the range begins: in the lower canonical half, in the
	// upper canonical half, or (neither) inside the non-canonical hole.
	startsLow := start < lowerTop
	startsHigh := !startsLow && start >= upperBottom

	// A range that lies entirely within one canonical half is walked as is.
	if startsHigh || (startsLow && end <= lowerTop) {
		w.iterateRangeCanonical(start, end)
		return
	}

	// Everything else touches the non-canonical hole, which can never be
	// populated.
	if w.visitor.requiresAlloc() {
		panic("alloc spans non-canonical range")
	}

	// Walk whatever canonical pieces remain on either side of the hole.
	if startsLow {
		w.iterateRangeCanonical(start, lowerTop)
	}
	if end > upperBottom {
		w.iterateRangeCanonical(upperBottom, end)
	}
}
// next rounds start down to a multiple of size and then steps forward by one
// size, yielding the first size-aligned address strictly above start. size is
// expected to be a power of two, since size-1 is used as a bit mask. The
// addition wraps around on overflow.
//
//go:nosplit
func next(start uintptr, size uintptr) uintptr {
	return (start &^ (size - 1)) + size
}
// iterateRangeCanonical walks a canonical range.
//
// The walk consists of four nested loops, one per table level (PGD, PUD, PMD
// and PTE). Each loop begins at the index that start selects within its table
// and runs until start reaches end or the table is exhausted. start is
// advanced as entries are consumed, so it doubles as the walk cursor.
//
// At every level above the PTEs, an invalid entry is skipped when the visitor
// does not require allocation and is otherwise backed by a newly allocated
// table. At the PUD and PMD levels the visitor is first offered a super page
// when start is aligned to the entry size and at least one full entry remains.
// An existing super page is split into entries of the next level down when
// the visitor requires splitting and the range covers only part of the page;
// otherwise it is handed to the visitor directly.
//
// For each table below the root, the loop counts the entries it found or left
// invalid. When the count equals entriesPerPage the table is released through
// the allocator and the entry referring to it is cleared. Each iteration adds
// at most one to the count, so a table is released only when a single pass
// covers every one of its entries.
//
//go:nosplit
func (w *Walker) iterateRangeCanonical(start, end uintptr) {
	for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
		var (
			pgdEntry = &w.pageTables.root[pgdIndex]
			pudEntries *PTEs
		)
		if !pgdEntry.Valid() {
			if !w.visitor.requiresAlloc() {
				// Skip over this entry.
				start = next(start, pgdSize)
				continue
			}

			// Allocate a new table of PUD entries and install it in
			// this PGD entry.
			pudEntries = w.pageTables.Allocator.NewPTEs()
			pgdEntry.setPageTable(w.pageTables, pudEntries)
		} else {
			pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
		}

		// Map the next level. clearPUDEntries counts the PUD entries
		// seen in this pass that were found, or ended up, invalid.
		clearPUDEntries := uint16(0)

		for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
			var (
				pudEntry = &pudEntries[pudIndex]
				pmdEntries *PTEs
			)
			if !pudEntry.Valid() {
				if !w.visitor.requiresAlloc() {
					// Skip over this entry.
					clearPUDEntries++
					start = next(start, pudSize)
					continue
				}

				// This level has 1-GB super pages. Is this
				// entire region at least as large as a single
				// PUD entry? If so, we can skip allocating a
				// new page for the pmd.
				if start&(pudSize-1) == 0 && end-start >= pudSize {
					pudEntry.SetSuper()
					w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
					if pudEntry.Valid() {
						// The visitor installed the super page.
						start = next(start, pudSize)
						continue
					}
				}

				// No super page was installed: allocate a new
				// table of PMD entries and install it in this
				// PUD entry.
				pmdEntries = w.pageTables.Allocator.NewPTEs()
				pudEntry.setPageTable(w.pageTables, pmdEntries)

			} else if pudEntry.IsSuper() {
				// Does this page need to be split? That is the
				// case when the visitor asks for splitting and
				// the range starts inside the page or ends
				// before the page does.
				if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
					// Install the relevant entries: a full
					// table of PMD-level super pages that
					// together cover the same addresses with
					// the same options as the original entry.
					pmdEntries = w.pageTables.Allocator.NewPTEs()
					for index := uint16(0); index < entriesPerPage; index++ {
						pmdEntries[index].SetSuper()
						pmdEntries[index].Set(
							pudEntry.Address()+(pmdSize*uintptr(index)),
							pudEntry.Opts())
					}
					pudEntry.setPageTable(w.pageTables, pmdEntries)
					// Fall through to walk the new PMD table.
				} else {
					// A super page to be checked directly.
					w.visitor.visit(uintptr(start), pudEntry, pudSize-1)

					// Might have been cleared.
					if !pudEntry.Valid() {
						clearPUDEntries++
					}

					// Note that the super page was changed.
					start = next(start, pudSize)
					continue
				}
			} else {
				pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
			}

			// Map the next level, since this is valid.
			// clearPMDEntries counts the PMD entries seen in this
			// pass that were found, or ended up, invalid.
			clearPMDEntries := uint16(0)

			for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
				var (
					pmdEntry = &pmdEntries[pmdIndex]
					pteEntries *PTEs
				)
				if !pmdEntry.Valid() {
					if !w.visitor.requiresAlloc() {
						// Skip over this entry.
						clearPMDEntries++
						start = next(start, pmdSize)
						continue
					}

					// This level has 2-MB huge pages. Is the
					// remaining region aligned to, and at
					// least as large as, a single PMD entry?
					// As above, we can skip allocating a new page.
					if start&(pmdSize-1) == 0 && end-start >= pmdSize {
						pmdEntry.SetSuper()
						w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
						if pmdEntry.Valid() {
							// The visitor installed the huge page.
							start = next(start, pmdSize)
							continue
						}
					}

					// No huge page was installed: allocate a
					// new table of PTEs and install it in
					// this PMD entry.
					pteEntries = w.pageTables.Allocator.NewPTEs()
					pmdEntry.setPageTable(w.pageTables, pteEntries)

				} else if pmdEntry.IsSuper() {
					// Does this page need to be split? Same
					// test as at the PUD level, at PMD size.
					if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) {
						// Install the relevant entries: a
						// full table of PTEs covering the
						// same addresses with the same
						// options as the huge page.
						pteEntries = w.pageTables.Allocator.NewPTEs()
						for index := uint16(0); index < entriesPerPage; index++ {
							pteEntries[index].Set(
								pmdEntry.Address()+(pteSize*uintptr(index)),
								pmdEntry.Opts())
						}
						pmdEntry.setPageTable(w.pageTables, pteEntries)
						// Fall through to walk the new PTE table.
					} else {
						// A huge page to be checked directly.
						w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)

						// Might have been cleared.
						if !pmdEntry.Valid() {
							clearPMDEntries++
						}

						// Note that the huge page was changed.
						start = next(start, pmdSize)
						continue
					}
				} else {
					pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
				}

				// Map the next level, since this is valid.
				// clearPTEEntries counts the PTEs seen in this
				// pass that were found, or ended up, invalid.
				clearPTEEntries := uint16(0)

				for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
					var (
						pteEntry = &pteEntries[pteIndex]
					)
					// An absent entry is only visited when the
					// visitor is expected to populate it.
					if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
						clearPTEEntries++
						start += pteSize
						continue
					}

					// At this point, we are guaranteed that start%pteSize == 0.
					w.visitor.visit(uintptr(start), pteEntry, pteSize-1)
					if !pteEntry.Valid() {
						// An allocating visitor must leave
						// every visited PTE valid.
						if w.visitor.requiresAlloc() {
							panic("PTE not set after iteration with requiresAlloc!")
						}
						clearPTEEntries++
					}

					// Note that the pte was changed.
					start += pteSize
					continue
				}

				// Check if we no longer need this page: every PTE
				// in the table was walked and none is valid.
				if clearPTEEntries == entriesPerPage {
					pmdEntry.Clear()
					w.pageTables.Allocator.FreePTEs(pteEntries)
					clearPMDEntries++
				}
			}

			// Check if we no longer need this page: every PMD entry
			// in the table was walked and none is valid.
			if clearPMDEntries == entriesPerPage {
				pudEntry.Clear()
				w.pageTables.Allocator.FreePTEs(pmdEntries)
				clearPUDEntries++
			}
		}

		// Check if we no longer need this page: every PUD entry in the
		// table was walked and none is valid.
		if clearPUDEntries == entriesPerPage {
			pgdEntry.Clear()
			w.pageTables.Allocator.FreePTEs(pudEntries)
		}
	}
}