Minor page tables improvements.
* Make split safe.
* Enable looking up next valid address.
* Support mappings with !accessType.Any(), distinct from unmap.

These changes allow for the use of pagetables in low-level OS packages, such as ring0, and allow for the use of pagetables for more generic address space reservation (by writing entries with no access specified).

Updates #5039

PiperOrigin-RevId: 355109016
This commit is contained in:
parent
0c8cc66117
commit
5fa683ffdf
|
@ -9,7 +9,10 @@ package(licenses = ["notice"])
|
||||||
# architecture builds.
|
# architecture builds.
|
||||||
go_template(
|
go_template(
|
||||||
name = "generic_walker_%s" % arch,
|
name = "generic_walker_%s" % arch,
|
||||||
srcs = ["walker_%s.go" % arch],
|
srcs = [
|
||||||
|
"walker_generic.go",
|
||||||
|
"walker_%s.go" % arch,
|
||||||
|
],
|
||||||
opt_types = [
|
opt_types = [
|
||||||
"Visitor",
|
"Visitor",
|
||||||
],
|
],
|
||||||
|
@ -50,6 +53,7 @@ go_library(
|
||||||
"pcids_x86.go",
|
"pcids_x86.go",
|
||||||
"walker_amd64.go",
|
"walker_amd64.go",
|
||||||
"walker_arm64.go",
|
"walker_arm64.go",
|
||||||
|
"walker_generic.go",
|
||||||
":walker_empty_amd64",
|
":walker_empty_amd64",
|
||||||
":walker_empty_arm64",
|
":walker_empty_arm64",
|
||||||
":walker_lookup_amd64",
|
":walker_lookup_amd64",
|
||||||
|
|
|
@ -60,6 +60,7 @@ type PageTables struct {
|
||||||
|
|
||||||
// Init initializes a set of PageTables.
|
// Init initializes a set of PageTables.
|
||||||
//
|
//
|
||||||
|
// +checkescape:hard,stack
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) Init(allocator Allocator) {
|
func (p *PageTables) Init(allocator Allocator) {
|
||||||
p.Allocator = allocator
|
p.Allocator = allocator
|
||||||
|
@ -92,7 +93,6 @@ func NewWithUpper(a Allocator, upperSharedPageTables *PageTables, upperStart uin
|
||||||
}
|
}
|
||||||
|
|
||||||
p.InitArch(a)
|
p.InitArch(a)
|
||||||
|
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ type mapVisitor struct {
|
||||||
// visit is used for map.
|
// visit is used for map.
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
|
||||||
p := v.physical + (start - uintptr(v.target))
|
p := v.physical + (start - uintptr(v.target))
|
||||||
if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) {
|
if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) {
|
||||||
v.prev = true
|
v.prev = true
|
||||||
|
@ -122,9 +122,10 @@ func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
||||||
// install a valid entry here, however we must zap any existing
|
// install a valid entry here, however we must zap any existing
|
||||||
// entry to ensure this happens.
|
// entry to ensure this happens.
|
||||||
pte.Clear()
|
pte.Clear()
|
||||||
return
|
return true
|
||||||
}
|
}
|
||||||
pte.Set(p, v.opts)
|
pte.Set(p, v.opts)
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
|
@ -140,7 +141,6 @@ func (*mapVisitor) requiresSplit() bool { return true }
|
||||||
// Precondition: addr & length must be page-aligned, their sum must not overflow.
|
// Precondition: addr & length must be page-aligned, their sum must not overflow.
|
||||||
//
|
//
|
||||||
// +checkescape:hard,stack
|
// +checkescape:hard,stack
|
||||||
//
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
|
func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
|
||||||
if p.readOnlyShared {
|
if p.readOnlyShared {
|
||||||
|
@ -158,9 +158,6 @@ func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physic
|
||||||
length = p.upperStart - uintptr(addr)
|
length = p.upperStart - uintptr(addr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !opts.AccessType.Any() {
|
|
||||||
return p.Unmap(addr, length)
|
|
||||||
}
|
|
||||||
w := mapWalker{
|
w := mapWalker{
|
||||||
pageTables: p,
|
pageTables: p,
|
||||||
visitor: mapVisitor{
|
visitor: mapVisitor{
|
||||||
|
@ -187,9 +184,10 @@ func (*unmapVisitor) requiresSplit() bool { return true }
|
||||||
// visit unmaps the given entry.
|
// visit unmaps the given entry.
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
|
||||||
pte.Clear()
|
pte.Clear()
|
||||||
v.count++
|
v.count++
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unmap unmaps the given range.
|
// Unmap unmaps the given range.
|
||||||
|
@ -199,7 +197,6 @@ func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
||||||
// Precondition: addr & length must be page-aligned, their sum must not overflow.
|
// Precondition: addr & length must be page-aligned, their sum must not overflow.
|
||||||
//
|
//
|
||||||
// +checkescape:hard,stack
|
// +checkescape:hard,stack
|
||||||
//
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
|
func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
|
||||||
if p.readOnlyShared {
|
if p.readOnlyShared {
|
||||||
|
@ -241,8 +238,9 @@ func (*emptyVisitor) requiresSplit() bool { return false }
|
||||||
// visit counts the entry being visited.
|
// visit counts the entry being visited.
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
|
||||||
v.count++
|
v.count++
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsEmpty checks if the given range is empty.
|
// IsEmpty checks if the given range is empty.
|
||||||
|
@ -250,7 +248,6 @@ func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
||||||
// Precondition: addr & length must be page-aligned.
|
// Precondition: addr & length must be page-aligned.
|
||||||
//
|
//
|
||||||
// +checkescape:hard,stack
|
// +checkescape:hard,stack
|
||||||
//
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
|
func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
|
||||||
w := emptyWalker{
|
w := emptyWalker{
|
||||||
|
@ -262,20 +259,28 @@ func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
|
||||||
|
|
||||||
// lookupVisitor is used for lookup.
|
// lookupVisitor is used for lookup.
|
||||||
type lookupVisitor struct {
|
type lookupVisitor struct {
|
||||||
target uintptr // Input.
|
target uintptr // Input & Output.
|
||||||
physical uintptr // Output.
|
findFirst bool // Input.
|
||||||
opts MapOpts // Output.
|
physical uintptr // Output.
|
||||||
|
size uintptr // Output.
|
||||||
|
opts MapOpts // Output.
|
||||||
}
|
}
|
||||||
|
|
||||||
// visit matches the given address.
|
// visit matches the given address.
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
|
||||||
if !pte.Valid() {
|
if !pte.Valid() {
|
||||||
return
|
// If looking for the first, then we just keep iterating until
|
||||||
|
// we find a valid entry.
|
||||||
|
return v.findFirst
|
||||||
}
|
}
|
||||||
v.physical = pte.Address() + (start - uintptr(v.target))
|
// Is this within the current range?
|
||||||
|
v.target = start
|
||||||
|
v.physical = pte.Address()
|
||||||
|
v.size = (align + 1)
|
||||||
v.opts = pte.Opts()
|
v.opts = pte.Opts()
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
|
@ -286,20 +291,29 @@ func (*lookupVisitor) requiresSplit() bool { return false }
|
||||||
|
|
||||||
// Lookup returns the physical address for the given virtual address.
|
// Lookup returns the physical address for the given virtual address.
|
||||||
//
|
//
|
||||||
// +checkescape:hard,stack
|
// If findFirst is true, then the next valid address after addr is returned.
|
||||||
|
// If findFirst is false, then only a mapping for addr will be returned.
|
||||||
//
|
//
|
||||||
|
// Note that if size is zero, then no matching entry was found.
|
||||||
|
//
|
||||||
|
// +checkescape:hard,stack
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) {
|
func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.Addr, physical, size uintptr, opts MapOpts) {
|
||||||
mask := uintptr(usermem.PageSize - 1)
|
mask := uintptr(usermem.PageSize - 1)
|
||||||
offset := uintptr(addr) & mask
|
addr &^= usermem.Addr(mask)
|
||||||
w := lookupWalker{
|
w := lookupWalker{
|
||||||
pageTables: p,
|
pageTables: p,
|
||||||
visitor: lookupVisitor{
|
visitor: lookupVisitor{
|
||||||
target: uintptr(addr &^ usermem.Addr(mask)),
|
target: uintptr(addr),
|
||||||
|
findFirst: findFirst,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
w.iterateRange(uintptr(addr), uintptr(addr)+1)
|
end := ^usermem.Addr(0) &^ usermem.Addr(mask)
|
||||||
return w.visitor.physical + offset, w.visitor.opts
|
if !findFirst {
|
||||||
|
end = addr + 1
|
||||||
|
}
|
||||||
|
w.iterateRange(uintptr(addr), uintptr(end))
|
||||||
|
return usermem.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts
|
||||||
}
|
}
|
||||||
|
|
||||||
// MarkReadOnlyShared marks the pagetables read-only and can be shared.
|
// MarkReadOnlyShared marks the pagetables read-only and can be shared.
|
||||||
|
|
|
@ -156,12 +156,7 @@ func (p *PTE) IsSect() bool {
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PTE) Set(addr uintptr, opts MapOpts) {
|
func (p *PTE) Set(addr uintptr, opts MapOpts) {
|
||||||
if !opts.AccessType.Any() {
|
v := (addr &^ optionMask) | nG | readOnly | protDefault
|
||||||
p.Clear()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
v := (addr &^ optionMask) | protDefault | nG | readOnly
|
|
||||||
|
|
||||||
if p.IsSect() {
|
if p.IsSect() {
|
||||||
// Note that this is inherited from the previous instance. Set
|
// Note that this is inherited from the previous instance. Set
|
||||||
// does not change the value of Sect. See above.
|
// does not change the value of Sect. See above.
|
||||||
|
@ -169,6 +164,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) {
|
||||||
} else {
|
} else {
|
||||||
v |= typePage
|
v |= typePage
|
||||||
}
|
}
|
||||||
|
if !opts.AccessType.Any() {
|
||||||
|
// Leave as non-valid if no access is available.
|
||||||
|
v &^= pteValid
|
||||||
|
}
|
||||||
|
|
||||||
if opts.Global {
|
if opts.Global {
|
||||||
v = v &^ nG
|
v = v &^ nG
|
||||||
|
|
|
@ -43,6 +43,7 @@ const (
|
||||||
|
|
||||||
// InitArch does some additional initialization related to the architecture.
|
// InitArch does some additional initialization related to the architecture.
|
||||||
//
|
//
|
||||||
|
// +checkescape:hard,stack
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) InitArch(allocator Allocator) {
|
func (p *PageTables) InitArch(allocator Allocator) {
|
||||||
if p.upperSharedPageTables != nil {
|
if p.upperSharedPageTables != nil {
|
||||||
|
@ -50,6 +51,7 @@ func (p *PageTables) InitArch(allocator Allocator) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//go:nosplit
|
||||||
func pgdIndex(upperStart uintptr) uintptr {
|
func pgdIndex(upperStart uintptr) uintptr {
|
||||||
if upperStart&(pgdSize-1) != 0 {
|
if upperStart&(pgdSize-1) != 0 {
|
||||||
panic("upperStart should be pgd size aligned")
|
panic("upperStart should be pgd size aligned")
|
||||||
|
|
|
@ -44,6 +44,7 @@ const (
|
||||||
|
|
||||||
// InitArch does some additional initialization related to the architecture.
|
// InitArch does some additional initialization related to the architecture.
|
||||||
//
|
//
|
||||||
|
// +checkescape:hard,stack
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (p *PageTables) InitArch(allocator Allocator) {
|
func (p *PageTables) InitArch(allocator Allocator) {
|
||||||
if p.upperSharedPageTables != nil {
|
if p.upperSharedPageTables != nil {
|
||||||
|
|
|
@ -34,7 +34,7 @@ type checkVisitor struct {
|
||||||
failed string // Output.
|
failed string // Output.
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
|
||||||
v.found = append(v.found, mapping{
|
v.found = append(v.found, mapping{
|
||||||
start: start,
|
start: start,
|
||||||
length: align + 1,
|
length: align + 1,
|
||||||
|
@ -43,7 +43,7 @@ func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
||||||
})
|
})
|
||||||
if v.failed != "" {
|
if v.failed != "" {
|
||||||
// Don't keep looking for errors.
|
// Don't keep looking for errors.
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if v.current >= len(v.expected) {
|
if v.current >= len(v.expected) {
|
||||||
|
@ -58,6 +58,7 @@ func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) {
|
||||||
v.failed = "opts didn't match"
|
v.failed = "opts didn't match"
|
||||||
}
|
}
|
||||||
v.current++
|
v.current++
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*checkVisitor) requiresAlloc() bool { return false }
|
func (*checkVisitor) requiresAlloc() bool { return false }
|
||||||
|
|
|
@ -137,7 +137,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) {
|
||||||
p.Clear()
|
p.Clear()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
v := (addr &^ optionMask) | present | accessed
|
v := (addr &^ optionMask)
|
||||||
|
if opts.AccessType.Any() {
|
||||||
|
v |= present | accessed
|
||||||
|
}
|
||||||
if opts.User {
|
if opts.User {
|
||||||
v |= user
|
v |= user
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,104 +16,10 @@
|
||||||
|
|
||||||
package pagetables
|
package pagetables
|
||||||
|
|
||||||
// Visitor is a generic type.
|
|
||||||
type Visitor interface {
|
|
||||||
// visit is called on each PTE.
|
|
||||||
visit(start uintptr, pte *PTE, align uintptr)
|
|
||||||
|
|
||||||
// requiresAlloc indicates that new entries should be allocated within
|
|
||||||
// the walked range.
|
|
||||||
requiresAlloc() bool
|
|
||||||
|
|
||||||
// requiresSplit indicates that entries in the given range should be
|
|
||||||
// split if they are huge or jumbo pages.
|
|
||||||
requiresSplit() bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walker walks page tables.
|
|
||||||
type Walker struct {
|
|
||||||
// pageTables are the tables to walk.
|
|
||||||
pageTables *PageTables
|
|
||||||
|
|
||||||
// Visitor is the set of arguments.
|
|
||||||
visitor Visitor
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterateRange iterates over all appropriate levels of page tables for the given range.
|
|
||||||
//
|
|
||||||
// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
|
|
||||||
// exception is super pages. If a valid super page (huge or jumbo) cannot be
|
|
||||||
// installed, then the walk will continue to individual entries.
|
|
||||||
//
|
|
||||||
// This algorithm will attempt to maximize the use of super pages whenever
|
|
||||||
// possible. Whether a super page is provided will be clear through the range
|
|
||||||
// provided in the callback.
|
|
||||||
//
|
|
||||||
// Note that if requiresAlloc is true, then no gaps will be present. However,
|
|
||||||
// if alloc is not set, then the iteration will likely be full of gaps.
|
|
||||||
//
|
|
||||||
// Note that this function should generally be avoided in favor of Map, Unmap,
|
|
||||||
// etc. when not necessary.
|
|
||||||
//
|
|
||||||
// Precondition: start must be page-aligned.
|
|
||||||
//
|
|
||||||
// Precondition: start must be less than end.
|
|
||||||
//
|
|
||||||
// Precondition: If requiresAlloc is true, then start and end should not span
|
|
||||||
// non-canonical ranges. If they do, a panic will result.
|
|
||||||
//
|
|
||||||
//go:nosplit
|
|
||||||
func (w *Walker) iterateRange(start, end uintptr) {
|
|
||||||
if start%pteSize != 0 {
|
|
||||||
panic("unaligned start")
|
|
||||||
}
|
|
||||||
if end < start {
|
|
||||||
panic("start > end")
|
|
||||||
}
|
|
||||||
if start < lowerTop {
|
|
||||||
if end <= lowerTop {
|
|
||||||
w.iterateRangeCanonical(start, end)
|
|
||||||
} else if end > lowerTop && end <= upperBottom {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
w.iterateRangeCanonical(start, lowerTop)
|
|
||||||
} else {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
w.iterateRangeCanonical(start, lowerTop)
|
|
||||||
w.iterateRangeCanonical(upperBottom, end)
|
|
||||||
}
|
|
||||||
} else if start < upperBottom {
|
|
||||||
if end <= upperBottom {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
w.iterateRangeCanonical(upperBottom, end)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
w.iterateRangeCanonical(start, end)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// next returns the next address quantized by the given size.
|
|
||||||
//
|
|
||||||
//go:nosplit
|
|
||||||
func next(start uintptr, size uintptr) uintptr {
|
|
||||||
start &= ^(size - 1)
|
|
||||||
start += size
|
|
||||||
return start
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterateRangeCanonical walks a canonical range.
|
// iterateRangeCanonical walks a canonical range.
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
func (w *Walker) iterateRangeCanonical(start, end uintptr) bool {
|
||||||
for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
|
for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
|
||||||
var (
|
var (
|
||||||
pgdEntry = &w.pageTables.root[pgdIndex]
|
pgdEntry = &w.pageTables.root[pgdIndex]
|
||||||
|
@ -127,10 +33,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a new pgd.
|
// Allocate a new pgd.
|
||||||
pudEntries = w.pageTables.Allocator.NewPTEs()
|
pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator.
|
||||||
pgdEntry.setPageTable(w.pageTables, pudEntries)
|
pgdEntry.setPageTable(w.pageTables, pudEntries)
|
||||||
} else {
|
} else {
|
||||||
pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
|
pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map the next level.
|
// Map the next level.
|
||||||
|
@ -155,7 +61,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// new page for the pmd.
|
// new page for the pmd.
|
||||||
if start&(pudSize-1) == 0 && end-start >= pudSize {
|
if start&(pudSize-1) == 0 && end-start >= pudSize {
|
||||||
pudEntry.SetSuper()
|
pudEntry.SetSuper()
|
||||||
w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
|
if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if pudEntry.Valid() {
|
if pudEntry.Valid() {
|
||||||
start = next(start, pudSize)
|
start = next(start, pudSize)
|
||||||
continue
|
continue
|
||||||
|
@ -163,14 +71,14 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a new pud.
|
// Allocate a new pud.
|
||||||
pmdEntries = w.pageTables.Allocator.NewPTEs()
|
pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above.
|
||||||
pudEntry.setPageTable(w.pageTables, pmdEntries)
|
pudEntry.setPageTable(w.pageTables, pmdEntries)
|
||||||
|
|
||||||
} else if pudEntry.IsSuper() {
|
} else if pudEntry.IsSuper() {
|
||||||
// Does this page need to be split?
|
// Does this page need to be split?
|
||||||
if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
|
if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
|
||||||
// Install the relevant entries.
|
// Install the relevant entries.
|
||||||
pmdEntries = w.pageTables.Allocator.NewPTEs()
|
pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above.
|
||||||
for index := uint16(0); index < entriesPerPage; index++ {
|
for index := uint16(0); index < entriesPerPage; index++ {
|
||||||
pmdEntries[index].SetSuper()
|
pmdEntries[index].SetSuper()
|
||||||
pmdEntries[index].Set(
|
pmdEntries[index].Set(
|
||||||
|
@ -180,7 +88,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
pudEntry.setPageTable(w.pageTables, pmdEntries)
|
pudEntry.setPageTable(w.pageTables, pmdEntries)
|
||||||
} else {
|
} else {
|
||||||
// A super page to be checked directly.
|
// A super page to be checked directly.
|
||||||
w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
|
if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Might have been cleared.
|
// Might have been cleared.
|
||||||
if !pudEntry.Valid() {
|
if !pudEntry.Valid() {
|
||||||
|
@ -192,7 +102,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
|
pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) // escapes: see above.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map the next level, since this is valid.
|
// Map the next level, since this is valid.
|
||||||
|
@ -216,7 +126,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// As above, we can skip allocating a new page.
|
// As above, we can skip allocating a new page.
|
||||||
if start&(pmdSize-1) == 0 && end-start >= pmdSize {
|
if start&(pmdSize-1) == 0 && end-start >= pmdSize {
|
||||||
pmdEntry.SetSuper()
|
pmdEntry.SetSuper()
|
||||||
w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
|
if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if pmdEntry.Valid() {
|
if pmdEntry.Valid() {
|
||||||
start = next(start, pmdSize)
|
start = next(start, pmdSize)
|
||||||
continue
|
continue
|
||||||
|
@ -224,7 +136,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a new pmd.
|
// Allocate a new pmd.
|
||||||
pteEntries = w.pageTables.Allocator.NewPTEs()
|
pteEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above.
|
||||||
pmdEntry.setPageTable(w.pageTables, pteEntries)
|
pmdEntry.setPageTable(w.pageTables, pteEntries)
|
||||||
|
|
||||||
} else if pmdEntry.IsSuper() {
|
} else if pmdEntry.IsSuper() {
|
||||||
|
@ -240,7 +152,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
pmdEntry.setPageTable(w.pageTables, pteEntries)
|
pmdEntry.setPageTable(w.pageTables, pteEntries)
|
||||||
} else {
|
} else {
|
||||||
// A huge page to be checked directly.
|
// A huge page to be checked directly.
|
||||||
w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
|
if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Might have been cleared.
|
// Might have been cleared.
|
||||||
if !pmdEntry.Valid() {
|
if !pmdEntry.Valid() {
|
||||||
|
@ -252,7 +166,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
|
pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) // escapes: see above.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map the next level, since this is valid.
|
// Map the next level, since this is valid.
|
||||||
|
@ -269,11 +183,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// At this point, we are guaranteed that start%pteSize == 0.
|
// At this point, we are guaranteed that start%pteSize == 0.
|
||||||
w.visitor.visit(uintptr(start), pteEntry, pteSize-1)
|
if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) {
|
||||||
if !pteEntry.Valid() {
|
return false
|
||||||
if w.visitor.requiresAlloc() {
|
}
|
||||||
panic("PTE not set after iteration with requiresAlloc!")
|
if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
|
||||||
}
|
|
||||||
clearPTEEntries++
|
clearPTEEntries++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -285,7 +198,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// Check if we no longer need this page.
|
// Check if we no longer need this page.
|
||||||
if clearPTEEntries == entriesPerPage {
|
if clearPTEEntries == entriesPerPage {
|
||||||
pmdEntry.Clear()
|
pmdEntry.Clear()
|
||||||
w.pageTables.Allocator.FreePTEs(pteEntries)
|
w.pageTables.Allocator.FreePTEs(pteEntries) // escapes: see above.
|
||||||
clearPMDEntries++
|
clearPMDEntries++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -293,7 +206,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// Check if we no longer need this page.
|
// Check if we no longer need this page.
|
||||||
if clearPMDEntries == entriesPerPage {
|
if clearPMDEntries == entriesPerPage {
|
||||||
pudEntry.Clear()
|
pudEntry.Clear()
|
||||||
w.pageTables.Allocator.FreePTEs(pmdEntries)
|
w.pageTables.Allocator.FreePTEs(pmdEntries) // escapes: see above.
|
||||||
clearPUDEntries++
|
clearPUDEntries++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -301,7 +214,8 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// Check if we no longer need this page.
|
// Check if we no longer need this page.
|
||||||
if clearPUDEntries == entriesPerPage {
|
if clearPUDEntries == entriesPerPage {
|
||||||
pgdEntry.Clear()
|
pgdEntry.Clear()
|
||||||
w.pageTables.Allocator.FreePTEs(pudEntries)
|
w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,104 +16,10 @@
|
||||||
|
|
||||||
package pagetables
|
package pagetables
|
||||||
|
|
||||||
// Visitor is a generic type.
|
|
||||||
type Visitor interface {
|
|
||||||
// visit is called on each PTE.
|
|
||||||
visit(start uintptr, pte *PTE, align uintptr)
|
|
||||||
|
|
||||||
// requiresAlloc indicates that new entries should be allocated within
|
|
||||||
// the walked range.
|
|
||||||
requiresAlloc() bool
|
|
||||||
|
|
||||||
// requiresSplit indicates that entries in the given range should be
|
|
||||||
// split if they are huge or jumbo pages.
|
|
||||||
requiresSplit() bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walker walks page tables.
|
|
||||||
type Walker struct {
|
|
||||||
// pageTables are the tables to walk.
|
|
||||||
pageTables *PageTables
|
|
||||||
|
|
||||||
// Visitor is the set of arguments.
|
|
||||||
visitor Visitor
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterateRange iterates over all appropriate levels of page tables for the given range.
|
|
||||||
//
|
|
||||||
// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
|
|
||||||
// exception is sect pages. If a valid sect page (huge or jumbo) cannot be
|
|
||||||
// installed, then the walk will continue to individual entries.
|
|
||||||
//
|
|
||||||
// This algorithm will attempt to maximize the use of sect pages whenever
|
|
||||||
// possible. Whether a sect page is provided will be clear through the range
|
|
||||||
// provided in the callback.
|
|
||||||
//
|
|
||||||
// Note that if requiresAlloc is true, then no gaps will be present. However,
|
|
||||||
// if alloc is not set, then the iteration will likely be full of gaps.
|
|
||||||
//
|
|
||||||
// Note that this function should generally be avoided in favor of Map, Unmap,
|
|
||||||
// etc. when not necessary.
|
|
||||||
//
|
|
||||||
// Precondition: start must be page-aligned.
|
|
||||||
//
|
|
||||||
// Precondition: start must be less than end.
|
|
||||||
//
|
|
||||||
// Precondition: If requiresAlloc is true, then start and end should not span
|
|
||||||
// non-canonical ranges. If they do, a panic will result.
|
|
||||||
//
|
|
||||||
//go:nosplit
|
|
||||||
func (w *Walker) iterateRange(start, end uintptr) {
|
|
||||||
if start%pteSize != 0 {
|
|
||||||
panic("unaligned start")
|
|
||||||
}
|
|
||||||
if end < start {
|
|
||||||
panic("start > end")
|
|
||||||
}
|
|
||||||
if start < lowerTop {
|
|
||||||
if end <= lowerTop {
|
|
||||||
w.iterateRangeCanonical(start, end)
|
|
||||||
} else if end > lowerTop && end <= upperBottom {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
w.iterateRangeCanonical(start, lowerTop)
|
|
||||||
} else {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
w.iterateRangeCanonical(start, lowerTop)
|
|
||||||
w.iterateRangeCanonical(upperBottom, end)
|
|
||||||
}
|
|
||||||
} else if start < upperBottom {
|
|
||||||
if end <= upperBottom {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if w.visitor.requiresAlloc() {
|
|
||||||
panic("alloc spans non-canonical range")
|
|
||||||
}
|
|
||||||
w.iterateRangeCanonical(upperBottom, end)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
w.iterateRangeCanonical(start, end)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// next returns the next address quantized by the given size.
|
|
||||||
//
|
|
||||||
//go:nosplit
|
|
||||||
func next(start uintptr, size uintptr) uintptr {
|
|
||||||
start &= ^(size - 1)
|
|
||||||
start += size
|
|
||||||
return start
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterateRangeCanonical walks a canonical range.
|
// iterateRangeCanonical walks a canonical range.
|
||||||
//
|
//
|
||||||
//go:nosplit
|
//go:nosplit
|
||||||
func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
func (w *Walker) iterateRangeCanonical(start, end uintptr) bool {
|
||||||
pgdEntryIndex := w.pageTables.root
|
pgdEntryIndex := w.pageTables.root
|
||||||
if start >= upperBottom {
|
if start >= upperBottom {
|
||||||
pgdEntryIndex = w.pageTables.archPageTables.root
|
pgdEntryIndex = w.pageTables.archPageTables.root
|
||||||
|
@ -160,7 +66,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// new page for the pmd.
|
// new page for the pmd.
|
||||||
if start&(pudSize-1) == 0 && end-start >= pudSize {
|
if start&(pudSize-1) == 0 && end-start >= pudSize {
|
||||||
pudEntry.SetSect()
|
pudEntry.SetSect()
|
||||||
w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
|
if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if pudEntry.Valid() {
|
if pudEntry.Valid() {
|
||||||
start = next(start, pudSize)
|
start = next(start, pudSize)
|
||||||
continue
|
continue
|
||||||
|
@ -185,7 +93,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
pudEntry.setPageTable(w.pageTables, pmdEntries)
|
pudEntry.setPageTable(w.pageTables, pmdEntries)
|
||||||
} else {
|
} else {
|
||||||
// A sect page to be checked directly.
|
// A sect page to be checked directly.
|
||||||
w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
|
if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Might have been cleared.
|
// Might have been cleared.
|
||||||
if !pudEntry.Valid() {
|
if !pudEntry.Valid() {
|
||||||
|
@ -222,7 +132,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
// As above, we can skip allocating a new page.
|
// As above, we can skip allocating a new page.
|
||||||
if start&(pmdSize-1) == 0 && end-start >= pmdSize {
|
if start&(pmdSize-1) == 0 && end-start >= pmdSize {
|
||||||
pmdEntry.SetSect()
|
pmdEntry.SetSect()
|
||||||
w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
|
if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if pmdEntry.Valid() {
|
if pmdEntry.Valid() {
|
||||||
start = next(start, pmdSize)
|
start = next(start, pmdSize)
|
||||||
continue
|
continue
|
||||||
|
@ -246,7 +158,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
pmdEntry.setPageTable(w.pageTables, pteEntries)
|
pmdEntry.setPageTable(w.pageTables, pteEntries)
|
||||||
} else {
|
} else {
|
||||||
// A huge page to be checked directly.
|
// A huge page to be checked directly.
|
||||||
w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
|
if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Might have been cleared.
|
// Might have been cleared.
|
||||||
if !pmdEntry.Valid() {
|
if !pmdEntry.Valid() {
|
||||||
|
@ -276,7 +190,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// At this point, we are guaranteed that start%pteSize == 0.
|
// At this point, we are guaranteed that start%pteSize == 0.
|
||||||
w.visitor.visit(uintptr(start), pteEntry, pteSize-1)
|
if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
if !pteEntry.Valid() {
|
if !pteEntry.Valid() {
|
||||||
if w.visitor.requiresAlloc() {
|
if w.visitor.requiresAlloc() {
|
||||||
panic("PTE not set after iteration with requiresAlloc!")
|
panic("PTE not set after iteration with requiresAlloc!")
|
||||||
|
@ -311,4 +227,5 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) {
|
||||||
w.pageTables.Allocator.FreePTEs(pudEntries)
|
w.pageTables.Allocator.FreePTEs(pudEntries)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,110 @@
|
||||||
|
// Copyright 2018 The gVisor Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package pagetables
|
||||||
|
|
||||||
|
// Visitor is a generic type.
|
||||||
|
type Visitor interface {
|
||||||
|
// visit is called on each PTE. The returned boolean indicates whether
|
||||||
|
// the walk should continue.
|
||||||
|
visit(start uintptr, pte *PTE, align uintptr) bool
|
||||||
|
|
||||||
|
// requiresAlloc indicates that new entries should be allocated within
|
||||||
|
// the walked range.
|
||||||
|
requiresAlloc() bool
|
||||||
|
|
||||||
|
// requiresSplit indicates that entries in the given range should be
|
||||||
|
// split if they are huge or jumbo pages.
|
||||||
|
requiresSplit() bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// Walker walks page tables.
|
||||||
|
type Walker struct {
|
||||||
|
// pageTables are the tables to walk.
|
||||||
|
pageTables *PageTables
|
||||||
|
|
||||||
|
// Visitor is the set of arguments.
|
||||||
|
visitor Visitor
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterateRange iterates over all appropriate levels of page tables for the given range.
|
||||||
|
//
|
||||||
|
// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
|
||||||
|
// exception is super pages. If a valid super page (huge or jumbo) cannot be
|
||||||
|
// installed, then the walk will continue to individual entries.
|
||||||
|
//
|
||||||
|
// This algorithm will attempt to maximize the use of super/sect pages whenever
|
||||||
|
// possible. Whether a super page is provided will be clear through the range
|
||||||
|
// provided in the callback.
|
||||||
|
//
|
||||||
|
// Note that if requiresAlloc is true, then no gaps will be present. However,
|
||||||
|
// if alloc is not set, then the iteration will likely be full of gaps.
|
||||||
|
//
|
||||||
|
// Note that this function should generally be avoided in favor of Map, Unmap,
|
||||||
|
// etc. when not necessary.
|
||||||
|
//
|
||||||
|
// Precondition: start must be page-aligned.
|
||||||
|
// Precondition: start must be less than end.
|
||||||
|
// Precondition: If requiresAlloc is true, then start and end should not span
|
||||||
|
// non-canonical ranges. If they do, a panic will result.
|
||||||
|
//
|
||||||
|
//go:nosplit
|
||||||
|
func (w *Walker) iterateRange(start, end uintptr) {
|
||||||
|
if start%pteSize != 0 {
|
||||||
|
panic("unaligned start")
|
||||||
|
}
|
||||||
|
if end < start {
|
||||||
|
panic("start > end")
|
||||||
|
}
|
||||||
|
if start < lowerTop {
|
||||||
|
if end <= lowerTop {
|
||||||
|
w.iterateRangeCanonical(start, end)
|
||||||
|
} else if end > lowerTop && end <= upperBottom {
|
||||||
|
if w.visitor.requiresAlloc() {
|
||||||
|
panic("alloc spans non-canonical range")
|
||||||
|
}
|
||||||
|
w.iterateRangeCanonical(start, lowerTop)
|
||||||
|
} else {
|
||||||
|
if w.visitor.requiresAlloc() {
|
||||||
|
panic("alloc spans non-canonical range")
|
||||||
|
}
|
||||||
|
if !w.iterateRangeCanonical(start, lowerTop) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.iterateRangeCanonical(upperBottom, end)
|
||||||
|
}
|
||||||
|
} else if start < upperBottom {
|
||||||
|
if end <= upperBottom {
|
||||||
|
if w.visitor.requiresAlloc() {
|
||||||
|
panic("alloc spans non-canonical range")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if w.visitor.requiresAlloc() {
|
||||||
|
panic("alloc spans non-canonical range")
|
||||||
|
}
|
||||||
|
w.iterateRangeCanonical(upperBottom, end)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
w.iterateRangeCanonical(start, end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// next returns the next address quantized by the given size: start is
// rounded down to a multiple of size, and size is then added.
//
// The mask arithmetic only quantizes correctly when size is a power of two.
// No overflow check is performed; if the result does not fit in a uintptr it
// wraps around, as with any unsigned addition in Go.
//
//go:nosplit
func next(start uintptr, size uintptr) uintptr {
	// Clear the offset bits below size, then step to the following boundary.
	return (start &^ (size - 1)) + size
}
|
Loading…
Reference in New Issue