// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
// Package device defines reserved virtual kernel devices and structures
// for managing them.
|
|
package device
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
|
|
)
|
|
|
|
// Registry tracks all simple devices and related state on the system for
|
|
// save/restore.
|
|
//
|
|
// The set of devices across save/restore must remain consistent. That is, no
|
|
// devices may be created or removed on restore relative to the saved
|
|
// system. Practically, this means do not create new devices specifically as
|
|
// part of restore.
|
|
//
|
|
// +stateify savable
|
|
type Registry struct {
|
|
// lastAnonDeviceMinor is the last minor device number used for an anonymous
|
|
// device. Must be accessed atomically.
|
|
lastAnonDeviceMinor uint64
|
|
|
|
// mu protects the fields below.
|
|
mu sync.Mutex `state:"nosave"`
|
|
|
|
devices map[ID]*Device
|
|
}
|
|
|
|
// SimpleDevices is the system-wide simple device registry. This is
|
|
// saved/restored by kernel.Kernel, but defined here to allow access without
|
|
// depending on the kernel package. See kernel.Kernel.deviceRegistry.
|
|
var SimpleDevices = newRegistry()
|
|
|
|
func newRegistry() *Registry {
|
|
return &Registry{
|
|
devices: make(map[ID]*Device),
|
|
}
|
|
}
|
|
|
|
// newAnonID assigns a major and minor number to an anonymous device ID.
|
|
func (r *Registry) newAnonID() ID {
|
|
return ID{
|
|
// Anon devices always have a major number of 0.
|
|
Major: 0,
|
|
// Use the next minor number.
|
|
Minor: atomic.AddUint64(&r.lastAnonDeviceMinor, 1),
|
|
}
|
|
}
|
|
|
|
// newAnonDevice allocates a new anonymous device with a unique minor device
|
|
// number, and registers it with r.
|
|
func (r *Registry) newAnonDevice() *Device {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
d := &Device{
|
|
ID: r.newAnonID(),
|
|
}
|
|
r.devices[d.ID] = d
|
|
return d
|
|
}
|
|
|
|
// LoadFrom initializes the internal state of all devices in r from other. The
|
|
// set of devices in both registries must match. Devices may not be created or
|
|
// destroyed across save/restore.
|
|
func (r *Registry) LoadFrom(other *Registry) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
other.mu.Lock()
|
|
defer other.mu.Unlock()
|
|
if len(r.devices) != len(other.devices) {
|
|
panic(fmt.Sprintf("Devices were added or removed when restoring the registry:\nnew:\n%+v\nold:\n%+v", r.devices, other.devices))
|
|
}
|
|
for id, otherD := range other.devices {
|
|
ourD, ok := r.devices[id]
|
|
if !ok {
|
|
panic(fmt.Sprintf("Device %+v could not be restored as it wasn't defined in the new registry", otherD))
|
|
}
|
|
ourD.loadFrom(otherD)
|
|
}
|
|
atomic.StoreUint64(&r.lastAnonDeviceMinor, atomic.LoadUint64(&other.lastAnonDeviceMinor))
|
|
}
|
|
|
|
// ID identifies a device by its major and minor device numbers.
//
// +stateify savable
type ID struct {
	Major uint64
	Minor uint64
}
|
|
|
|
// DeviceID formats a major and minor device number into a standard device number.
|
|
func (i *ID) DeviceID() uint64 {
|
|
return uint64(linux.MakeDeviceID(uint16(i.Major), uint32(i.Minor)))
|
|
}
|
|
|
|
// NewAnonDevice creates a new anonymous device. Packages that require an anonymous
|
|
// device should initialize the device in a global variable in a file called device.go:
|
|
//
|
|
// var myDevice = device.NewAnonDevice()
|
|
func NewAnonDevice() *Device {
|
|
return SimpleDevices.newAnonDevice()
|
|
}
|
|
|
|
// NewAnonMultiDevice creates a new multi-keyed anonymous device. Packages that require
|
|
// a multi-key anonymous device should initialize the device in a global variable in a
|
|
// file called device.go:
|
|
//
|
|
// var myDevice = device.NewAnonMultiDevice()
|
|
func NewAnonMultiDevice() *MultiDevice {
|
|
return &MultiDevice{
|
|
ID: SimpleDevices.newAnonID(),
|
|
}
|
|
}
|
|
|
|
// Device is a simple virtual kernel device.
|
|
//
|
|
// +stateify savable
|
|
type Device struct {
|
|
ID
|
|
|
|
// last is the last generated inode.
|
|
last uint64
|
|
}
|
|
|
|
// loadFrom initializes d from other. The IDs of both devices must match.
|
|
func (d *Device) loadFrom(other *Device) {
|
|
if d.ID != other.ID {
|
|
panic(fmt.Sprintf("Attempting to initialize a device %+v from %+v, but device IDs don't match", d, other))
|
|
}
|
|
atomic.StoreUint64(&d.last, atomic.LoadUint64(&other.last))
|
|
}
|
|
|
|
// NextIno generates a new inode number
|
|
func (d *Device) NextIno() uint64 {
|
|
return atomic.AddUint64(&d.last, 1)
|
|
}
|
|
|
|
// MultiDeviceKey provides a hashable key for a MultiDevice. The key consists
// of a raw device and inode for a resource, which must consistently identify
// the unique resource. It may optionally include a secondary device if
// appropriate.
//
// Note that using the path is not enough, because filesystems may rename a file
// to a different backing resource, at which point the path points to a different
// entity. Using only the inode is also not enough because the inode is assumed
// to be unique only within the device on which the resource exists.
type MultiDeviceKey struct {
	// Device is the raw device number of the resource.
	Device uint64

	// SecondaryDevice optionally names a secondary device; it may be empty.
	SecondaryDevice string

	// Inode is the raw inode number of the resource on Device.
	Inode uint64
}
|
|
|
|
// String stringifies the key.
|
|
func (m MultiDeviceKey) String() string {
|
|
return fmt.Sprintf("key{device: %d, sdevice: %s, inode: %d}", m.Device, m.SecondaryDevice, m.Inode)
|
|
}
|
|
|
|
// MultiDevice allows for remapping resources that come from a variety of raw
|
|
// devices into a single device. The device ID should be one of the static
|
|
// Device IDs above and cannot be reused.
|
|
type MultiDevice struct {
|
|
ID
|
|
|
|
mu sync.Mutex
|
|
last uint64
|
|
cache map[MultiDeviceKey]uint64
|
|
rcache map[uint64]MultiDeviceKey
|
|
}
|
|
|
|
// String stringifies MultiDevice.
|
|
func (m *MultiDevice) String() string {
|
|
buf := bytes.NewBuffer(nil)
|
|
buf.WriteString("cache{")
|
|
for k, v := range m.cache {
|
|
buf.WriteString(fmt.Sprintf("%s -> %d, ", k, v))
|
|
}
|
|
buf.WriteString("}")
|
|
return buf.String()
|
|
}
|
|
|
|
// Map maps a raw device and inode into the inode space of MultiDevice,
|
|
// returning a virtualized inode. Raw devices and inodes can be reused;
|
|
// in this case, the same virtual inode will be returned.
|
|
func (m *MultiDevice) Map(key MultiDeviceKey) uint64 {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
if m.cache == nil {
|
|
m.cache = make(map[MultiDeviceKey]uint64)
|
|
m.rcache = make(map[uint64]MultiDeviceKey)
|
|
}
|
|
|
|
id, ok := m.cache[key]
|
|
if ok {
|
|
return id
|
|
}
|
|
// Step over reserved entries that may have been loaded.
|
|
idx := m.last + 1
|
|
for {
|
|
if _, ok := m.rcache[idx]; !ok {
|
|
break
|
|
}
|
|
idx++
|
|
}
|
|
// We found a non-reserved entry, use it.
|
|
m.last = idx
|
|
m.cache[key] = m.last
|
|
m.rcache[m.last] = key
|
|
return m.last
|
|
}
|
|
|
|
// Load loads a raw device and inode into MultiDevice inode mappings
|
|
// with value as the virtual inode.
|
|
//
|
|
// By design, inodes start from 1 and continue until max uint64. This means
|
|
// that the zero value, which is often the uninitialized value, can be rejected
|
|
// as invalid.
|
|
func (m *MultiDevice) Load(key MultiDeviceKey, value uint64) bool {
|
|
// Reject the uninitialized value; see comment above.
|
|
if value == 0 {
|
|
return false
|
|
}
|
|
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
if m.cache == nil {
|
|
m.cache = make(map[MultiDeviceKey]uint64)
|
|
m.rcache = make(map[uint64]MultiDeviceKey)
|
|
}
|
|
|
|
if val, exists := m.cache[key]; exists && val != value {
|
|
return false
|
|
}
|
|
if k, exists := m.rcache[value]; exists && k != key {
|
|
// Should never happen.
|
|
panic("MultiDevice's caches are inconsistent")
|
|
}
|
|
|
|
// Cache value at key.
|
|
m.cache[key] = value
|
|
|
|
// Prevent value from being used by new inode mappings.
|
|
m.rcache[value] = key
|
|
|
|
return true
|
|
}
|