2019-12-14 01:09:55 +00:00
|
|
|
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
|
|
|
|
// +build arm64
|
|
|
|
|
|
|
|
package kvm
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"reflect"
|
|
|
|
"sync/atomic"
|
|
|
|
"syscall"
|
|
|
|
"unsafe"
|
|
|
|
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/arch"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/platform"
|
|
|
|
"gvisor.dev/gvisor/pkg/sentry/platform/ring0"
|
2020-01-27 23:17:58 +00:00
|
|
|
"gvisor.dev/gvisor/pkg/usermem"
|
2019-12-14 01:09:55 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// kvmVcpuInit is the argument block handed to the _KVM_ARM_PREFERRED_TARGET
// and _KVM_ARM_VCPU_INIT ioctls. A pointer to it is passed straight to the
// kernel, so its layout (one 32-bit target followed by seven 32-bit feature
// words, 32 bytes in total) must not change.
//
// NOTE(review): presumably mirrors struct kvm_vcpu_init from the Linux KVM
// uapi headers; confirm against the kernel definition.
type kvmVcpuInit struct {
	target   uint32
	features [7]uint32
}
|
|
|
|
|
|
|
|
// vcpuInit is the single kvmVcpuInit shared by the machine and its vCPUs:
// machine.initArchState passes it to the _KVM_ARM_PREFERRED_TARGET ioctl, and
// vCPU.initArchState sets the PSCI 0.2 feature bit in it before passing it to
// the _KVM_ARM_VCPU_INIT ioctl.
var vcpuInit kvmVcpuInit
|
|
|
|
|
|
|
|
// initArchState initializes architecture-specific state.
|
|
|
|
func (m *machine) initArchState() error {
|
|
|
|
if _, _, errno := syscall.RawSyscall(
|
|
|
|
syscall.SYS_IOCTL,
|
|
|
|
uintptr(m.fd),
|
|
|
|
_KVM_ARM_PREFERRED_TARGET,
|
|
|
|
uintptr(unsafe.Pointer(&vcpuInit))); errno != 0 {
|
|
|
|
panic(fmt.Sprintf("error setting KVM_ARM_PREFERRED_TARGET failed: %v", errno))
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getPageWithReflect(p uintptr) []byte {
|
|
|
|
return (*(*[0xFFFFFF]byte)(unsafe.Pointer(p & ^uintptr(syscall.Getpagesize()-1))))[:syscall.Getpagesize()]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Work around: move ring0.Vectors() into a specific address with 11-bits alignment.
|
|
|
|
//
|
|
|
|
// According to the design documentation of Arm64,
|
|
|
|
// the start address of exception vector table should be 11-bits aligned.
|
|
|
|
// Please see the code in linux kernel as reference: arch/arm64/kernel/entry.S
|
|
|
|
// But, we can't align a function's start address to a specific address by using golang.
|
|
|
|
// We have raised this question in golang community:
|
|
|
|
// https://groups.google.com/forum/m/#!topic/golang-dev/RPj90l5x86I
|
|
|
|
// This function will be removed when golang supports this feature.
|
|
|
|
//
|
|
|
|
// There are 2 jobs were implemented in this function:
|
|
|
|
// 1, move the start address of exception vector table into the specific address.
|
|
|
|
// 2, modify the offset of each instruction.
|
|
|
|
func updateVectorTable() {
|
|
|
|
fromLocation := reflect.ValueOf(ring0.Vectors).Pointer()
|
|
|
|
offset := fromLocation & (1<<11 - 1)
|
|
|
|
if offset != 0 {
|
|
|
|
offset = 1<<11 - offset
|
|
|
|
}
|
|
|
|
|
|
|
|
toLocation := fromLocation + offset
|
|
|
|
page := getPageWithReflect(toLocation)
|
|
|
|
if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
page = getPageWithReflect(toLocation + 4096)
|
|
|
|
if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move exception-vector-table into the specific address.
|
|
|
|
var entry *uint32
|
|
|
|
var entryFrom *uint32
|
|
|
|
for i := 1; i <= 0x800; i++ {
|
|
|
|
entry = (*uint32)(unsafe.Pointer(toLocation + 0x800 - uintptr(i)))
|
|
|
|
entryFrom = (*uint32)(unsafe.Pointer(fromLocation + 0x800 - uintptr(i)))
|
|
|
|
*entry = *entryFrom
|
|
|
|
}
|
|
|
|
|
|
|
|
// The offset from the address of each unconditionally branch is changed.
|
|
|
|
// We should modify the offset of each instruction.
|
|
|
|
nums := []uint32{0x0, 0x80, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400, 0x480, 0x500, 0x580, 0x600, 0x680, 0x700, 0x780}
|
|
|
|
for _, num := range nums {
|
|
|
|
entry = (*uint32)(unsafe.Pointer(toLocation + uintptr(num)))
|
|
|
|
*entry = *entry - (uint32)(offset/4)
|
|
|
|
}
|
|
|
|
|
|
|
|
page = getPageWithReflect(toLocation)
|
|
|
|
if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_EXEC); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
page = getPageWithReflect(toLocation + 4096)
|
|
|
|
if err := syscall.Mprotect(page, syscall.PROT_READ|syscall.PROT_EXEC); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// initArchState initializes architecture-specific state.
|
|
|
|
func (c *vCPU) initArchState() error {
|
|
|
|
var (
|
|
|
|
reg kvmOneReg
|
|
|
|
data uint64
|
|
|
|
regGet kvmOneReg
|
|
|
|
dataGet uint64
|
|
|
|
)
|
|
|
|
|
|
|
|
reg.addr = uint64(reflect.ValueOf(&data).Pointer())
|
|
|
|
regGet.addr = uint64(reflect.ValueOf(&dataGet).Pointer())
|
|
|
|
|
|
|
|
vcpuInit.features[0] |= (1 << _KVM_ARM_VCPU_PSCI_0_2)
|
|
|
|
if _, _, errno := syscall.RawSyscall(
|
|
|
|
syscall.SYS_IOCTL,
|
|
|
|
uintptr(c.fd),
|
|
|
|
_KVM_ARM_VCPU_INIT,
|
|
|
|
uintptr(unsafe.Pointer(&vcpuInit))); errno != 0 {
|
|
|
|
panic(fmt.Sprintf("error setting KVM_ARM_VCPU_INIT failed: %v", errno))
|
|
|
|
}
|
|
|
|
|
|
|
|
// cpacr_el1
|
|
|
|
reg.id = _KVM_ARM64_REGS_CPACR_EL1
|
|
|
|
data = (_FPEN_NOTRAP << _FPEN_SHIFT)
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// sctlr_el1
|
|
|
|
regGet.id = _KVM_ARM64_REGS_SCTLR_EL1
|
|
|
|
if err := c.getOneRegister(®Get); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
dataGet |= (_SCTLR_M | _SCTLR_C | _SCTLR_I)
|
|
|
|
data = dataGet
|
|
|
|
reg.id = _KVM_ARM64_REGS_SCTLR_EL1
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// tcr_el1
|
|
|
|
data = _TCR_TXSZ_VA48 | _TCR_CACHE_FLAGS | _TCR_SHARED | _TCR_TG_FLAGS | _TCR_ASID16 | _TCR_IPS_40BITS
|
|
|
|
reg.id = _KVM_ARM64_REGS_TCR_EL1
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// mair_el1
|
|
|
|
data = _MT_EL1_INIT
|
|
|
|
reg.id = _KVM_ARM64_REGS_MAIR_EL1
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// ttbr0_el1
|
|
|
|
data = c.machine.kernel.PageTables.TTBR0_EL1(false, 0)
|
|
|
|
|
|
|
|
reg.id = _KVM_ARM64_REGS_TTBR0_EL1
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
c.SetTtbr0Kvm(uintptr(data))
|
|
|
|
|
|
|
|
// ttbr1_el1
|
|
|
|
data = c.machine.kernel.PageTables.TTBR1_EL1(false, 0)
|
|
|
|
|
|
|
|
reg.id = _KVM_ARM64_REGS_TTBR1_EL1
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// sp_el1
|
|
|
|
data = c.CPU.StackTop()
|
|
|
|
reg.id = _KVM_ARM64_REGS_SP_EL1
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// pc
|
|
|
|
reg.id = _KVM_ARM64_REGS_PC
|
|
|
|
data = uint64(reflect.ValueOf(ring0.Start).Pointer())
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// r8
|
|
|
|
reg.id = _KVM_ARM64_REGS_R8
|
|
|
|
data = uint64(reflect.ValueOf(&c.CPU).Pointer())
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// vbar_el1
|
|
|
|
reg.id = _KVM_ARM64_REGS_VBAR_EL1
|
|
|
|
|
|
|
|
fromLocation := reflect.ValueOf(ring0.Vectors).Pointer()
|
|
|
|
offset := fromLocation & (1<<11 - 1)
|
|
|
|
if offset != 0 {
|
|
|
|
offset = 1<<11 - offset
|
|
|
|
}
|
|
|
|
|
|
|
|
toLocation := fromLocation + offset
|
|
|
|
data = uint64(ring0.KernelStartAddress | toLocation)
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
data = ring0.PsrDefaultSet | ring0.KernelFlagsSet
|
|
|
|
reg.id = _KVM_ARM64_REGS_PSTATE
|
|
|
|
if err := c.setOneRegister(®); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
func (c *vCPU) loadSegments(tid uint64) {
|
|
|
|
// TODO(gvisor.dev/issue/1238): TLS is not supported.
|
|
|
|
// Get TLS from tpidr_el0.
|
|
|
|
atomic.StoreUint64(&c.tid, tid)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *vCPU) setOneRegister(reg *kvmOneReg) error {
|
|
|
|
if _, _, errno := syscall.RawSyscall(
|
|
|
|
syscall.SYS_IOCTL,
|
|
|
|
uintptr(c.fd),
|
|
|
|
_KVM_SET_ONE_REG,
|
|
|
|
uintptr(unsafe.Pointer(reg))); errno != 0 {
|
|
|
|
return fmt.Errorf("error setting one register: %v", errno)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *vCPU) getOneRegister(reg *kvmOneReg) error {
|
|
|
|
if _, _, errno := syscall.RawSyscall(
|
|
|
|
syscall.SYS_IOCTL,
|
|
|
|
uintptr(c.fd),
|
|
|
|
_KVM_GET_ONE_REG,
|
|
|
|
uintptr(unsafe.Pointer(reg))); errno != 0 {
|
|
|
|
return fmt.Errorf("error setting one register: %v", errno)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// setCPUID sets the CPUID to be used by the guest.
|
|
|
|
func (c *vCPU) setCPUID() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// setSystemTime sets the TSC for the vCPU.
|
|
|
|
func (c *vCPU) setSystemTime() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// setSignalMask sets the vCPU signal mask.
|
|
|
|
//
|
|
|
|
// This must be called prior to running the vCPU.
|
|
|
|
func (c *vCPU) setSignalMask() error {
|
|
|
|
// The layout of this structure implies that it will not necessarily be
|
|
|
|
// the same layout chosen by the Go compiler. It gets fudged here.
|
|
|
|
var data struct {
|
|
|
|
length uint32
|
|
|
|
mask1 uint32
|
|
|
|
mask2 uint32
|
|
|
|
_ uint32
|
|
|
|
}
|
|
|
|
data.length = 8 // Fixed sigset size.
|
|
|
|
data.mask1 = ^uint32(bounceSignalMask & 0xffffffff)
|
|
|
|
data.mask2 = ^uint32(bounceSignalMask >> 32)
|
|
|
|
if _, _, errno := syscall.RawSyscall(
|
|
|
|
syscall.SYS_IOCTL,
|
|
|
|
uintptr(c.fd),
|
|
|
|
_KVM_SET_SIGNAL_MASK,
|
|
|
|
uintptr(unsafe.Pointer(&data))); errno != 0 {
|
|
|
|
return fmt.Errorf("error setting signal mask: %v", errno)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SwitchToUser unpacks architectural-details.
|
|
|
|
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
|
|
|
|
// Check for canonical addresses.
|
|
|
|
if regs := switchOpts.Registers; !ring0.IsCanonical(regs.Pc) {
|
|
|
|
return nonCanonical(regs.Pc, int32(syscall.SIGSEGV), info)
|
|
|
|
} else if !ring0.IsCanonical(regs.Sp) {
|
|
|
|
return nonCanonical(regs.Sp, int32(syscall.SIGBUS), info)
|
|
|
|
}
|
|
|
|
|
|
|
|
var vector ring0.Vector
|
|
|
|
ttbr0App := switchOpts.PageTables.TTBR0_EL1(false, 0)
|
|
|
|
c.SetTtbr0App(uintptr(ttbr0App))
|
|
|
|
|
|
|
|
// TODO(gvisor.dev/issue/1238): full context-switch supporting for Arm64.
|
|
|
|
// The Arm64 user-mode execution state consists of:
|
|
|
|
// x0-x30
|
|
|
|
// PC, SP, PSTATE
|
|
|
|
// V0-V31: 32 128-bit registers for floating point, and simd
|
|
|
|
// FPSR
|
|
|
|
// TPIDR_EL0, used for TLS
|
|
|
|
appRegs := switchOpts.Registers
|
|
|
|
c.SetAppAddr(ring0.KernelStartAddress | uintptr(unsafe.Pointer(appRegs)))
|
|
|
|
|
|
|
|
entersyscall()
|
|
|
|
bluepill(c)
|
|
|
|
vector = c.CPU.SwitchToUser(switchOpts)
|
|
|
|
exitsyscall()
|
|
|
|
|
|
|
|
switch vector {
|
|
|
|
case ring0.Syscall:
|
|
|
|
// Fast path: system call executed.
|
|
|
|
return usermem.NoAccess, nil
|
|
|
|
|
|
|
|
case ring0.PageFault:
|
|
|
|
return c.fault(int32(syscall.SIGSEGV), info)
|
|
|
|
case 0xaa:
|
|
|
|
return usermem.NoAccess, nil
|
|
|
|
default:
|
|
|
|
return usermem.NoAccess, platform.ErrContextSignal
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|