193 lines
5.7 KiB
Go
193 lines
5.7 KiB
Go
// Copyright 2018 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package kvm
|
|
|
|
import (
|
|
"sync/atomic"
|
|
"syscall"
|
|
"unsafe"
|
|
)
|
|
|
|
// throw is declared here without a body; the go:linkname directive below
// binds the declaration to runtime.throw. bluepillHandler calls it with a
// short message for every condition it cannot continue from (for example
// "invalid state" or "run failed").
//
//go:linkname throw runtime.throw
|
|
func throw(string)
|
|
|
|
// vCPUPtr returns a CPU for the given address.
|
|
//
|
|
//go:nosplit
|
|
func vCPUPtr(addr uintptr) *vCPU {
|
|
return (*vCPU)(unsafe.Pointer(addr))
|
|
}
|
|
|
|
// bytePtr returns a bytePtr for the given address.
|
|
//
|
|
//go:nosplit
|
|
func bytePtr(addr uintptr) *byte {
|
|
return (*byte)(unsafe.Pointer(addr))
|
|
}
|
|
|
|
// bluepillHandler is called from the signal stub.
|
|
//
|
|
// The world may be stopped while this is executing, and it executes on the
|
|
// signal stack. It should only execute raw system calls and functions that are
|
|
// explicitly marked go:nosplit.
|
|
//
|
|
//go:nosplit
|
|
func bluepillHandler(context unsafe.Pointer) {
|
|
// Sanitize the registers; interrupts must always be disabled.
|
|
c := bluepillArchEnter(bluepillArchContext(context))
|
|
|
|
// Increment the number of switches.
|
|
atomic.AddUint32(&c.switches, 1)
|
|
|
|
// Mark this as guest mode.
|
|
switch atomic.SwapUint32(&c.state, vCPUGuest|vCPUUser) {
|
|
case vCPUUser: // Expected case.
|
|
case vCPUUser | vCPUWaiter:
|
|
c.notify()
|
|
default:
|
|
throw("invalid state")
|
|
}
|
|
|
|
for {
|
|
switch _, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(c.fd), _KVM_RUN, 0); errno {
|
|
case 0: // Expected case.
|
|
case syscall.EINTR:
|
|
// First, we process whatever pending signal
|
|
// interrupted KVM. Since we're in a signal handler
|
|
// currently, all signals are masked and the signal
|
|
// must have been delivered directly to this thread.
|
|
sig, _, errno := syscall.RawSyscall6(
|
|
syscall.SYS_RT_SIGTIMEDWAIT,
|
|
uintptr(unsafe.Pointer(&bounceSignalMask)),
|
|
0, // siginfo.
|
|
0, // timeout.
|
|
8, // sigset size.
|
|
0, 0)
|
|
if errno != 0 {
|
|
throw("error waiting for pending signal")
|
|
}
|
|
if sig != uintptr(bounceSignal) {
|
|
throw("unexpected signal")
|
|
}
|
|
|
|
// Check whether the current state of the vCPU is ready
|
|
// for interrupt injection. Because we don't have a
|
|
// PIC, we can't inject an interrupt while they are
|
|
// masked. We need to request a window if it's not
|
|
// ready.
|
|
if c.runData.readyForInterruptInjection == 0 {
|
|
c.runData.requestInterruptWindow = 1
|
|
continue // Rerun vCPU.
|
|
} else {
|
|
// Force injection below; the vCPU is ready.
|
|
c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN
|
|
}
|
|
case syscall.EFAULT:
|
|
// If a fault is not serviceable due to the host
|
|
// backing pages having page permissions, instead of an
|
|
// MMIO exit we receive EFAULT from the run ioctl. We
|
|
// always inject an NMI here since we may be in kernel
|
|
// mode and have interrupts disabled.
|
|
if _, _, errno := syscall.RawSyscall(
|
|
syscall.SYS_IOCTL,
|
|
uintptr(c.fd),
|
|
_KVM_NMI, 0); errno != 0 {
|
|
throw("NMI injection failed")
|
|
}
|
|
continue // Rerun vCPU.
|
|
default:
|
|
throw("run failed")
|
|
}
|
|
|
|
switch c.runData.exitReason {
|
|
case _KVM_EXIT_EXCEPTION:
|
|
throw("exception")
|
|
case _KVM_EXIT_IO:
|
|
throw("I/O")
|
|
case _KVM_EXIT_INTERNAL_ERROR:
|
|
// An internal error is typically thrown when emulation
|
|
// fails. This can occur via the MMIO path below (and
|
|
// it might fail because we have multiple regions that
|
|
// are not mapped). We would actually prefer that no
|
|
// emulation occur, and don't mind at all if it fails.
|
|
case _KVM_EXIT_HYPERCALL:
|
|
throw("hypercall")
|
|
case _KVM_EXIT_DEBUG:
|
|
throw("debug")
|
|
case _KVM_EXIT_HLT:
|
|
// Copy out registers.
|
|
bluepillArchExit(c, bluepillArchContext(context))
|
|
|
|
// Return to the vCPUReady state; notify any waiters.
|
|
user := atomic.LoadUint32(&c.state) & vCPUUser
|
|
switch atomic.SwapUint32(&c.state, user) {
|
|
case user | vCPUGuest: // Expected case.
|
|
case user | vCPUGuest | vCPUWaiter:
|
|
c.notify()
|
|
default:
|
|
throw("invalid state")
|
|
}
|
|
return
|
|
case _KVM_EXIT_MMIO:
|
|
// Increment the fault count.
|
|
atomic.AddUint32(&c.faults, 1)
|
|
|
|
// For MMIO, the physical address is the first data item.
|
|
virtual, ok := handleBluepillFault(c.machine, uintptr(c.runData.data[0]))
|
|
if !ok {
|
|
throw("physical address not valid")
|
|
}
|
|
|
|
// We now need to fill in the data appropriately. KVM
|
|
// expects us to provide the result of the given MMIO
|
|
// operation in the runData struct. This is safe
|
|
// because, if a fault occurs here, the same fault
|
|
// would have occurred in guest mode. The kernel should
|
|
// not create invalid page table mappings.
|
|
data := (*[8]byte)(unsafe.Pointer(&c.runData.data[1]))
|
|
length := (uintptr)((uint32)(c.runData.data[2]))
|
|
write := (uint8)((c.runData.data[2] >> 32 & 0xff)) != 0
|
|
for i := uintptr(0); i < length; i++ {
|
|
b := bytePtr(uintptr(virtual) + i)
|
|
if write {
|
|
// Write to the given address.
|
|
*b = data[i]
|
|
} else {
|
|
// Read from the given address.
|
|
data[i] = *b
|
|
}
|
|
}
|
|
case _KVM_EXIT_IRQ_WINDOW_OPEN:
|
|
// Interrupt: we must have requested an interrupt
|
|
// window; set the interrupt line.
|
|
if _, _, errno := syscall.RawSyscall(
|
|
syscall.SYS_IOCTL,
|
|
uintptr(c.fd),
|
|
_KVM_INTERRUPT,
|
|
uintptr(unsafe.Pointer(&bounce))); errno != 0 {
|
|
throw("interrupt injection failed")
|
|
}
|
|
// Clear previous injection request.
|
|
c.runData.requestInterruptWindow = 0
|
|
case _KVM_EXIT_SHUTDOWN:
|
|
throw("shutdown")
|
|
case _KVM_EXIT_FAIL_ENTRY:
|
|
throw("entry failed")
|
|
default:
|
|
throw("unknown failure")
|
|
}
|
|
}
|
|
}
|