// Copyright 2018 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package kvm import ( "sync/atomic" "syscall" "unsafe" ) //go:linkname throw runtime.throw func throw(string) // vCPUPtr returns a CPU for the given address. // //go:nosplit func vCPUPtr(addr uintptr) *vCPU { return (*vCPU)(unsafe.Pointer(addr)) } // bytePtr returns a bytePtr for the given address. // //go:nosplit func bytePtr(addr uintptr) *byte { return (*byte)(unsafe.Pointer(addr)) } // bluepillHandler is called from the signal stub. // // The world may be stopped while this is executing, and it executes on the // signal stack. It should only execute raw system calls and functions that are // explicitly marked go:nosplit. // //go:nosplit func bluepillHandler(context unsafe.Pointer) { // Sanitize the registers; interrupts must always be disabled. c := bluepillArchEnter(bluepillArchContext(context)) // Increment the number of switches. atomic.AddUint32(&c.switches, 1) // Mark this as guest mode. switch atomic.SwapUint32(&c.state, vCPUGuest|vCPUUser) { case vCPUUser: // Expected case. case vCPUUser | vCPUWaiter: c.notify() default: throw("invalid state") } for { switch _, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(c.fd), _KVM_RUN, 0); errno { case 0: // Expected case. case syscall.EINTR: // First, we process whatever pending signal // interrupted KVM. Since we're in a signal handler // currently, all signals are masked and the signal // must have been delivered directly to this thread. sig, _, errno := syscall.RawSyscall6( syscall.SYS_RT_SIGTIMEDWAIT, uintptr(unsafe.Pointer(&bounceSignalMask)), 0, // siginfo. 0, // timeout. 8, // sigset size. 0, 0) if errno != 0 { throw("error waiting for pending signal") } if sig != uintptr(bounceSignal) { throw("unexpected signal") } // Check whether the current state of the vCPU is ready // for interrupt injection. Because we don't have a // PIC, we can't inject an interrupt while they are // masked. We need to request a window if it's not // ready. if c.runData.readyForInterruptInjection == 0 { c.runData.requestInterruptWindow = 1 continue // Rerun vCPU. } else { // Force injection below; the vCPU is ready. c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN } case syscall.EFAULT: // If a fault is not serviceable due to the host // backing pages having page permissions, instead of an // MMIO exit we receive EFAULT from the run ioctl. We // always inject an NMI here since we may be in kernel // mode and have interrupts disabled. if _, _, errno := syscall.RawSyscall( syscall.SYS_IOCTL, uintptr(c.fd), _KVM_NMI, 0); errno != 0 { throw("NMI injection failed") } continue // Rerun vCPU. default: throw("run failed") } switch c.runData.exitReason { case _KVM_EXIT_EXCEPTION: throw("exception") case _KVM_EXIT_IO: throw("I/O") case _KVM_EXIT_INTERNAL_ERROR: // An internal error is typically thrown when emulation // fails. This can occur via the MMIO path below (and // it might fail because we have multiple regions that // are not mapped). We would actually prefer that no // emulation occur, and don't mind at all if it fails. case _KVM_EXIT_HYPERCALL: throw("hypercall") case _KVM_EXIT_DEBUG: throw("debug") case _KVM_EXIT_HLT: // Copy out registers. bluepillArchExit(c, bluepillArchContext(context)) // Return to the vCPUReady state; notify any waiters. user := atomic.LoadUint32(&c.state) & vCPUUser switch atomic.SwapUint32(&c.state, user) { case user | vCPUGuest: // Expected case. case user | vCPUGuest | vCPUWaiter: c.notify() default: throw("invalid state") } return case _KVM_EXIT_MMIO: // Increment the fault count. atomic.AddUint32(&c.faults, 1) // For MMIO, the physical address is the first data item. virtual, ok := handleBluepillFault(c.machine, uintptr(c.runData.data[0])) if !ok { throw("physical address not valid") } // We now need to fill in the data appropriately. KVM // expects us to provide the result of the given MMIO // operation in the runData struct. This is safe // because, if a fault occurs here, the same fault // would have occurred in guest mode. The kernel should // not create invalid page table mappings. data := (*[8]byte)(unsafe.Pointer(&c.runData.data[1])) length := (uintptr)((uint32)(c.runData.data[2])) write := (uint8)((c.runData.data[2] >> 32 & 0xff)) != 0 for i := uintptr(0); i < length; i++ { b := bytePtr(uintptr(virtual) + i) if write { // Write to the given address. *b = data[i] } else { // Read from the given address. data[i] = *b } } case _KVM_EXIT_IRQ_WINDOW_OPEN: // Interrupt: we must have requested an interrupt // window; set the interrupt line. if _, _, errno := syscall.RawSyscall( syscall.SYS_IOCTL, uintptr(c.fd), _KVM_INTERRUPT, uintptr(unsafe.Pointer(&bounce))); errno != 0 { throw("interrupt injection failed") } // Clear previous injection request. c.runData.requestInterruptWindow = 0 case _KVM_EXIT_SHUTDOWN: throw("shutdown") case _KVM_EXIT_FAIL_ENTRY: throw("entry failed") default: throw("unknown failure") } } }