kvm/x86: handle a case when interrupts are enabled in the kernel space
Previously we assumed that interrupts are always disabled in the kernel space, but there is a case when the Go runtime switches to a goroutine which has been saved in the host mode. On restore, the popf instruction is used to restore flags, and this means that all flags that the goroutine had in the host mode will be restored in the kernel mode. And in the host mode, interrupts are always enabled. Long story short, we can't use the IF flag to determine whether a task is running in user or kernel mode. This patch reworks the code so that in userspace, the first bit of the IOPL flag will always be set. This doesn't give any new privileges to a task because CPL in userspace is always 3. But then we can use this flag to distinguish user and kernel modes. The IOPL flag is never set in the kernel and host modes. Reported-by: syzbot+5036b325a8eb15c030cf@syzkaller.appspotmail.com Reported-by: syzbot+034d580e89ad67b8dc75@syzkaller.appspotmail.com Signed-off-by: Andrei Vagin <avagin@gmail.com>
This commit is contained in:
parent
0cea647218
commit
de85b045d4
|
@ -83,5 +83,34 @@ func bluepillStopGuest(c *vCPU) {
|
|||
//
|
||||
//go:nosplit
|
||||
func bluepillReadyStopGuest(c *vCPU) bool {
|
||||
return c.runData.readyForInterruptInjection != 0
|
||||
if c.runData.readyForInterruptInjection == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if c.runData.ifFlag == 0 {
|
||||
// This is impossible if readyForInterruptInjection is 1.
|
||||
throw("interrupts are disabled")
|
||||
}
|
||||
|
||||
// Disable interrupts if we are in the kernel space.
|
||||
//
|
||||
// When the Sentry switches into the kernel mode, it disables
|
||||
// interrupts. But when the Go runtime switches to a goroutine which has
|
||||
// been saved in the host mode, it restores flags and this enables
|
||||
// interrupts. See the comment of UserFlagsSet for more details.
|
||||
uregs := userRegs{}
|
||||
err := c.getUserRegisters(&uregs)
|
||||
if err != 0 {
|
||||
throw("failed to get user registers")
|
||||
}
|
||||
|
||||
if ring0.IsKernelFlags(uregs.RFLAGS) {
|
||||
uregs.RFLAGS &^= ring0.KernelFlagsClear
|
||||
err = c.setUserRegisters(&uregs)
|
||||
if err != 0 {
|
||||
throw("failed to set user registers")
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ const (
|
|||
_KVM_SET_REGS = 0x4090ae82
|
||||
_KVM_SET_SREGS = 0x4138ae84
|
||||
_KVM_GET_REGS = 0x8090ae81
|
||||
_KVM_GET_SREGS = 0x8138ae83
|
||||
_KVM_GET_SUPPORTED_CPUID = 0xc008ae05
|
||||
_KVM_SET_CPUID2 = 0x4008ae90
|
||||
_KVM_SET_SIGNAL_MASK = 0x4004ae8b
|
||||
|
|
|
@ -153,8 +153,8 @@ func (c *vCPU) initArchState() error {
|
|||
}
|
||||
|
||||
// Set the user registers.
|
||||
if err := c.setUserRegisters(&kernelUserRegs); err != nil {
|
||||
return err
|
||||
if errno := c.setUserRegisters(&kernelUserRegs); errno != 0 {
|
||||
return fmt.Errorf("error setting user registers: %v", errno)
|
||||
}
|
||||
|
||||
// Allocate some floating point state save area for the local vCPU.
|
||||
|
|
|
@ -137,15 +137,17 @@ func (c *vCPU) setSignalMask() error {
|
|||
}
|
||||
|
||||
// setUserRegisters sets user registers in the vCPU.
|
||||
func (c *vCPU) setUserRegisters(uregs *userRegs) error {
|
||||
//
|
||||
//go:nosplit
|
||||
func (c *vCPU) setUserRegisters(uregs *userRegs) syscall.Errno {
|
||||
if _, _, errno := syscall.RawSyscall(
|
||||
syscall.SYS_IOCTL,
|
||||
uintptr(c.fd),
|
||||
_KVM_SET_REGS,
|
||||
uintptr(unsafe.Pointer(uregs))); errno != 0 {
|
||||
return fmt.Errorf("error setting user registers: %v", errno)
|
||||
return errno
|
||||
}
|
||||
return nil
|
||||
return 0
|
||||
}
|
||||
|
||||
// getUserRegisters reloads user registers in the vCPU.
|
||||
|
@ -175,3 +177,17 @@ func (c *vCPU) setSystemRegisters(sregs *systemRegs) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getSystemRegisters sets system registers.
|
||||
//
|
||||
//go:nosplit
|
||||
func (c *vCPU) getSystemRegisters(sregs *systemRegs) syscall.Errno {
|
||||
if _, _, errno := syscall.RawSyscall(
|
||||
syscall.SYS_IOCTL,
|
||||
uintptr(c.fd),
|
||||
_KVM_GET_SREGS,
|
||||
uintptr(unsafe.Pointer(sregs))); errno != 0 {
|
||||
return errno
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
|
|
@ -193,13 +193,10 @@ TEXT ·Start(SB),NOSPLIT,$0
|
|||
|
||||
// See entry_amd64.go.
|
||||
TEXT ·sysenter(SB),NOSPLIT,$0
|
||||
// Interrupts are always disabled while we're executing in kernel mode
|
||||
// and always enabled while executing in user mode. Therefore, we can
|
||||
// reliably look at the flags in R11 to determine where this syscall
|
||||
// was from.
|
||||
TESTL $_RFLAGS_IF, R11
|
||||
// _RFLAGS_IOPL0 is always set in the user mode and it is never set in
|
||||
// the kernel mode. See the comment of UserFlagsSet for more details.
|
||||
TESTL $_RFLAGS_IOPL0, R11
|
||||
JZ kernel
|
||||
|
||||
user:
|
||||
SWAP_GS()
|
||||
MOVQ AX, ENTRY_SCRATCH0(GS) // Save user AX on scratch.
|
||||
|
@ -278,7 +275,7 @@ TEXT ·exception(SB),NOSPLIT,$0
|
|||
// ERROR_CODE (sp+8)
|
||||
// VECTOR (sp+0)
|
||||
//
|
||||
TESTL $_RFLAGS_IF, 32(SP)
|
||||
TESTL $_RFLAGS_IOPL0, 32(SP)
|
||||
JZ kernel
|
||||
|
||||
user:
|
||||
|
|
|
@ -45,6 +45,7 @@ func Emit(w io.Writer) {
|
|||
|
||||
fmt.Fprintf(w, "\n// Bits.\n")
|
||||
fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF)
|
||||
fmt.Fprintf(w, "#define _RFLAGS_IOPL0 0x%02x\n", _RFLAGS_IOPL0)
|
||||
fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet)
|
||||
|
||||
fmt.Fprintf(w, "\n// Vectors.\n")
|
||||
|
|
|
@ -39,7 +39,9 @@ const (
|
|||
|
||||
_RFLAGS_AC = 1 << 18
|
||||
_RFLAGS_NT = 1 << 14
|
||||
_RFLAGS_IOPL = 3 << 12
|
||||
_RFLAGS_IOPL0 = 1 << 12
|
||||
_RFLAGS_IOPL1 = 1 << 13
|
||||
_RFLAGS_IOPL = _RFLAGS_IOPL0 | _RFLAGS_IOPL1
|
||||
_RFLAGS_DF = 1 << 10
|
||||
_RFLAGS_IF = 1 << 9
|
||||
_RFLAGS_STEP = 1 << 8
|
||||
|
@ -67,15 +69,45 @@ const (
|
|||
KernelFlagsSet = _RFLAGS_RESERVED
|
||||
|
||||
// UserFlagsSet are always set in userspace.
|
||||
UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF
|
||||
//
|
||||
// _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege
|
||||
// level. The Current Privilege Level (CPL) of the task must be less
|
||||
// than or equal to the IOPL in order for the task or program to access
|
||||
// I/O ports.
|
||||
//
|
||||
// Here, _RFLAGS_IOPL0 is used only to determine whether the task is
|
||||
// running in the kernel or userspace mode. In the user mode, the CPL is
|
||||
// always 3 and it doesn't matter what IOPL is set if it is below CPL.
|
||||
//
|
||||
// We need to have one bit which will be always different in user and
|
||||
// kernel modes. And we have to remember that even though we have
|
||||
// KernelFlagsClear, we still can see some of these flags in the kernel
|
||||
// mode. This can happen when the Go runtime switches to a goroutine
|
||||
// which has been saved in the host mode. On restore, the popf
|
||||
// instruction is used to restore flags and this means that all flags
|
||||
// that the goroutine had in the host mode will be restored in the
|
||||
// kernel mode.
|
||||
//
|
||||
// _RFLAGS_IOPL0 is never set in host and kernel modes and we always set
|
||||
// it in the user mode. So if this flag is set, the task is running in
|
||||
// the user mode and if it isn't set, the task is running in the kernel
|
||||
// mode.
|
||||
UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0
|
||||
|
||||
// KernelFlagsClear should always be clear in the kernel.
|
||||
KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT
|
||||
|
||||
// UserFlagsClear are always cleared in userspace.
|
||||
UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL
|
||||
UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1
|
||||
)
|
||||
|
||||
// IsKernelFlags returns true if rflags coresponds to the kernel mode.
|
||||
//
|
||||
// go:nosplit
|
||||
func IsKernelFlags(rflags uint64) bool {
|
||||
return rflags&_RFLAGS_IOPL0 == 0
|
||||
}
|
||||
|
||||
// Vector is an exception vector.
|
||||
type Vector uintptr
|
||||
|
||||
|
|
Loading…
Reference in New Issue