From 61b0b19497e9ac417de5a600e6ff06d52db4268f Mon Sep 17 00:00:00 2001
From: Adin Scannell
Date: Mon, 21 May 2018 16:48:41 -0700
Subject: [PATCH] Dramatically improve handling of KVM vCPU pool.

Especially in situations with small numbers of vCPUs, the existing
system resulted in excessive thrashing. Now, execution contexts
co-ordinate as smoothly as they can to share a small number of cores.

PiperOrigin-RevId: 197483323
Change-Id: I0afc0c5363ea9386994355baf3904bf5fe08c56c
---
 pkg/sentry/platform/kvm/context.go  |  5 +-
 pkg/sentry/platform/kvm/kvm_test.go |  5 +-
 pkg/sentry/platform/kvm/machine.go  | 71 +++++++++++++++++++----------
 3 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index c9bfbc136..dec26a23a 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -41,10 +41,7 @@ func (c *context) Switch(as platform.AddressSpace, ac arch.Context, _ int32) (*a
 	fp := (*byte)(ac.FloatingPointData())
 
 	// Grab a vCPU.
-	cpu, err := c.machine.Get()
-	if err != nil {
-		return nil, usermem.NoAccess, err
-	}
+	cpu := c.machine.Get()
 
 	// Enable interrupts (i.e. calls to vCPU.Notify).
 	if !c.interrupt.Enable(cpu) {
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index 778a6d187..a3466fbed 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -59,10 +59,7 @@ func kvmTest(t testHarness, setup func(*KVM), fn func(*vCPU) bool) {
 		}
 	}()
 	for {
-		c, err = k.machine.Get()
-		if err != nil {
-			t.Fatalf("error getting vCPU: %v", err)
-		}
+		c = k.machine.Get()
 		if !fn(c) {
 			break
 		}
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 3ee21fe21..9b7e5130c 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -48,6 +48,9 @@ type machine struct {
 	// mu protects vCPUs.
 	mu sync.Mutex
 
+	// available is notified when vCPUs are available.
+	available sync.Cond
+
 	// vCPUs are the machine vCPUs.
 	//
 	// This is eventually keyed by system TID, but is initially indexed by
@@ -118,6 +121,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) {
 		fd:    vm,
 		vCPUs: make(map[uint64]*vCPU),
 	}
+	m.available.L = &m.mu
 	if vCPUs > _KVM_NR_VCPUS {
 		// Hard cap at KVM's limit.
 		vCPUs = _KVM_NR_VCPUS
 	}
@@ -284,25 +288,21 @@ func (m *machine) Destroy() {
 }
 
 // Get gets an available vCPU.
-func (m *machine) Get() (*vCPU, error) {
+func (m *machine) Get() *vCPU {
 	runtime.LockOSThread()
 	tid := procid.Current()
 	m.mu.Lock()
 
-	for {
-		// Check for an exact match.
-		if c := m.vCPUs[tid]; c != nil {
-			c.lock()
-			m.mu.Unlock()
-			return c, nil
-		}
+	// Check for an exact match.
+	if c := m.vCPUs[tid]; c != nil {
+		c.lock()
+		m.mu.Unlock()
+		return c
+	}
 
+	for {
 		// Scan for an available vCPU.
 		for origTID, c := range m.vCPUs {
-			// We can only steal a vCPU that is the vCPUReady
-			// state. That is, it must not be heading to user mode
-			// with some other thread, have a waiter registered, or
-			// be in guest mode already.
 			if atomic.CompareAndSwapUint32(&c.state, vCPUReady, vCPUUser) {
 				delete(m.vCPUs, origTID)
 				m.vCPUs[tid] = c
@@ -313,24 +313,44 @@
 				// may be stale.
 				c.loadSegments()
 				atomic.StoreUint64(&c.tid, tid)
-				return c, nil
+				return c
 			}
 		}
 
-		// Everything is already in guest mode.
-		//
-		// We hold the pool lock here, so we should be able to kick
-		// something out of kernel mode and have it bounce into host
-		// mode when it tries to grab the vCPU again.
-		for _, c := range m.vCPUs {
-			c.BounceToHost()
+		// Scan for something not in user mode.
+		for origTID, c := range m.vCPUs {
+			if !atomic.CompareAndSwapUint32(&c.state, vCPUGuest, vCPUGuest|vCPUWaiter) {
+				continue
+			}
+
+			// The vCPU will not be able to transition to
+			// vCPUGuest|vCPUUser or to vCPUUser because that
+			// transition requires holding the machine mutex, as we
+			// do now. There is no path to register a waiter on
+			// just the vCPUReady state.
+			for {
+				c.waitUntilNot(vCPUGuest | vCPUWaiter)
+				if atomic.CompareAndSwapUint32(&c.state, vCPUReady, vCPUUser) {
+					break
+				}
+			}
+
+			// Steal the vCPU.
+			delete(m.vCPUs, origTID)
+			m.vCPUs[tid] = c
+			m.mu.Unlock()
+
+			// See above.
+			c.loadSegments()
+			atomic.StoreUint64(&c.tid, tid)
+			return c
 		}
 
-		// Give other threads an opportunity to run. We don't yield the
-		// pool lock above, so if they try to regrab the lock we will
-		// serialize at this point. This is extreme, but we don't
-		// expect to exhaust all vCPUs frequently.
-		yield()
+		// Everything is executing in user mode. Wait until something
+		// is available. Note that signaling the condition variable
+		// will have the extra effect of kicking the vCPUs out of guest
+		// mode if that's where they were.
+		m.available.Wait()
 	}
 }
 
@@ -338,6 +358,7 @@ func (m *machine) Get() (*vCPU, error) {
 func (m *machine) Put(c *vCPU) {
 	c.unlock()
 	runtime.UnlockOSThread()
+	m.available.Signal()
 }
 
 // lock marks the vCPU as in user mode.
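
The coordination pattern this patch introduces boils down to a condition-variable-guarded
pool: Get first tries to claim a ready vCPU with a compare-and-swap, steals one from another
thread if it can, and only blocks on machine.available when every vCPU is busy; Put releases
the vCPU and signals one waiter. The sketch below is a minimal, self-contained Go
illustration of that pattern under simplifying assumptions, not the gVisor code: pool, cpu,
newPool, get, put, stateReady and stateUser are invented stand-ins for machine, vCPU and the
vCPUReady/vCPUUser/vCPUGuest states, and the TID keying, vCPU stealing and guest-mode kick of
the real implementation are omitted.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// Simplified stand-ins for gVisor's vCPU states (vCPUReady, vCPUUser, ...).
const (
	stateReady uint32 = iota // free to be claimed
	stateUser                // claimed by an execution context
)

// cpu stands in for the real vCPU type.
type cpu struct {
	id    int
	state uint32
}

// pool stands in for the machine type: a mutex-protected set of cpus plus
// a condition variable signaled whenever a cpu is returned.
type pool struct {
	mu        sync.Mutex
	available sync.Cond
	cpus      []*cpu
}

func newPool(n int) *pool {
	p := &pool{}
	p.available.L = &p.mu // mirrors m.available.L = &m.mu in the patch
	for i := 0; i < n; i++ {
		p.cpus = append(p.cpus, &cpu{id: i})
	}
	return p
}

// get claims a ready cpu, blocking on the condition variable when every
// cpu is busy; this replaces a busy retry loop, much as the patch replaces
// the BounceToHost/yield() loop with m.available.Wait().
func (p *pool) get() *cpu {
	p.mu.Lock()
	defer p.mu.Unlock()
	for {
		for _, c := range p.cpus {
			if atomic.CompareAndSwapUint32(&c.state, stateReady, stateUser) {
				return c
			}
		}
		// Everything is in use: sleep until put signals availability.
		p.available.Wait()
	}
}

// put releases a cpu and wakes one waiter, as machine.Put now does.
func (p *pool) put(c *cpu) {
	atomic.StoreUint32(&c.state, stateReady)
	p.available.Signal() // legal without holding p.mu
}

func main() {
	p := newPool(2) // more workers than cpus, so some must wait
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(worker int) {
			defer wg.Done()
			c := p.get()
			fmt.Printf("worker %d ran on cpu %d\n", worker, c.id)
			p.put(c)
		}(i)
	}
	wg.Wait()
}

Note that sync.Cond permits Signal to be called without holding the lock, which is why
machine.Put in the patch can signal after the vCPU has already been unlocked; the sketch's
put does the same.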