gvisor/pkg/sentry/platform/ptrace/ptrace.go

// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package ptrace provides a ptrace-based implementation of the platform
// interface. This is useful for development and testing purposes primarily,
// and runs on stock kernels without special permissions.
//
// In a nutshell, it works as follows:
//
// The creation of a new address space creates a new child process with a
// single thread which is traced by a single goroutine.
//
// A context is just a collection of temporary variables. Calling Switch on a
// context does the following:
//
//	Locks the runtime thread.
//
//	Looks up a traced subprocess thread for the current runtime thread. If
//	none exists, the dedicated goroutine is asked to create a new stopped
//	thread in the subprocess. This stopped subprocess thread is then traced
//	by the current thread and this information is stored for subsequent
//	switches.
//
//	The context is then bound with information about the subprocess thread
//	so that the context may be appropriately interrupted via a signal.
//
//	The requested operation is performed in the traced subprocess thread
//	(e.g. set registers, execute, return).
//
// FIXME: This package is currently sloppy with cleanup.
//
// Lock order:
//
// subprocess.mu
//   context.mu
package ptrace

import (
	"sync"

	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/filemem"
	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/interrupt"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)

// Package-level stub state. New initializes it exactly once, guarded by
// stubInitialized.
var (
	// stubStart is the link address for our stub, and determines the
	// maximum user address (it is the value MaxUserAddress reports). This
	// is valid only after a call to stubInit.
	//
	// We attempt to link the stub here, and adjust downward as needed.
	stubStart uintptr = 0x7fffffff0000

	// stubEnd is the first byte past the end of the stub; as with
	// stubStart, this is valid only after a call to stubInit.
	stubEnd uintptr

	// stubInitialized controls one-time stub initialization. New uses it
	// to run stubInit and to create the global pool's master subprocess.
	stubInitialized sync.Once
)

// context is the ptrace platform's context type; PTrace.NewContext returns a
// pointer to one. It holds the signal information that Switch reports, the
// forwarder that Interrupt notifies, and a record of the most recent fault.
type context struct {
	// signalInfo is the signal info, if and when a signal is received.
	// Switch reads it once the subprocess has returned control.
	signalInfo arch.SignalInfo

	// interrupt is the interrupt context. Interrupt calls NotifyInterrupt
	// on it.
	interrupt interrupt.Forwarder

	// mu protects the following fields.
	mu sync.Mutex

	// If lastFaultSP is non-nil, the last context switch was due to a fault
	// received while executing lastFaultSP. Only context.Switch may set
	// lastFaultSP to a non-nil value. After recording a new non-nil value,
	// Switch also adds this context to that subprocess's contexts set.
	lastFaultSP *subprocess

	// lastFaultAddr is the last faulting address; this is only meaningful if
	// lastFaultSP is non-nil.
	lastFaultAddr usermem.Addr

	// lastFaultIP is the address of the last faulting instruction;
	// this is also only meaningful if lastFaultSP is non-nil.
	lastFaultIP usermem.Addr
}

// Switch runs the application state in ac inside the address space as and
// reports why execution stopped.
//
// as must be a *subprocess; any other type makes the type assertion panic.
// cpu is not used by this implementation.
//
// Possible results:
//
//	nil, usermem.NoAccess, nil: the switch ended with a system call.
//
//	info, usermem.NoAccess, platform.ErrContextSignal: a signal other
//	than SIGSEGV was received; info is a copy of the signal info.
//
//	info, access, platform.ErrContextSignalCPUID: SIGSEGV was received;
//	access is a heuristic guess at the kind of faulting access.
func (c *context) Switch(as platform.AddressSpace, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error) {
	sp := as.(*subprocess)
	viaSyscall := sp.switchToApp(c, ac)

	// Describe the fault, if any, that ended this switch. The zero values
	// mean "no fault"; only a SIGSEGV outside of a system call counts.
	var (
		curSP   *subprocess
		curAddr usermem.Addr
		curIP   usermem.Addr
	)
	if !viaSyscall && linux.Signal(c.signalInfo.Signo) == linux.SIGSEGV {
		curSP, curAddr, curIP = sp, usermem.Addr(c.signalInfo.Addr()), usermem.Addr(ac.IP())
	}

	// Swap the recorded fault for the current one, keeping the previous
	// values for the comparisons below.
	//
	// c may not be in sp.contexts yet at this point, in which case
	// sp.Unmap() will not update c.lastFaultSP. That is acceptable: the only
	// calls to sp.Unmap() we must synchronize with are those that happen
	// after this fault has been handled.
	c.mu.Lock()
	prevSP, prevAddr, prevIP := c.lastFaultSP, c.lastFaultAddr, c.lastFaultIP
	c.lastFaultSP, c.lastFaultAddr, c.lastFaultIP = curSP, curAddr, curIP
	c.mu.Unlock()

	// Bring the subprocesses' context sets in line with the new state: drop
	// c from the subprocess it previously faulted in and add it to the one
	// it faulted in now. Nothing changes if both are the same.
	if prevSP != curSP {
		if prevSP != nil {
			prevSP.mu.Lock()
			delete(prevSP.contexts, c)
			prevSP.mu.Unlock()
		}
		if curSP != nil {
			curSP.mu.Lock()
			curSP.contexts[c] = struct{}{}
			curSP.mu.Unlock()
		}
	}

	if viaSyscall {
		return nil, usermem.NoAccess, nil
	}

	// Return a pointer to a copy rather than to c.signalInfo itself.
	info := c.signalInfo
	if curSP == nil {
		// A signal that is not a fault.
		return &info, usermem.NoAccess, platform.ErrContextSignal
	}

	// This is a page fault. ptrace does not expose the real fault type, so
	// it is guessed:
	//
	// The fault is an instruction fetch iff the faulting address equals the
	// instruction pointer.
	//
	// The fault is a write if it immediately repeats the previous fault
	// (same subprocess, same address, same instruction pointer).
	instrFetch := curAddr == curIP
	repeated := prevSP == curSP && prevAddr == curAddr && prevIP == curIP

	access := usermem.Read
	if instrFetch {
		access.Execute = true
	}
	if repeated {
		access.Write = true
	}

	// A CPUID exception cannot be told apart from a regular page fault, so
	// ErrContextSignalCPUID has to be returned unconditionally in case this
	// fault was caused by one.
	return &info, access, platform.ErrContextSignalCPUID
}

// Interrupt interrupts the running guest application associated with this
// context. It does so by calling NotifyInterrupt on the context's interrupt
// forwarder.
func (c *context) Interrupt() {
	c.interrupt.NotifyInterrupt()
}

// PTrace represents a collection of ptrace subprocesses. It is built from the
// embedded types below plus the methods declared on *PTrace in this file.
type PTrace struct {
	// NOTE(review): presumably provides the platform's minimum mmap
	// address handling — confirm against the platform package.
	platform.MMapMinAddr
	// NOTE(review): presumably declares that this platform does not detect
	// CPU preemption — confirm against the platform package.
	platform.NoCPUPreemptionDetection
	// FileMem is the memory created by New via filemem.New; Memory returns
	// it.
	*filemem.FileMem
}

// New returns a new ptrace-based implementation of the platform interface.
//
// The first call also performs the package's one-time setup: it initializes
// the stub and creates the master subprocess of the global pool. That setup
// panics if the master cannot be created. Every call creates its own
// "ptrace-memory" FileMem; an error from that step is returned unchanged.
func New() (*PTrace, error) {
	setup := func() {
		// The stub has to be ready before any subprocess is created.
		stubInit()

		// The master of the global pool must exist before any other
		// process is initialized.
		m, err := newSubprocess(createStub)
		if err != nil {
			// Not expected to happen.
			panic("unable to initialize ptrace master: " + err.Error())
		}

		// Publish the master on the globalPool.
		globalPool.master = m
	}
	stubInitialized.Do(setup)

	mem, err := filemem.New("ptrace-memory")
	if err != nil {
		return nil, err
	}

	p := &PTrace{FileMem: mem}
	return p, nil
}

// SupportsAddressSpaceIO implements platform.Platform.SupportsAddressSpaceIO.
// It always reports false for the ptrace platform.
func (*PTrace) SupportsAddressSpaceIO() bool {
	return false
}

// CooperativelySchedulesAddressSpace implements platform.Platform.CooperativelySchedulesAddressSpace.
// It always reports false for the ptrace platform.
func (*PTrace) CooperativelySchedulesAddressSpace() bool {
	return false
}

// MapUnit implements platform.Platform.MapUnit. It always returns 0.
func (*PTrace) MapUnit() uint64 {
	// The host kernel manages page tables and arbitrary-sized mappings
	// have effectively the same cost.
	return 0
}

// MaxUserAddress returns the first address that may not be used by user
// applications. This is stubStart, the address at which the stub is linked,
// so the value is only meaningful once stubInit has run (New takes care of
// that).
func (*PTrace) MaxUserAddress() usermem.Addr {
	return usermem.Addr(stubStart)
}

// NewAddressSpace returns a new subprocess to serve as an address space. The
// subprocess is created from the global pool's master, so New must have been
// called first.
//
// The argument is ignored and the returned channel is always nil. On failure
// the returned AddressSpace is a true nil interface value, so callers may
// safely compare it against nil.
func (*PTrace) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) {
	as, err := newSubprocess(globalPool.master.createStub)
	if err != nil {
		// Return a literal nil rather than as: wrapping a nil concrete
		// pointer in the interface would produce a non-nil AddressSpace.
		return nil, nil, err
	}
	return as, nil, nil
}

// NewContext returns a fresh, zero-valued context. The result can be
// interrupted through its Interrupt method.
func (*PTrace) NewContext() platform.Context {
	return new(context)
}

// Memory returns the platform memory used to do allocations. This is the
// embedded FileMem that New created for this PTrace.
func (p *PTrace) Memory() platform.Memory {
	return p.FileMem
}