diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index fd95eb2d2..0f433ee79 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -101,6 +101,8 @@ func NewFloatingPointData() *FloatingPointData {
 
 // State contains the common architecture bits for aarch64 (the build tag of this
 // file ensures it's only built on aarch64).
+//
+// +stateify savable
 type State struct {
 	// The system registers.
 	Regs Registers
diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go
index cabbf60e0..550741d8c 100644
--- a/pkg/sentry/arch/arch_arm64.go
+++ b/pkg/sentry/arch/arch_arm64.go
@@ -73,6 +73,8 @@ const (
 )
 
 // context64 represents an ARM64 context.
+//
+// +stateify savable
 type context64 struct {
 	State
 	sigFPState []aarch64FPState // fpstate to be restored on sigreturn.
diff --git a/pkg/sentry/platform/kvm/kvm_const_arm64.go b/pkg/sentry/platform/kvm/kvm_const_arm64.go
index fdc599477..9a7be3655 100644
--- a/pkg/sentry/platform/kvm/kvm_const_arm64.go
+++ b/pkg/sentry/platform/kvm/kvm_const_arm64.go
@@ -72,6 +72,7 @@ const (
 	_TCR_T0SZ_VA48 = 64 - 48 // VA=48
 	_TCR_T1SZ_VA48 = 64 - 48 // VA=48
 
+	_TCR_A1     = 1 << 22
 	_TCR_ASID16 = 1 << 36
 	_TCR_TBI0   = 1 << 37
 
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 307a7645f..905712076 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -61,7 +61,6 @@ func (c *vCPU) initArchState() error {
 	reg.addr = uint64(reflect.ValueOf(&data).Pointer())
 	regGet.addr = uint64(reflect.ValueOf(&dataGet).Pointer())
 
-	vcpuInit.target = _KVM_ARM_TARGET_GENERIC_V8
 	vcpuInit.features[0] |= (1 << _KVM_ARM_VCPU_PSCI_0_2)
 	if _, _, errno := syscall.RawSyscall(
 		syscall.SYS_IOCTL,
@@ -272,8 +271,16 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
 		return c.fault(int32(syscall.SIGSEGV), info)
 	case ring0.Vector(bounce): // ring0.VirtualizationException
 		return usermem.NoAccess, platform.ErrContextInterrupt
+	case ring0.El0Sync_undef,
+		ring0.El1Sync_undef:
+		*info = arch.SignalInfo{
+			Signo: int32(syscall.SIGILL),
+			Code:  1, // ILL_ILLOPC (illegal opcode).
+		}
+		info.SetAddr(switchOpts.Registers.Pc) // Include address.
+		return usermem.AccessType{}, platform.ErrContextSignal
 	default:
-		return usermem.NoAccess, platform.ErrContextSignal
+		panic(fmt.Sprintf("unexpected vector: 0x%x", vector))
 	}
 }
 
diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s
index d8a7bc2f9..9d29b7168 100644
--- a/pkg/sentry/platform/ring0/entry_arm64.s
+++ b/pkg/sentry/platform/ring0/entry_arm64.s
@@ -364,6 +364,9 @@ TEXT ·Halt(SB),NOSPLIT,$0
 	CMP RSV_REG, R9
 	BNE mmio_exit
 	MOVD $0, CPU_REGISTERS+PTRACE_R9(RSV_REG)
+
+	// Flush dcache.
+	WORD $0xd5087e52 // DC CISW
 mmio_exit:
 	// Disable fpsimd.
 	WORD $0xd5381041 // MRS CPACR_EL1, R1
@@ -381,6 +384,9 @@ mmio_exit:
 	MRS VBAR_EL1, R9
 	MOVD R0, 0x0(R9)
 
+	// Flush dcache.
+	WORD $0xd5087e52 // DC CISW
+
 	RET
 
 // HaltAndResume halts execution and point the pointer to the resume function.
@@ -414,6 +420,7 @@ TEXT ·Current(SB),NOSPLIT,$0-8
 // Prepare the vcpu environment for container application.
 TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
 	// Step1, save sentry context into memory.
+	MRS TPIDR_EL1, RSV_REG
 	REGISTERS_SAVE(RSV_REG, CPU_REGISTERS)
 	MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG)
 
@@ -425,34 +432,13 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
 	MOVD CPU_REGISTERS+PTRACE_R3(RSV_REG), R3
 
-	// Step2, save SP_EL1, PSTATE into kernel temporary stack.
-	// switch to temporary stack.
+	// Step2, switch to temporary stack.
 	LOAD_KERNEL_STACK(RSV_REG)
-	WORD $0xd538d092 //MRS TPIDR_EL1, R18
-	SUB $STACK_FRAME_SIZE, RSP, RSP
-	MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R11
-	MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R12
-	STP (R11, R12), 16*0(RSP)
-
-	MOVD CPU_REGISTERS+PTRACE_R11(RSV_REG), R11
-	MOVD CPU_REGISTERS+PTRACE_R12(RSV_REG), R12
-
-	// Step3, test user pagetable.
-	// If user pagetable is empty, trapped in el1_ia.
-	WORD $0xd538d092 //MRS TPIDR_EL1, R18
-	SWITCH_TO_APP_PAGETABLE(RSV_REG)
-	WORD $0xd538d092 //MRS TPIDR_EL1, R18
-	SWITCH_TO_KVM_PAGETABLE(RSV_REG)
-	WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
-	// If pagetable is not empty, recovery kernel temporary stack.
-	ADD $STACK_FRAME_SIZE, RSP, RSP
-
-	// Step4, load app context pointer.
+	// Step3, load app context pointer.
 	MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP
 
-	// Step5, prepare the environment for container application.
+	// Step4, prepare the environment for container application.
 	// set sp_el0.
 	MOVD PTRACE_SP(RSV_REG_APP), R1
 	WORD $0xd5184101 //MSR R1, SP_EL0
@@ -480,13 +466,13 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
 	LDP 16*0(RSP), (RSV_REG, RSV_REG_APP)
 	ADD $STACK_FRAME_SIZE, RSP, RSP
 
+	ISB $15
 	ERET()
 
 // kernelExitToEl1 is the entrypoint for sentry in guest_el1.
 // Prepare the vcpu environment for sentry.
 TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
 	WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
 	MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R1
 	WORD $0xd5184001 //MSR R1, SPSR_EL1
 
@@ -503,6 +489,8 @@ TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
 
 // Start is the CPU entrypoint.
 TEXT ·Start(SB),NOSPLIT,$0
+	// Flush dcache.
+	WORD $0xd5087e52 // DC CISW
 	// Init.
 	MOVD $SCTLR_EL1_DEFAULT, R1
 	MSR R1, SCTLR_EL1
@@ -558,6 +546,7 @@ TEXT ·El1_sync(SB),NOSPLIT,$0
 	B el1_invalid
 
 el1_da:
+el1_ia:
 	WORD $0xd538d092 //MRS TPIDR_EL1, R18
 	WORD $0xd538601a //MRS FAR_EL1, R26
 
@@ -570,9 +559,6 @@ el1_da:
 
 	B ·HaltAndResume(SB)
 
-el1_ia:
-	B ·HaltAndResume(SB)
-
 el1_sp_pc:
 	B ·Shutdown(SB)
 
@@ -644,9 +630,10 @@ el0_svc:
 	MOVD $Syscall, R3
 	MOVD R3, CPU_VECTOR_CODE(RSV_REG)
 
-	B ·HaltAndResume(SB)
+	B ·kernelExitToEl1(SB)
 
 el0_da:
+el0_ia:
 	WORD $0xd538d092 //MRS TPIDR_EL1, R18
 	WORD $0xd538601a //MRS FAR_EL1, R26
 
@@ -658,10 +645,10 @@ el0_da:
 	MOVD $PageFault, R3
 	MOVD R3, CPU_VECTOR_CODE(RSV_REG)
 
-	B ·HaltAndResume(SB)
+	MRS ESR_EL1, R3
+	MOVD R3, CPU_ERROR_CODE(RSV_REG)
 
-el0_ia:
-	B ·Shutdown(SB)
+	B ·kernelExitToEl1(SB)
 
 el0_fpsimd_acc:
 	B ·Shutdown(SB)
@@ -676,7 +663,10 @@ el0_sp_pc:
 	B ·Shutdown(SB)
 
 el0_undef:
-	B ·Shutdown(SB)
+	MOVD $El0Sync_undef, R3
+	MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+	B ·kernelExitToEl1(SB)
 
 el0_dbg:
 	B ·Shutdown(SB)
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
index 42009dac0..d0afa1aaa 100644
--- a/pkg/sentry/platform/ring0/kernel_arm64.go
+++ b/pkg/sentry/platform/ring0/kernel_arm64.go
@@ -53,7 +53,6 @@ func IsCanonical(addr uint64) bool {
 
 //go:nosplit
 func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
-	// Sanitize registers.
 	regs := switchOpts.Registers
 
 	regs.Pstate &= ^uint64(PsrFlagsClear)
@@ -69,6 +68,5 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
 
 	vector = c.vecCode
 
-	// Perform the switch.
 	return
 }
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
index 78510ebed..6409d1d91 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
@@ -72,13 +72,14 @@ const (
 )
 
 const (
-	mtNormal = 0x4 << 2
+	mtDevicenGnRE = 0x1 << 2
+	mtNormal      = 0x4 << 2
 )
 
 const (
 	executeDisable = xn
 	optionMask     = 0xfff | 0xfff<<48
-	protDefault    = accessed | shared | mtNormal
+	protDefault    = accessed | shared
 )
 
 // MapOpts are x86 options.
@@ -184,8 +185,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) {
 
 	if opts.User {
 		v |= user
+		v |= mtNormal
 	} else {
 		v = v &^ user
+		v |= mtDevicenGnRE // Strong order for the addresses with ring0.KernelStartAddress.
 	}
 	atomic.StoreUintptr((*uintptr)(p), v)
 }
@@ -200,7 +203,7 @@ func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
 		// This should never happen.
 		panic("unaligned physical address!")
 	}
-	v := addr | typeTable | protDefault
+	v := addr | typeTable | protDefault | mtNormal
 	atomic.StoreUintptr((*uintptr)(p), v)
 }