diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index 52a3fbe68d7024..8f86df4227d464 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -19,6 +19,10 @@ #include "../../vm/methoditer.h" #include "../../vm/tailcallhelp.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + const char *GetTType( TraceType tt); #define IsSingleStep(exception) ((exception) == EXCEPTION_SINGLE_STEP) diff --git a/src/coreclr/inc/cfi.h b/src/coreclr/inc/cfi.h index 3d7ec0f4cc11f8..a0f4c246a4b0b7 100644 --- a/src/coreclr/inc/cfi.h +++ b/src/coreclr/inc/cfi.h @@ -9,7 +9,9 @@ enum CFI_OPCODE { CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA - CFI_REL_OFFSET // Register is saved at offset from the current CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA, // Take address from register and add offset to it + CFI_NEGATE_RA_STATE, // Sign the return address in lr with paciasp }; struct CFI_CODE diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 47ecfbea7dc7de..6fdcb7496dea44 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -251,6 +251,11 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog) } } + if (JitConfig.JitPacEnabled() != 0) + { + GetEmitter()->emitPacInEpilog(); + } + // For OSR, we must also adjust the SP to remove the Tier0 frame. // if (m_compiler->opts.IsOSR()) @@ -487,12 +492,11 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, if ((spOffset == 0) && (spDelta >= -512)) { - // We can use pre-indexed addressing. + // We can use pre-indexed addressing when the stack adjustment fits in the instruction. // stp REG, REG + 1, [SP, #spDelta]! // 64-bit STP offset range: -512 to 504, multiple of 8. 
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX); m_compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta); - needToSaveRegs = false; } else // (spOffset != 0) || (spDelta < -512) @@ -511,6 +515,8 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // 64-bit STP offset range: -512 to 504, multiple of 8. assert(spOffset <= 504); assert((spOffset % 8) == 0); + assert(reg1 != REG_LR); + GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); if (TargetOS::IsUnix && m_compiler->generateCFIUnwindCodes()) @@ -622,6 +628,7 @@ void CodeGen::genRestoreRegPair(regNumber reg1, assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both // FP/SIMD + assert(reg1 != REG_LR); if (spDelta != 0) { @@ -1384,6 +1391,11 @@ void CodeGen::genFuncletProlog(BasicBlock* block) m_compiler->unwindBegProlog(); + if (JitConfig.JitPacEnabled() != 0) + { + GetEmitter()->emitPacInProlog(); + } + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; @@ -1669,6 +1681,11 @@ void CodeGen::genFuncletEpilog(BasicBlock* /* block */) } } + if (JitConfig.JitPacEnabled() != 0) + { + GetEmitter()->emitPacInEpilog(); + } + inst_RV(INS_ret, REG_LR, TYP_I_IMPL); m_compiler->unwindReturn(REG_LR); @@ -5675,6 +5692,18 @@ void CodeGen::genOSRHandleTier0CalleeSavedRegistersAndFrame() genRestoreRegPair(REG_FP, REG_LR, REG_FP, 0, 0, false, REG_IP1, nullptr, /* reportUnwindData */ false); + if (JitConfig.JitPacEnabled() != 0) + { + // Tier0 signed LR with the Tier0 caller SP before allocating its frame. + // Recreate that SP from the current Tier0 body SP so we can authenticate + // LR before the OSR prolog later re-signs it with the OSR SP. 
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_IP0, REG_SPBASE, patchpointInfo->TotalFrameSize(), REG_IP0, + /* inUnwindRegion */ false); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_IP1, REG_LR, /* canSkip */ false); + GetEmitter()->emitIns(TargetOS::IsWindows ? INS_autib1716 : INS_autia1716); + GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_LR, REG_IP1, /* canSkip */ false); + } + // Emit phantom unwind data for the tier0 frame. m_compiler->unwindAllocStack(patchpointInfo->TotalFrameSize()); // Emit nops to make the prolog 1:1 in unwind codes to instructions. This diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 699e1b103fe1d3..8f518b11c3cc91 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4484,6 +4484,13 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG +#if defined(TARGET_ARM64) + if (JitConfig.JitPacEnabled() != 0) + { + GetEmitter()->emitPacInProlog(); + } +#endif // TARGET_ARM64 + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we // generate based on various sizes. int frameType = 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cc2e46f8203e8c..95ff00eced7435 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9799,6 +9799,7 @@ class Compiler void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset] void unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset]! 
void unwindSaveNext(); // unwind code: save_next + void unwindPacSignLR(); // unwind code: pac_sign_lr void unwindReturn(regNumber reg); // ret lr #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index d871a91cd8a516..f2960afeda6e1e 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3347,6 +3347,11 @@ class emitter instrDescAlign* emitNewInstrAlign(); #endif +#if defined(TARGET_ARM64) + void emitPacInProlog(); + void emitPacInEpilog(); +#endif + instrDesc* emitNewInstrSmall(emitAttr attr); instrDesc* emitNewInstr(emitAttr attr = EA_4BYTE); instrDesc* emitNewInstrSC(emitAttr attr, cnsval_ssize_t cns); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 25add148df4844..fca116d7b95f61 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1435,6 +1435,32 @@ static const char * const bRegNames[] = // clang-format on +//------------------------------------------------------------------------ +// emitPacInProlog: Sign LR as part of Pointer Authentication (PAC) support +// +void emitter::emitPacInProlog() +{ + if (JitConfig.JitPacEnabled() == 0) + { + return; + } + emitIns(TargetOS::IsWindows ? INS_pacibsp : INS_paciasp); + m_compiler->unwindPacSignLR(); +} + +//------------------------------------------------------------------------ +// emitPacInEpilog: unsign LR as part of Pointer Authentication (PAC) support +// +void emitter::emitPacInEpilog() +{ + if (JitConfig.JitPacEnabled() == 0) + { + return; + } + emitIns(TargetOS::IsWindows ? INS_autibsp : INS_autiasp); + m_compiler->unwindPacSignLR(); +} + //------------------------------------------------------------------------ // emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. 
// @@ -16250,9 +16276,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_PC_0A: - case IF_PC_1A: - case IF_PC_2A: + case IF_PC_0A: // autia1716, autiasp, autib1716, autibsp, autibz, autiaz, pacia1716, paciasp, pacib1716, + // pacibsp, pacibz, paciaz, xpaclri + case IF_PC_1A: // autiza, autizb, paciza, pacizb, xpacd, xpaci + case IF_PC_2A: // autia, autib, pacia, pacib switch (ins) { case INS_xpacd: diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 88ac0fe83e4924..290515d40a04a3 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -126,6 +126,9 @@ CONFIG_STRING(JitInlineMethodsWithEHRange, "JitInlineMethodsWithEHRange") CONFIG_INTEGER(JitLongAddress, "JitLongAddress", 0) // Force using the large pseudo instruction form for long address CONFIG_INTEGER(JitMaxUncheckedOffset, "JitMaxUncheckedOffset", 8) +#if defined(TARGET_ARM64) +RELEASE_CONFIG_INTEGER(JitPacEnabled, "JitPacEnabled", 1) +#endif // Enable devirtualization for generic virtual methods RELEASE_CONFIG_INTEGER(JitEnableGenericVirtualDevirtualization, "JitEnableGenericVirtualDevirtualization", 1) diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 1b4d454e142326..97d819e04cb379 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -398,6 +398,11 @@ void Compiler::DumpCfiInfo(bool isHotCode, assert(dwarfReg == DWARF_REG_ILLEGAL); printf(" CodeOffset: 0x%02X Op: AdjustCfaOffset Offset:0x%X\n", codeOffset, offset); break; + case CFI_NEGATE_RA_STATE: + assert(dwarfReg == DWARF_REG_ILLEGAL); + assert(offset == 0); + printf(" CodeOffset: 0x%02X Op: NegateRAState\n", codeOffset); + break; default: printf(" Unrecognized CFI_CODE: 0x%llX\n", *(UINT64*)pCode); break; diff --git a/src/coreclr/jit/unwindarm64.cpp b/src/coreclr/jit/unwindarm64.cpp index f842737171c0b4..620438ec644e5a 100644 --- a/src/coreclr/jit/unwindarm64.cpp +++ 
b/src/coreclr/jit/unwindarm64.cpp @@ -635,6 +635,33 @@ void Compiler::unwindSaveNext() pu->AddCode(0xE6); } +void Compiler::unwindPacSignLR() +{ + if (JitConfig.JitPacEnabled() == 0) + { + return; + } +#if defined(FEATURE_CFI_SUPPORT) + if (generateCFIUnwindCodes()) + { + // Emit NEGATE_RA_STATE opcode in prologs. + if (!compGeneratingProlog) + { + return; + } + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + // Maps to DW_CFA_AARCH64_negate_ra_state + createCfiCode(func, cbProlog, CFI_NEGATE_RA_STATE, DWARF_REG_ILLEGAL); + + return; + } +#endif // FEATURE_CFI_SUPPORT + + // pac_sign_lr: 11111100: sign the return address in lr with the platform PAC key + funCurrentFunc()->uwi.AddCode(0xFC); +} + void Compiler::unwindReturn(regNumber reg) { // Nothing to do; we will always have at least one trailing "end" opcode in our padding. @@ -1081,6 +1108,12 @@ void DumpUnwindInfo(Compiler* comp, printf(" %02X save_next\n", b1); } + else if (b1 == 0xFC) + { + // pac_sign_lr: 11111100 : sign the return address in lr with the platform PAC key. 
+ + printf(" %02X pac_sign_lr\n", b1); + } else { // Unknown / reserved unwind code diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index a508aad49f769e..52765d516444c7 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -163,7 +163,8 @@ class ICodeManager virtual bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign // out ) PURE_VIRTUAL #ifdef TARGET_X86 diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index f90c2dd32dddd1..6a63f7804a1289 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -64,6 +64,18 @@ EXTERN_C CODE_LOCATION RhpRethrow2; #define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) { ASSERT_UNCONDITIONALLY(msg); RhFailFast(); } #endif +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + +static TADDR ReturnAddressToCanonicalPC(TADDR returnAddress) +{ +#if defined(TARGET_ARM64) + returnAddress = (TADDR)PacStripPtr((void*)returnAddress); +#endif // TARGET_ARM64 + return PCODEToPINSTR(dac_cast(returnAddress)); +} + StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame) { STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); @@ -163,7 +175,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF #if !defined(FEATURE_PORTABLE_HELPERS) // @TODO: no portable version of regdisplay memset(&m_RegDisplay, 0, sizeof(m_RegDisplay)); - m_RegDisplay.SetIP((PCODE)PCODEToPINSTR((PCODE)pFrame->m_RIP)); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(dac_cast(pFrame->m_RIP))); SetControlPC(dac_cast(m_RegDisplay.GetIP())); 
PTR_uintptr_t pPreservedRegsCursor = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_PreservedRegs); @@ -406,14 +418,15 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO // This codepath is used by the hijack stackwalk and we can get arbitrary ControlPCs from there. If this // context has a non-managed control PC, then we're done. - if (!m_pInstance->IsManaged(dac_cast(pCtx->GetIp()))) + TADDR controlPC = ReturnAddressToCanonicalPC(pCtx->GetIp()); + if (!m_pInstance->IsManaged(dac_cast(controlPC))) return; // // control state // m_RegDisplay.SP = pCtx->GetSp(); - m_RegDisplay.IP = PCODEToPINSTR(pCtx->GetIp()); + m_RegDisplay.IP = controlPC; SetControlPC(dac_cast(m_RegDisplay.GetIP())); #ifdef TARGET_ARM @@ -626,14 +639,15 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC // This codepath is used by the hijack stackwalk. The IP must be in managed code // or in a conservatively reported assembly thunk. 
- ASSERT(IsValidReturnAddress((void*)pCtx->GetIp())); + TADDR controlPC = ReturnAddressToCanonicalPC(pCtx->GetIp()); + ASSERT(IsValidReturnAddress(dac_cast(controlPC))); // // control state // - SetControlPC(dac_cast(pCtx->GetIp())); + SetControlPC(dac_cast(controlPC)); m_RegDisplay.SP = pCtx->GetSp(); - m_RegDisplay.IP = pCtx->GetIp(); + m_RegDisplay.IP = controlPC; #ifdef TARGET_UNIX #define PTR_TO_REG(ptr, reg) (&((ptr)->reg())) @@ -1216,7 +1230,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_RegDisplay.pFP = SP++; - m_RegDisplay.SetIP(*SP++); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(*SP++)); m_RegDisplay.pX19 = SP++; m_RegDisplay.pX20 = SP++; @@ -1629,7 +1643,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk() stackFrame->UnwindVolatileArgRegisters(&m_RegDisplay); PTR_uintptr_t addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP(); - m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP)); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(*addressOfPushedCallerIP)); m_RegDisplay.SetSP((uintptr_t)dac_cast(stackFrame->get_CallerSP())); SetControlPC(dac_cast(m_RegDisplay.GetIP())); #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) @@ -1760,7 +1774,7 @@ void StackFrameIterator::UnwindThrowSiteThunk() ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif - m_RegDisplay.SetIP(PCODEToPINSTR(pContext->IP)); + m_RegDisplay.SetIP(ReturnAddressToCanonicalPC(pContext->IP)); m_RegDisplay.SetSP(pContext->GetSp()); SetControlPC(dac_cast(m_RegDisplay.GetIP())); @@ -1854,7 +1868,7 @@ void StackFrameIterator::NextInternal() // if the thread is safe to walk, it better not have a hijack in place. 
ASSERT(!m_pThread->IsHijacked()); - SetControlPC(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP()))); + SetControlPC(dac_cast(m_RegDisplay.GetIP())); PTR_VOID collapsingTargetFrame = NULL; diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S index c327e304b8e290..144e2688adf61a 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.S @@ -153,6 +153,7 @@ // Fix the stack by restoring the original return address // ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + xpaclri // // Clear hijack state diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index bd44842a8a8be6..12c5d76b2f0d94 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -124,6 +124,7 @@ PROBE_FRAME_SIZE field 0 ;; Fix the stack by restoring the original return address ;; ldr lr, [x9, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + DCD 0xD50320FF ;; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers ;; ;; Clear hijack state diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S index ea5d91a1a1c1f9..b68c5589e3431f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.S @@ -3,3 +3,24 @@ #include #include "AsmOffsets.inc" + +// void* PacStripPtr(void *); +// This function strips the pointer of PAC info that is passed as an argument. +// We prefer to strip a pointer where it's not going to be used to branch execution to. +.arch_extension pauth + LEAF_ENTRY PacStripPtr, _TEXT + xpaci x0 + ret + LEAF_END PacStripPtr, _TEXT + +// void* PacSignPtr(void *, void *); +// This function signs the input pointer using x1 as salt. +// Thus we need to move the input into x17 and the salt into x16, sign it and then copy it back to the result register. 
+.arch_extension pauth + LEAF_ENTRY PacSignPtr, _TEXT + mov x17, x0 + mov x16, x1 + pacia1716 + mov x0, x17 + ret + LEAF_END PacSignPtr, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm index 49baea4977259b..1695a494fc8103 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/MiscStubs.asm @@ -5,4 +5,23 @@ TEXTAREA +; void* PacStripPtr(void *); +; This function strips the pointer of PAC info that is passed as an argument. +; We prefer to strip a pointer where it's not going to be used to branch execution to. + LEAF_ENTRY PacStripPtr + DCD 0xDAC143E0 ; xpaci x0 instruction in binary to avoid requiring PAC-enabled assemblers + ret + LEAF_END PacStripPtr + +; void* PacSignPtr(void *, void *); +; This function signs the input pointer using x1 as salt. +; Thus we need to move the input into x17 and the salt into x16, sign it and then copy it back to the result register. + LEAF_ENTRY PacSignPtr + mov x17, x0 + mov x16, x1 + DCD 0xD503215F ; pacib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacSignPtr + end diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 3e002ce0775934..da1195acaa4e64 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -37,6 +37,11 @@ static Thread* g_RuntimeInitializingThread; #endif //!DACCESS_COMPILE +#if defined(TARGET_ARM64) +extern "C" void* PacSignPtr(void* ptr, void* sp); +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + ee_alloc_context::PerThreadRandom::PerThreadRandom() { minipal_xoshiro128pp_init(&random_state, (uint32_t)minipal_hires_ticks()); @@ -795,11 +800,14 @@ void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHij void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction) { 
void** ppvRetAddrLocation; + uintptr_t spForPacSign = 0; frameIterator->CalculateCurrentMethodState(); + if (frameIterator->GetCodeManager()->GetReturnAddressHijackInfo(frameIterator->GetMethodInfo(), frameIterator->GetRegisterSet(), - &ppvRetAddrLocation)) + &ppvRetAddrLocation, + &spForPacSign)) { ASSERT(ppvRetAddrLocation != NULL); @@ -811,8 +819,14 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack CrossThreadUnhijack(); void* pvRetAddr = *ppvRetAddrLocation; + ASSERT(pvRetAddr != NULL); + +#if defined(TARGET_ARM64) + ASSERT(StackFrameIterator::IsValidReturnAddress(PacStripPtr(pvRetAddr))); +#else ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); +#endif // TARGET_ARM64 m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; m_pvHijackedReturnAddress = pvRetAddr; @@ -822,7 +836,14 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack frameIterator->GetRegisterSet())); #endif - *ppvRetAddrLocation = (void*)pfnHijackFunction; + void* pvHijackedAddr = (void*)pfnHijackFunction; +#if defined(TARGET_ARM64) + if (spForPacSign != 0) + { + pvHijackedAddr = PacSignPtr(pvHijackedAddr, (void*)spForPacSign); + } +#endif // TARGET_ARM64 + *ppvRetAddrLocation = pvHijackedAddr; STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p\n", GetOSThreadId(), frameIterator->GetRegisterSet()->GetIP()); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 266b56bd1f6e4e..dca47eb39c31c7 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -22,6 +22,10 @@ #include "eventtracebase.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + #define UBF_FUNC_KIND_MASK 0x03 #define UBF_FUNC_KIND_ROOT 0x00 #define UBF_FUNC_KIND_HANDLER 0x01 @@ -64,6 +68,202 @@ 
UnixNativeCodeManager::~UnixNativeCodeManager() { } +#if defined(TARGET_ARM64) +static size_t readULEB(const uint8_t *&p, const uint8_t *end) +{ + size_t result = 0; + unsigned shift = 0; + while (p < end) + { + uint8_t byte = *p++; + result |= size_t(byte & 0x7F) << shift; + if ((byte & 0x80) == 0) // clear top bit indicates the last byte of the value + break; + shift += 7; + } + return result; +} + +static ssize_t readSLEB(const uint8_t *&p, const uint8_t *end) +{ + ssize_t result = 0; + unsigned shift = 0; + uint8_t byte = 0; + + while (p < end) + { + byte = *p++; + result |= ssize_t(byte & 0x7F) << shift; + shift += 7; + if ((byte & 0x80) == 0) // clear top bit indicates the last byte of the value + { + break; + } + } + + if ((shift < (sizeof(result) * 8)) && ((byte & 0x40) != 0)) + { + result |= -((ssize_t)1 << shift); + } + + return result; +} + +struct PacFrameInfo +{ + bool hasPac; + int cfaOffset; + int lrOffset; + int pacCfaOffset; +}; + +static bool TryGetPacFrameInfo(UnixNativeMethodInfo *pNativeMethodInfo, + PacFrameInfo *pPacFrameInfo) +{ + const uint8_t* p = (const uint8_t*)pNativeMethodInfo->unwind_info; + uint32_t fdeLength = *dac_cast((uint8_t*)p); + const uint8_t* end = p + fdeLength; + p += sizeof(uint32_t); // FDE length + + if (*dac_cast((uint8_t*)p) == 0) + return false; + + p += sizeof(uint32_t); // CIE pointer + p += sizeof(uint32_t); // PC start + p += sizeof(uint32_t); // function length + + size_t augmentationLength = readULEB(p, end); + if ((size_t)(end - p) < augmentationLength) + return false; + p += augmentationLength; + + constexpr int DataAlignFactor = -4; + constexpr uint8_t ReturnAddressRegister = 30; + + int cfaOffset = 0; + int lrOffset = INT_MIN; + int pacCfaOffset = 0; + bool hasPac = false; + + while (p < end) + { + uint8_t op = *p++; + + if (op == DW_CFA_AARCH64_negate_ra_state) + { + pacCfaOffset = cfaOffset; + hasPac = true; + continue; + } + + if ((op & 0xC0) == DW_CFA_advance_loc) + { + continue; + } + + if ((op & 0xC0) == 
DW_CFA_offset) + { + uint8_t dwarfReg = op & 0x3F; + ssize_t offsetFactor = (ssize_t)readULEB(p, end); + if (dwarfReg == ReturnAddressRegister) + { + lrOffset = cfaOffset + (int)(offsetFactor * DataAlignFactor); + } + continue; + } + + switch (op) + { + case DW_CFA_nop: + break; + + case DW_CFA_advance_loc1: + p += sizeof(uint8_t); + break; + + case DW_CFA_advance_loc2: + p += sizeof(uint16_t); + break; + + case DW_CFA_advance_loc4: + p += sizeof(uint32_t); + break; + + case DW_CFA_offset_extended: + { + uint8_t dwarfReg = (uint8_t)readULEB(p, end); + ssize_t offsetFactor = (ssize_t)readULEB(p, end); + if (dwarfReg == ReturnAddressRegister) + { + lrOffset = cfaOffset + (int)(offsetFactor * DataAlignFactor); + } + break; + } + + case DW_CFA_offset_extended_sf: + { + uint8_t dwarfReg = (uint8_t)readULEB(p, end); + ssize_t offsetFactor = readSLEB(p, end); + if (dwarfReg == ReturnAddressRegister) + { + lrOffset = cfaOffset + (int)(offsetFactor * DataAlignFactor); + } + break; + } + + case DW_CFA_def_cfa: + readULEB(p, end); // register + cfaOffset = (int)readULEB(p, end); + break; + + case DW_CFA_def_cfa_register: + readULEB(p, end); // register + break; + + case DW_CFA_def_cfa_offset: + cfaOffset = (int)readULEB(p, end); + break; + + case DW_CFA_def_cfa_sf: + readULEB(p, end); // register + cfaOffset = (int)(readSLEB(p, end) * DataAlignFactor); + break; + + case DW_CFA_def_cfa_offset_sf: + cfaOffset = (int)(readSLEB(p, end) * DataAlignFactor); + break; + + default: + return false; + } + } + + pPacFrameInfo->hasPac = hasPac; + pPacFrameInfo->cfaOffset = cfaOffset; + pPacFrameInfo->lrOffset = lrOffset; + pPacFrameInfo->pacCfaOffset = pacCfaOffset; + return true; +} + +static bool TryGetSpForPacSigning(const PacFrameInfo& pacFrameInfo, + PTR_PTR_VOID ppvRetAddrLocation, + uintptr_t *pSpForPacSign) +{ + if (!pacFrameInfo.hasPac) + { + *pSpForPacSign = 0; + return true; + } + + if (ppvRetAddrLocation == NULL || pacFrameInfo.lrOffset == INT_MIN) + return false; + + 
*pSpForPacSign = dac_cast(ppvRetAddrLocation) + + (pacFrameInfo.cfaOffset - pacFrameInfo.lrOffset - pacFrameInfo.pacCfaOffset); + return true; +} +#endif // TARGET_ARM64 + // Virtually unwind stack to the caller of the context specified by the REGDISPLAY bool UnixNativeCodeManager::VirtualUnwind(MethodInfo* pMethodInfo, REGDISPLAY* pRegisterSet) { @@ -364,6 +564,10 @@ bool UnixNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, return false; } +#if defined(TARGET_ARM64) + pRegisterSet->SetIP((PCODE)PacStripPtr((void*)pRegisterSet->GetIP())); +#endif // TARGET_ARM64 + return true; } @@ -381,7 +585,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) pMethodInfo = &methodInfo; #endif -#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) // VirtualUnwind can't unwind epilogues and some prologues. return TrailingEpilogueInstructionsCount(pMethodInfo, pvAddress) == 0 && IsInProlog(pMethodInfo, pvAddress) != 1; #else @@ -501,7 +705,7 @@ static bool IsArmPrologInstruction(uint16_t* pInstr) #endif -#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) // checks for known prolog instructions generated by ILC and returns // 1 - in prolog // 0 - not in prolog @@ -870,6 +1074,16 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho #define LDP_BITS2 0x28400000 #define LDP_MASK2 0x7E400000 +// add sp, sp, #imm +// 1001 0001 0xxx xxxx xxxx xx11 1111 1111 +#define ADD_SP_SP_BITS 0x910003FF +#define ADD_SP_SP_MASK 0xFF8003FF + +// sub sp, fp, #imm +// 1101 0001 0xxx xxxx xxxx xx11 1011 1111 +#define SUB_SP_FP_BITS 0xD10003BF +#define SUB_SP_FP_MASK 0xFF8003FF + // Branches, Exception Generating and System instruction group // xxx1 01xx xxxx xxxx xxxx xxxx xxxx xxxx #define BEGS_BITS 0x14000000 @@ -924,6 +1138,26 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo 
* pMetho return -1; } } + + // Post-index restore sequences such as "ldp x19, x20, [sp], #0x10" also adjust SP + // before the final AUTIASP/RET. We avoid signing with a partially-restored SP. + int baseRegister = (instr >> 5) & 0x1f; + if (baseRegister == 31) + { + if ((instr & LDP_MASK2) == LDP_BITS2 || + (instr & LDR_MASK2) == LDR_BITS2) + { + return -1; + } + } + + // Stack pointer adjustments can happen before AUTIASP/RET in some epilog layouts, + // so treat them as being in the epilog as well. + if ((instr & ADD_SP_SP_MASK) == ADD_SP_SP_BITS || + (instr & SUB_SP_FP_MASK) == SUB_SP_FP_BITS) + { + return -1; + } } #elif defined(TARGET_ARM) @@ -1147,7 +1381,8 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation) // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign) // out { UnixNativeMethodInfo* pNativeMethodInfo = (UnixNativeMethodInfo*)pMethodInfo; @@ -1164,6 +1399,22 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) return false; +#if defined(TARGET_ARM64) + PacFrameInfo pacFrameInfo = {}; + bool hasPacFrameInfo = TryGetPacFrameInfo(pNativeMethodInfo, &pacFrameInfo); + bool pacPresent = hasPacFrameInfo && pacFrameInfo.hasPac; + if (pacPresent) + { + // For PAC frames we only hijack locations where the current frame state is + // unambiguous. Partial prologs can save FP/LR before FP is established, and some + // epilog layouts adjust SP before the final AUTIASP/RET sequence. + if (IsInProlog(pMethodInfo, (PTR_VOID)pRegisterSet->IP) == 1) + { + return false; + } + } +#endif + #if defined(TARGET_ARM) // Ensure that PC doesn't have the Thumb bit set. Prolog and epilog // checks depend on it. 
@@ -1176,9 +1427,22 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // can't figure, possibly a breakpoint instruction return false; } - else if (epilogueInstructions > 0) + +#if defined(TARGET_ARM64) + if (pacPresent && epilogueInstructions != 0) + { + return false; + } +#endif + + if (epilogueInstructions > 0) { + *pSpForArm64PacSign = 0; *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() + (sizeof(TADDR) * (epilogueInstructions - 1))); +#if defined(TARGET_ARM64) + if (!TryGetSpForPacSigning(pacFrameInfo, *ppvRetAddrLocation, pSpForArm64PacSign)) + return false; +#endif return true; } @@ -1201,6 +1465,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // Unwind the current method context to the caller's context to get its stack pointer // and obtain the location of the return address on the stack #if defined(TARGET_AMD64) + *pSpForArm64PacSign = 0; if (!VirtualUnwind(pMethodInfo, pRegisterSet)) { @@ -1211,6 +1476,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return true; #elif defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) + *pSpForArm64PacSign = 0; if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) p += sizeof(int32_t); @@ -1235,6 +1501,17 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn return false; } +#if defined(TARGET_ARM64) + if (pacPresent) + { + // We hijack the caller frame later. To retrieve signing SP for correct PAC + // processing, we need the pacFrameInfo for the caller frame. Currently bail + // out of hijacking in this case.
+ // ToDo-PAC: Enable hijacking caller frame + return false; + } +#endif + PTR_uintptr_t oldLocation = pRegisterSet->GetReturnAddressRegisterLocation(); if (!VirtualUnwind(pMethodInfo, pRegisterSet)) { diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h index ca3f3f2272bde1..7d0969d85dedc2 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -63,7 +63,7 @@ class UnixNativeCodeManager : public ICodeManager bool IsUnwindable(PTR_VOID pvAddress); -#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) int IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddress); #endif @@ -71,7 +71,8 @@ class UnixNativeCodeManager : public ICodeManager bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation); // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign);// out PTR_VOID RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC); diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 83ad8b3f8be6d5..8bd0b67c5b2299 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -22,6 +22,10 @@ #include "eventtracebase.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 + #ifdef TARGET_X86 // Disable contracts @@ -812,7 +816,7 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, &contextPointers); pRegisterSet->SP = context.Sp; - pRegisterSet->IP = context.Pc; + pRegisterSet->IP = (PCODE)PacStripPtr((void*)context.Pc); if (!(flags & USFF_GcUnwind)) { @@ -839,9 +843,240 @@ bool 
CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) return true; } +#if defined(TARGET_ARM64) +static bool TryGetSpForPacSigning(PTR_VOID pUnwindDataBlob, + size_t unwindDataBlobSize, + PTR_PTR_VOID ppvRetAddrLocation, + TADDR *pSpForPacSign) +{ + ASSERT(pSpForPacSign != nullptr); + + *pSpForPacSign = 0; + + //TODO-PAC: Bail out in prolog and epilog for consistency with GetPacSignInfo() in JIT + + ASSERT(unwindDataBlobSize >= sizeof(DWORD)); + + PTR_uint8_t unwindDataPtr = dac_cast(pUnwindDataBlob); + PTR_uint8_t unwindDataEndPtr = unwindDataPtr + unwindDataBlobSize; + + // For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information + // Read the header word. + DWORD HeaderWord = *dac_cast((uint8_t*)unwindDataPtr); + unwindDataPtr += sizeof(DWORD); + + ASSERT(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. + + ULONG UnwindWords = (HeaderWord >> 27) & 31; + ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; + if (EpilogScopeCount == 0 && UnwindWords == 0) + { + if ((unwindDataPtr + sizeof(DWORD)) > unwindDataEndPtr) + { + return false; + } + + DWORD extendedCounts = *dac_cast((uint8_t*)unwindDataPtr); + unwindDataPtr += sizeof(DWORD); + UnwindWords = (extendedCounts >> 16) & 0xFF; + EpilogScopeCount = extendedCounts & 0xFFFF; + } + + if ((HeaderWord & (1 << 21)) != 0) + { + EpilogScopeCount = 0; + } + + if ((unwindDataPtr + (EpilogScopeCount * sizeof(DWORD)) + (UnwindWords * sizeof(DWORD))) > unwindDataEndPtr) + { + return false; + } + + PTR_uint8_t UnwindCodePtr = unwindDataPtr + (EpilogScopeCount * sizeof(DWORD)); + PTR_uint8_t UnwindCodesEndPtr = UnwindCodePtr + (UnwindWords * sizeof(DWORD)); + + auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T + { + if (unwindCode < 0xC0) + { + return 1; + } + else if (unwindCode < 0xE0) + { + return 2; + } + else + { + static const BYTE UnwindCodeSizeTable[32] = + { + 4,1,2,1,1,1,1,3, 
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 + }; + + return UnwindCodeSizeTable[unwindCode - 0xE0]; + } + }; + + TADDR* unwindOpStarts = (TADDR*)_alloca((UnwindCodesEndPtr - UnwindCodePtr) * sizeof(TADDR)); + ULONG unwindOpIndex = 0; + for (PTR_uint8_t unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) + { + BYTE curCode = *unwindOpPtr; + if (curCode == 0xE4) // end + { + break; + } + + SIZE_T unwindOpSize = GetUnwindOpSize(curCode); + if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) + { + return false; + } + + unwindOpStarts[unwindOpIndex++] = dac_cast(unwindOpPtr); + unwindOpPtr += unwindOpSize; + } + + SSIZE_T currentSpOffset = 0; + SSIZE_T lrSlotOffset = -1; + SSIZE_T pacSpOffset = 0; + bool hasPacSignLR = false; + constexpr SSIZE_T PtrSize = 8; + + // ARM64 prolog unwind codes are stored in reverse prolog order. Replay them in prolog order so + // PACIASP/PACIBSP captures the SP that was live when LR was originally signed. + while (unwindOpIndex != 0) + { + UnwindCodePtr = dac_cast(unwindOpStarts[--unwindOpIndex]); + BYTE CurCode = *UnwindCodePtr; + + if (((CurCode & 0xFC) == 0xC8) || // save_regp + ((CurCode & 0xFE) == 0xD8) || // save_fregp + ((CurCode & 0xFE) == 0xDC) || // save_freg + CurCode == 0xE1 || // set_fp + CurCode == 0xE2 || // add_fp + CurCode == 0xE3 || // nop + CurCode == 0xE5 || // end_c + CurCode == 0xE6) // save_next + { + continue; + } + + if ((CurCode & 0xE0) == 0x00) // alloc_s + { + currentSpOffset -= (CurCode & 0x1F) * 16; + continue; + } + + if ((CurCode & 0xE0) == 0x20) // save_r19r20_x + { + currentSpOffset -= (CurCode & 0x1F) * 8; + continue; + } + + if ((CurCode & 0xC0) == 0x40) // save_fplr + { + lrSlotOffset = currentSpOffset + ((CurCode & 0x3F) * 8) + PtrSize; + continue; + } + + if ((CurCode & 0xC0) == 0x80) // save_fplr_x + { + currentSpOffset -= ((CurCode & 0x3F) + 1) * 8; + lrSlotOffset = currentSpOffset + PtrSize; + continue; + } + + if ((CurCode & 0xF8) == 0xC0) // alloc_m + { + ULONG x = ((CurCode & 
0x7) << 8) | *(UnwindCodePtr + 1); + currentSpOffset -= x * 16; + continue; + } + + if (((CurCode & 0xFC) == 0xCC) || // save_regp_x + ((CurCode & 0xFE) == 0xDA)) // save_fregp_x + { + ULONG z = *(UnwindCodePtr + 1) & 0x3F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if ((CurCode & 0xFC) == 0xD0) // save_reg + { + BYTE nextCode = *(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x3) << 2) | (nextCode >> 6); + ULONG z = nextCode & 0x3F; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset + z * 8; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD4) // save_reg_x + { + BYTE nextCode = *(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x1) << 3) | (nextCode >> 5); + currentSpOffset -= ((nextCode & 0x1F) + 1) * 8; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD6) // save_lrpair + { + ULONG z = *(UnwindCodePtr + 1) & 0x3F; + lrSlotOffset = currentSpOffset + z * 8 + PtrSize; + continue; + } + + if (CurCode == 0xDE) // save_freg_x + { + ULONG z = *(UnwindCodePtr + 1) & 0x1F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if (CurCode == 0xE0) // alloc_l + { + ULONG x = (*(UnwindCodePtr + 1) << 16) | (*(UnwindCodePtr + 2) << 8) | *(UnwindCodePtr + 3); + currentSpOffset -= x * 16; + continue; + } + + if (CurCode == 0xFC) // pac_sign_lr + { + pacSpOffset = currentSpOffset; + hasPacSignLR = true; + continue; + } + + return false; + } + + if (!hasPacSignLR) + { + return true; + } + + if (lrSlotOffset == -1) + { + return false; + } + + *pSpForPacSign = (TADDR)((SSIZE_T)dac_cast(ppvRetAddrLocation) - (lrSlotOffset - pacSpOffset)); + return true; +} +#endif //TARGET_ARM64 + bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation) // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * 
pSpForArm64PacSign) // out { CoffNativeMethodInfo * pNativeMethodInfo = (CoffNativeMethodInfo *)pMethodInfo; @@ -872,6 +1107,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn #endif #if defined(TARGET_AMD64) + *pSpForArm64PacSign = 0; context.Rsp = pRegisterSet->GetSP(); context.Rbp = pRegisterSet->GetFP(); context.Rip = pRegisterSet->GetIP(); @@ -888,6 +1124,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)(context.Rsp - sizeof (PVOID)); return true; #elif defined(TARGET_ARM64) + *pSpForArm64PacSign = 0; if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) p += sizeof(int32_t); @@ -943,8 +1180,14 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn } *ppvRetAddrLocation = (PTR_PTR_VOID)contextPointers.Lr; + if (!TryGetSpForPacSigning(pUnwindDataBlob, unwindDataBlobSize, *ppvRetAddrLocation, pSpForArm64PacSign)) + { + return false; + } + return true; #else + *pSpForArm64PacSign = 0; EstablisherFrame = 0; HandlerData = NULL; return false; diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index c85f5250967793..dbeb3956d483ea 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -92,7 +92,8 @@ class CoffNativeCodeManager : public ICodeManager bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in - PTR_PTR_VOID * ppvRetAddrLocation); // out + PTR_PTR_VOID * ppvRetAddrLocation, // out + uintptr_t * pSpForArm64PacSign); // out #ifdef TARGET_X86 GCRefKind GetReturnValueKind(MethodInfo * pMethodInfo, diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs index 25d81deb39aabc..24a51314396555 
100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCfiOpcode.cs @@ -11,6 +11,7 @@ internal enum CFI_OPCODE CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA - CFI_DEF_CFA // Take address from register and add offset to it. + CFI_DEF_CFA, // Take address from register and add offset to it. + CFI_NEGATE_RA_STATE, // Sign the return address in lr with the platform PAC key } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs index 4c1444c179a701..4a8b2076872590 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfFde.cs @@ -116,6 +116,10 @@ private static byte[] CfiCodeToInstructions(DwarfCie cie, byte[] blobData) cfaOffset = cfiOffset; cfiCodeOffset += DwarfHelper.WriteULEB128(cfiCode.AsSpan(cfiCodeOffset), (uint)cfaOffset); break; + + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + cfiCode[cfiCodeOffset++] = DW_CFA_AARCH64_negate_ra_state; + break; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs index 5db4b2cfeb7e25..f5d6372535a9bd 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Eabi/EabiUnwindConverter.cs @@ -121,6 +121,10 @@ public static byte[] ConvertCFIToEabi(byte[] blobData) EmitSpAdjustment(cfiOffset); } break; + + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + // Do 
nothing here. + break; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs index 6cb3f71f117d88..840405a4350d22 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/MachObjectWriter.Aot.cs @@ -160,6 +160,10 @@ private static uint GetArm64CompactUnwindCode(byte[] blobData) switch (opcode) { + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + // Fall back to DWARF so the AArch64 negate_ra_state opcode is preserved for libunwind. + return UNWIND_ARM64_MODE_DWARF; + case CFI_OPCODE.CFI_DEF_CFA_REGISTER: cfaRegister = dwarfReg; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 135762578a5308..a260373a6e1b04 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -101,7 +101,8 @@ private enum CFI_OPCODE CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA CFI_REL_OFFSET, // Register is saved at offset from the current CFA - CFI_DEF_CFA // Take address from register and add offset to it. + CFI_DEF_CFA, // Take address from register and add offset to it. + CFI_NEGATE_RA_STATE, // Sign the return address in lr with the platform PAC key } // Get the CFI data in the same shape as clang/LLVM generated one. 
This improves the compatibility with libunwind and other unwind solutions @@ -132,6 +133,7 @@ private static byte[] CompressARM64CFI(byte[] blobData) } int offset = 0; + bool shouldAddPACOpCode = false; while (offset < blobData.Length) { codeOffset = Math.Max(codeOffset, blobData[offset++]); @@ -185,6 +187,10 @@ private static byte[] CompressARM64CFI(byte[] blobData) } } break; + + case CFI_OPCODE.CFI_NEGATE_RA_STATE: + shouldAddPACOpCode = true; + break; } } @@ -194,6 +200,14 @@ private static byte[] CompressARM64CFI(byte[] blobData) using (BinaryWriter cfiWriter = new BinaryWriter(cfiStream)) { + if (shouldAddPACOpCode) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_NEGATE_RA_STATE); + cfiWriter.Write((short)-1); + cfiWriter.Write(0); + } + if (cfaRegister != -1) { cfiWriter.Write((byte)codeOffset); diff --git a/src/coreclr/unwinder/arm64/unwinder.cpp b/src/coreclr/unwinder/arm64/unwinder.cpp index f76a97c67f88ec..f3c398c123af66 100644 --- a/src/coreclr/unwinder/arm64/unwinder.cpp +++ b/src/coreclr/unwinder/arm64/unwinder.cpp @@ -31,6 +31,10 @@ #define FIELD_OFFSET(type, field) ((LONG)__builtin_offsetof(type, field)) #endif +#if !defined(DACCESS_COMPILE) +extern "C" void* PacAuthPtr(void* ptr, void* sp); +#endif // !defined(DACCESS_COMPILE) + #ifdef HOST_UNIX #define RtlZeroMemory ZeroMemory @@ -251,16 +255,75 @@ do { #endif // !defined(DEBUGGER_UNWIND) -// // Macros for stripping pointer authentication (PAC) bits. -// +#if !defined(DACCESS_COMPILE) -#if !defined(DEBUGGER_STRIP_PAC) +#define HANDLE_PAC(pointer, sp) RtlHandlePacOnline(pointer, sp) -// NOTE: Pointer authentication is not used by .NET, so the implementation does nothing -#define STRIP_PAC(Params, pointer) +FORCEINLINE +VOID RtlHandlePacOnline(_Inout_ PULONG64 Pointer, _In_ ULONG64 Sp) -#endif +/*++ + +Routine Description: + + This routine authenticates an ARM64 pointer authenticated with PACIASP + using the supplied stack pointer as the modifier. 
Hence this should only + be called when authenticating a pointer at runtime (not debugger). + +Arguments: + + Pointer - Supplies a pointer to the pointer whose PAC will be authenticated. + + Sp - Supplies the stack pointer value that was used as the PAC modifier. + +Return Value: + + None. + +--*/ + +{ + *Pointer = (ULONG64)PacAuthPtr((void *)(*Pointer), (void *)Sp); +} +#else + +#define HANDLE_PAC(pointer, sp) RtlStripPacManual(pointer, sp) + +FORCEINLINE +VOID +RtlStripPacManual( + _Inout_ PULONG64 Pointer, + _In_ ULONG64 Sp + ) +/*++ + +Routine Description: + + This routine manually strips the ARM64 Pointer Authentication Code (PAC) + from a pointer. This is functionally similar to the XPAC family of + instructions. + + N.B. Even though PAC is only supported on ARM64, this routine is available + on all architectures to conveniently enable scenarios such as the + Debugger. + +Arguments: + + Pointer - Supplies a pointer to the pointer whose PAC will be stripped. + +Return Value: + + None. + +--*/ +{ + UNREFERENCED_PARAMETER(Sp); + *Pointer &= 0x0000FFFFFFFFFFFF; + return; +} + +#endif // !defined(DACCESS_COMPILE) // // Macros to clarify opcode parsing @@ -2343,7 +2406,7 @@ Return Value: return STATUS_UNWIND_INVALID_SEQUENCE; } - STRIP_PAC(UnwindParams, &ContextRecord->Lr); + HANDLE_PAC(&ContextRecord->Lr, ContextRecord->Sp); // // TODO: Implement support for UnwindFlags RTL_VIRTUAL_UNWIND2_VALIDATE_PAC. 
diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index b60f2a118be155..30b624a1f7309e 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -147,11 +147,47 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR x25, x26, 64 EPILOG_RESTORE_REG_PAIR x27, x28, 80 EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 192 + xpaclri EPILOG_RETURN NESTED_END OnHijackTripThread, _TEXT #endif // FEATURE_HIJACK +// void* PacStripPtr(void *); +// This function strips the pointer of PAC info that is passed as an argument. +// We prefer to strip a pointer where it's not going to be used to branch execution to. +.arch_extension pauth + LEAF_ENTRY PacStripPtr, _TEXT + xpaci x0 + ret + LEAF_END PacStripPtr, _TEXT + +// void* PacSignPtr(void *, void *); +// This function signs the input pointer using x1 as salt. +// To avoid failing on non-PAC enabled machines, we use pacia1716 which signs x17 using x16 as the modifier. +// Thus we need to move the input into x17 and the salt into x16, sign it and then copy it back to the result register. +.arch_extension pauth + LEAF_ENTRY PacSignPtr, _TEXT + mov x17, x0 + mov x16, x1 + pacia1716 + mov x0, x17 + ret + LEAF_END PacSignPtr, _TEXT + +// void* PacAuthPtr(void *, void *); +// This function authenticates the input signed-pointer using x1 as salt. +// To avoid failing on non-PAC enabled machines, we use autia1716 which authenticates x17 using x16 as the modifier. +// Thus we need to move the input into x17 and the salt into x16, authenticate it and then copy it back to the result register.
+.arch_extension pauth + LEAF_ENTRY PacAuthPtr, _TEXT + mov x17, x0 + mov x16, x1 + autia1716 + mov x0, x17 + ret + LEAF_END PacAuthPtr, _TEXT + // ------------------------------------------------------------------ // Redirection Stub for GC in fully interruptible method //GenerateRedirectedHandledJITCaseStub GCThreadControl diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index e7f6f8083b6d58..8d3a816caa9fe1 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -311,11 +311,45 @@ NoFloatingPointRetVal EPILOG_RESTORE_REG_PAIR x25, x26, #64 EPILOG_RESTORE_REG_PAIR x27, x28, #80 EPILOG_RESTORE_REG_PAIR fp, lr, #192! + + DCD 0xD50320FF ; xpaclri instruction in binary to avoid error while compiling with non-PAC enabled compilers EPILOG_RETURN NESTED_END #endif ; FEATURE_HIJACK +; void* PacStripPtr(void *); +; This function strips the pointer of PAC info that is passed as an argument. +; We prefer to strip a pointer where it's not going to be used to branch execution to. + LEAF_ENTRY PacStripPtr + DCD 0xDAC143E0 ; xpaci x0 instruction in binary to avoid requiring PAC-enabled assemblers + ret + LEAF_END PacStripPtr + +; void* PacSignPtr(void *, void *); +; This function signs the input pointer using x1 as salt. +; To avoid failing on non-PAC enabled machines, we use pacib1716 which signs x17 using x16 as the modifier. +; Thus we need to move the input into x17 and the salt into x16, sign it and then copy it back to the result register. + LEAF_ENTRY PacSignPtr + mov x17, x0 + mov x16, x1 + DCD 0xD503215F ; pacib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacSignPtr + +; void* PacAuthPtr(void *, void *); +; This function authenticates the input signed-pointer using x1 as salt. +; To avoid failing on non-PAC enabled machines, we use autib1716 which authenticates x17 using x16 as the modifier.
+; Thus we need to move the input into x17 and the salt into x16, authenticate it and then copy it back to the result register. + LEAF_ENTRY PacAuthPtr + mov x17, x0 + mov x16, x1 + DCD 0xD50321DF ; autib1716 instruction in binary to avoid error while compiling with non-PAC enabled compilers + mov x0, x17 + ret + LEAF_END PacAuthPtr + ;; ------------------------------------------------------------------ ;; Redirection Stub for GC in fully interruptible method GenerateRedirectedHandledJITCaseStub GCThreadControl diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 55e327dff3beda..802fcce7af29a7 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -45,6 +45,10 @@ class ComCallMethodDesc; extern PCODE GetPreStubEntryPoint(); +#ifndef DACCESS_COMPILE +extern "C" void* PacAuthPtr(void* ptr, void* sp); +#endif + #define STACK_ALIGN_SIZE 16 #define JUMP_ALLOCATE_SIZE 16 // # bytes to allocate for a jump instruction @@ -208,7 +212,7 @@ typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA inline PCODE GetIP(const T_CONTEXT * context) { LIMITED_METHOD_DAC_CONTRACT; - return context->Pc; + return (PCODE) context->Pc; } inline void SetIP(T_CONTEXT *context, PCODE eip) { diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 89e9c3f8b45727..60298085c77d2a 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -474,7 +474,7 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; - pRD->pCurrentContext->Pc = m_ReturnAddress; + pRD->pCurrentContext->Pc = GetReturnAddress(); size_t s = sizeof(struct HijackArgs); _ASSERTE(s%8 == 0); // HijackArgs contains register values and hence will be a multiple of 8 // stack must be multiple of 16.
So if s is not multiple of 16 then there must be padding of 8 bytes diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 5e87f606da0be9..80d6419535d476 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6312,6 +6312,244 @@ bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSaf return fIsInEpilog; } +#if defined(TARGET_ARM64) +// Read the PAC state for a managed ARM64 frame and, when PAC is enabled, recover the +// SP value that was live when PACIASP signed the return address in LR. +bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign) +{ + _ASSERTE(pContextToCheck != nullptr); + _ASSERTE(pCodeInfo->IsValid()); + _ASSERTE(pSpForPacSign != nullptr); + + *pSpForPacSign = 0; + + // In prolog or epilog while the current frame is still being established or torn down + // retrieving correct SP is complex. We conservatively bail-out in this case. + // TODO-PAC: Explore opportunities to retrieve SP while in prolog/epilog. + if (IsIPInProlog(pCodeInfo)) + { + return false; + } + + BOOL unused = TRUE; + if (IsIPInEpilog(pContextToCheck, pCodeInfo, &unused)) + { + return false; + } + + // Lookup the function entry for the IP + PTR_RUNTIME_FUNCTION FunctionEntry = pCodeInfo->GetFunctionEntry(); + + // We should always get a function entry for a managed method + _ASSERTE(FunctionEntry != NULL); + DWORD_PTR ImageBase = pCodeInfo->GetModuleBase(); + + _ASSERTE((FunctionEntry->UnwindData & 3) == 0); // Packed unwind data are not used with managed code + ULONG_PTR UnwindDataPtr = (ULONG_PTR)(ImageBase + FunctionEntry->UnwindData); + + // For unwind info layout details refer https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#arm64-exception-handling-information + // Read the header word. 
+ DWORD HeaderWord = *(DWORD*)UnwindDataPtr; + UnwindDataPtr += sizeof(DWORD); + + _ASSERTE(((HeaderWord >> 18) & 3) == 0); // Version 0 is the only supported version. + + ULONG UnwindWords = (HeaderWord >> 27) & 31; + ULONG EpilogScopeCount = (HeaderWord >> 22) & 31; + if (EpilogScopeCount == 0 && UnwindWords == 0) + { + DWORD extendedCounts = *(DWORD*)UnwindDataPtr; + UnwindDataPtr += sizeof(DWORD); + UnwindWords = (extendedCounts >> 16) & 0xFF; + EpilogScopeCount = extendedCounts & 0xFFFF; + } + + if ((HeaderWord & (1 << 21)) != 0) + { + EpilogScopeCount = 0; + } + + ULONG_PTR UnwindCodePtr = UnwindDataPtr + sizeof(DWORD) * EpilogScopeCount; + ULONG_PTR UnwindCodesEndPtr = UnwindCodePtr + sizeof(DWORD) * UnwindWords; + + auto GetUnwindOpSize = [](BYTE unwindCode) -> SIZE_T + { + if (unwindCode < 0xC0) + { + return 1; + } + else if (unwindCode < 0xE0) + { + return 2; + } + else + { + static const BYTE UnwindCodeSizeTable[32] = + { + 4,1,2,1,1,1,1,3, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 2,3,4,5,1,1,1,1 + }; + + return UnwindCodeSizeTable[unwindCode - 0xE0]; + } + }; + + ULONG_PTR* unwindOpStarts = (ULONG_PTR*)_alloca((UnwindCodesEndPtr - UnwindCodePtr) * sizeof(ULONG_PTR)); + ULONG unwindOpIndex = 0; + for (ULONG_PTR unwindOpPtr = UnwindCodePtr; unwindOpPtr < UnwindCodesEndPtr;) + { + BYTE curCode = *(BYTE*)unwindOpPtr; + if (curCode == 0xE4) // end + { + break; + } + + SIZE_T unwindOpSize = GetUnwindOpSize(curCode); + if ((unwindOpPtr + unwindOpSize) > UnwindCodesEndPtr) + { + return false; + } + + unwindOpStarts[unwindOpIndex++] = unwindOpPtr; + unwindOpPtr += unwindOpSize; + } + + SSIZE_T currentSpOffset = 0; + SSIZE_T lrSlotOffset = SSIZE_T_MIN; + SSIZE_T pacSpOffset = 0; + BOOL hasPacSignLR = false; + constexpr SSIZE_T PtrSize = 8; + + // ARM64 prolog unwind codes are stored in reverse prolog order. Replay them in prolog order so + // PACIASP/PACIBSP captures the SP that was live when LR was originally signed. 
+ while (unwindOpIndex != 0) + { + UnwindCodePtr = unwindOpStarts[--unwindOpIndex]; + ULONG CurCode = *(BYTE*)UnwindCodePtr; + + if (((CurCode & 0xFC) == 0xC8) || // save_regp + ((CurCode & 0xFE) == 0xD8) || // save_fregp + ((CurCode & 0xFE) == 0xDC) || // save_freg + CurCode == 0xE1 || // set_fp + CurCode == 0xE2 || // add_fp + CurCode == 0xE3 || // nop + CurCode == 0xE5 || // end_c + CurCode == 0xE6) // save_next + { + continue; + } + + if ((CurCode & 0xE0) == 0x00) // alloc_s + { + currentSpOffset -= (CurCode & 0x1F) * 16; + continue; + } + + if ((CurCode & 0xE0) == 0x20) // save_r19r20_x + { + currentSpOffset -= (CurCode & 0x1F) * 8; + continue; + } + + if ((CurCode & 0xC0) == 0x40) // save_fplr + { + lrSlotOffset = currentSpOffset + ((CurCode & 0x3F) * 8) + PtrSize; + continue; + } + + if ((CurCode & 0xC0) == 0x80) // save_fplr_x + { + currentSpOffset -= ((CurCode & 0x3F) + 1) * 8; + lrSlotOffset = currentSpOffset + PtrSize; + continue; + } + + if ((CurCode & 0xF8) == 0xC0) // alloc_m + { + ULONG x = ((CurCode & 0x7) << 8) | *(BYTE*)(UnwindCodePtr + 1); + currentSpOffset -= x * 16; + continue; + } + + if (((CurCode & 0xFC) == 0xCC) || // save_regp_x + ((CurCode & 0xFE) == 0xDA)) // save_fregp_x + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if ((CurCode & 0xFC) == 0xD0) // save_reg + { + BYTE nextCode = *(BYTE*)(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x3) << 2) | (nextCode >> 6); + ULONG z = nextCode & 0x3F; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset + z * 8; + } + + continue; + } + + if ((CurCode & 0xFE) == 0xD4) // save_reg_x + { + BYTE nextCode = *(BYTE*)(UnwindCodePtr + 1); + ULONG x = ((CurCode & 0x1) << 3) | (nextCode >> 5); + currentSpOffset -= ((nextCode & 0x1F) + 1) * 8; + if (x == 11) // R30 / LR is the 12th GP register in the save_reg encodings + { + lrSlotOffset = currentSpOffset; + } + + continue; + } + + if 
((CurCode & 0xFE) == 0xD6) // save_lrpair + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x3F; + lrSlotOffset = currentSpOffset + z * 8 + PtrSize; + continue; + } + + if (CurCode == 0xDE) // save_freg_x + { + ULONG z = *(BYTE*)(UnwindCodePtr + 1) & 0x1F; + currentSpOffset -= (z + 1) * 8; + continue; + } + + if (CurCode == 0xE0) // alloc_l + { + ULONG x = (*(BYTE*)(UnwindCodePtr + 1) << 16) | (*(BYTE*)(UnwindCodePtr + 2) << 8) | *(BYTE*)(UnwindCodePtr + 3); + currentSpOffset -= x * 16; + continue; + } + + if (CurCode == 0xFC) // pac_sign_lr + { + pacSpOffset = currentSpOffset; + hasPacSignLR = true; + continue; + } + + return false; + } + + if (!hasPacSignLR) + { + return true; + } + + if (lrSlotOffset == SSIZE_T_MIN) + { + return false; + } + + *pSpForPacSign = (TADDR)((SSIZE_T)retAddrLocation - (lrSlotOffset - pacSpOffset)); + return true; +} +#endif // TARGET_ARM64 + #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) #define EXCEPTION_VISUALCPP_DEBUGGER ((DWORD) (1<<30 | 0x6D<<16 | 5000)) diff --git a/src/coreclr/vm/excep.h b/src/coreclr/vm/excep.h index a37f1a490d3cfd..97157b3566edee 100644 --- a/src/coreclr/vm/excep.h +++ b/src/coreclr/vm/excep.h @@ -29,7 +29,9 @@ BOOL AdjustContextForJITHelpers(EXCEPTION_RECORD *pExceptionRecord, CONTEXT *pCo // General purpose functions for use on an IP in jitted code. 
bool IsIPInProlog(EECodeInfo *pCodeInfo); bool IsIPInEpilog(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, BOOL *pSafeToInjectThreadAbort); - +#if defined(TARGET_ARM64) +bool GetPacSignInfo(PTR_CONTEXT pContextToCheck, EECodeInfo *pCodeInfo, TADDR retAddrLocation, TADDR *pSpForPacSign); +#endif // TARGET_ARM64 #endif // FEATURE_HIJACK && (!TARGET_X86 || TARGET_UNIX) // Enums diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index d2da804694077c..111092195921cc 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1299,6 +1299,20 @@ class HijackFrame : public Frame m_ReturnAddress); } + PCODE GetReturnAddress_Impl() + { + LIMITED_METHOD_DAC_CONTRACT; + +#if defined(TARGET_ARM64) && !defined(DACCESS_COMPILE) + if (m_SpForPacSign != 0) // zero: no PAC-signing SP was supplied, so fall through and return the address as stored + { + return (PCODE)PacAuthPtr((void*)m_ReturnAddress, (void*)m_SpForPacSign); + } +#endif + + return (PCODE)m_ReturnAddress; + } + BOOL NeedsUpdateRegDisplay_Impl() { LIMITED_METHOD_CONTRACT; @@ -1326,11 +1340,18 @@ class HijackFrame : public Frame // HijackFrames are created by trip functions. See OnHijackTripThread() // They are real C++ objects on the stack. // So, it's a public function -- but that doesn't mean you should make some.
- HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args); + HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args +#if defined(TARGET_ARM64) + , LPVOID spForPacSign +#endif + ); protected: TADDR m_ReturnAddress; +#if defined(TARGET_ARM64) + TADDR m_SpForPacSign; +#endif PTR_Thread m_Thread; DPTR(HijackArgs) m_Args; diff --git a/src/coreclr/vm/tailcallhelp.cpp b/src/coreclr/vm/tailcallhelp.cpp index da2dcca7d1da13..92efd671e2a6d0 100644 --- a/src/coreclr/vm/tailcallhelp.cpp +++ b/src/coreclr/vm/tailcallhelp.cpp @@ -10,6 +10,9 @@ #include "gcrefmap.h" #include "threads.h" +#if defined(TARGET_ARM64) +extern "C" void* PacStripPtr(void* ptr); +#endif // TARGET_ARM64 FCIMPL0(void*, TailCallHelp::GetTailCallArgBuffer) { @@ -39,7 +42,15 @@ FCIMPL2(void*, TailCallHelp::GetTailCallInfo, void** retAddrSlot, void** retAddr Thread* thread = GetThread(); - *retAddr = thread->GetReturnAddress(retAddrSlot); + void* retAddrFromSlot = thread->GetReturnAddress(retAddrSlot); + +#if defined(TARGET_ARM64) + // Strip the return address here: it is only used for comparison and + // is never used as a branch target.
+ retAddrFromSlot = PacStripPtr(retAddrFromSlot); +#endif // TARGET_ARM64 + *retAddr = retAddrFromSlot; + return thread->GetTailCallTls(); } FCIMPLEND diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index a28139db66d73f..a0e7949fe0fd43 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -2518,6 +2518,9 @@ class Thread void HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86_ARG(bool hasAsyncRet)); VOID *m_pvHJRetAddr; // original return address (before hijack) +#ifdef TARGET_ARM64 + VOID *m_pSpForPacSign; // stack pointer value that was used to sign LR with PACIASP (nullptr when none was recorded) +#endif VOID **m_ppvHJRetAddrPtr; // place we bashed a new return address MethodDesc *m_HijackedFunction; // remember what we hijacked diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 61a5190ea1ded6..4c52f7a49e19c5 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -22,6 +22,11 @@ #define HIJACK_NONINTERRUPTIBLE_THREADS +#if defined(TARGET_ARM64) +extern "C" void* PacSignPtr(void* ptr, void* sp); +extern "C" void* PacAuthPtr(void* ptr, void* sp); +#endif // TARGET_ARM64 + bool ThreadSuspend::s_fSuspendRuntimeInProgress = false; bool ThreadSuspend::s_fSuspended = false; @@ -4464,6 +4469,9 @@ struct ExecutionState bool m_IsInterruptible; // is this code interruptible?
MethodDesc *m_pFD; // current function/method we're executing VOID **m_ppvRetAddrPtr; // pointer to return address in frame +#if defined(TARGET_ARM64) + VOID *m_pSpForPacSign; // stack pointer value that was used to sign LR with PACIASP (nullptr when none was recorded) +#endif DWORD m_RelOffset; // relative offset at which we're currently executing in this fcn IJitManager *m_pJitManager; METHODTOKEN m_MethodToken; @@ -4471,8 +4479,10 @@ struct ExecutionState ExecutionState() { LIMITED_METHOD_CONTRACT; -#ifdef TARGET_X86 +#if defined(TARGET_X86) m_FirstPass = true; +#elif defined(TARGET_ARM64) + m_pSpForPacSign = nullptr; #endif } }; @@ -4535,6 +4545,10 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 // Remember the place that the return would have gone m_pvHJRetAddr = *esb->m_ppvRetAddrPtr; +#if defined(TARGET_ARM64) + m_pSpForPacSign = esb->m_pSpForPacSign; +#endif + IS_VALID_CODE_PTR((FARPROC) (TADDR)m_pvHJRetAddr); // TODO [DAVBR]: For the full fix for VsWhidbey 450273, the below // may be uncommented once isLegalManagedCodeCaller works properly @@ -4546,6 +4560,13 @@ void Thread::HijackThread(ExecutionState *esb X86_ARG(ReturnKind returnKind) X86 m_HijackedFunction = esb->m_pFD; // Bash the stack to return to one of our stubs +#if defined(TARGET_ARM64) + if (m_pSpForPacSign != nullptr) + { + pvHijackAddr = PacSignPtr(pvHijackAddr, m_pSpForPacSign); + } +#endif // TARGET_ARM64 + *esb->m_ppvRetAddrPtr = pvHijackAddr; SetThreadState(TS_Hijacked); } @@ -4625,6 +4646,9 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) pES->m_pFD = pCF->GetFunction(); pES->m_MethodToken = pCF->GetMethodToken(); pES->m_ppvRetAddrPtr = 0; +#if defined(TARGET_ARM64) + pES->m_pSpForPacSign = nullptr; +#endif pES->m_IsInterruptible = pCF->IsGcSafe(); pES->m_RelOffset = pCF->GetRelOffset(); pES->m_pJitManager = pCF->GetJitManager(); @@ -4769,9 +4793,16 @@ StackWalkAction SWCB_GetExecutionState(CrawlFrame *pCF, VOID *pData) return action; }
-HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args) +HijackFrame::HijackFrame(LPVOID returnAddress, Thread *thread, HijackArgs *args +#if defined(TARGET_ARM64) + , LPVOID spForPacSign +#endif + ) : Frame(FrameIdentifier::HijackFrame), m_ReturnAddress((TADDR)returnAddress), +#if defined(TARGET_ARM64) + m_SpForPacSign((TADDR)spForPacSign), +#endif m_Thread(thread), m_Args(args) { @@ -4802,12 +4833,18 @@ void STDCALL OnHijackWorker(HijackArgs * pArgs) thread->ResetThreadState(Thread::TS_Hijacked); - // Fix up our caller's stack, so it can resume from the hijack correctly + // Keep the actual resume address in the saved LR slot. HijackFrame + // authenticates the return address on demand for stackwalk/GC, but + // OnHijackTripThread will later return via the saved LR in HijackArgs. pArgs->ReturnAddress = (size_t)thread->m_pvHJRetAddr; // Build a frame so that stack crawling can proceed from here back to where // we will resume execution. - HijackFrame frame((void *)pArgs->ReturnAddress, thread, pArgs); + HijackFrame frame(thread->m_pvHJRetAddr, thread, pArgs +#if defined(TARGET_ARM64) + , thread->m_pSpForPacSign +#endif + ); #ifdef _DEBUG BOOL GCOnTransition = FALSE; @@ -5246,8 +5283,17 @@ BOOL Thread::HandledJITCase() X86_ONLY(ReturnKind returnKind;) X86_ONLY(bool hasAsyncRet;) + ARM64_ONLY(TADDR spForPacSign = 0;) if (GetReturnAddressHijackInfo(&codeInfo X86_ARG(&returnKind) X86_ARG(&hasAsyncRet))) { +#if defined(TARGET_ARM64) + if (!GetPacSignInfo(&ctx, &codeInfo, dac_cast<TADDR>(esb.m_ppvRetAddrPtr), &spForPacSign)) + { + return FALSE; + } + + esb.m_pSpForPacSign = (PVOID)spForPacSign; +#endif // TARGET_ARM64 HijackThread(&esb X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } } @@ -5798,6 +5844,15 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) StackWalkerWalkingThreadHolder threadStackWalking(pThread); // Hijack the return address to point to the appropriate routine based on the method's return type.
+ ARM64_ONLY(TADDR spForPacSign = 0;) +#if defined(TARGET_ARM64) + if (!GetPacSignInfo(interruptedContext, &codeInfo, dac_cast<TADDR>(executionState.m_ppvRetAddrPtr), &spForPacSign)) + { + return; + } + + executionState.m_pSpForPacSign = (PVOID)spForPacSign; +#endif // TARGET_ARM64 pThread->HijackThread(&executionState X86_ARG(returnKind) X86_ARG(hasAsyncRet)); } }