From 743a8769a0533f9913153a056bf6a123107927f2 Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Tue, 25 Nov 2025 14:09:28 +0000 Subject: [PATCH 1/2] Support spill temps with unknown size on ARM64 Allocates spill temps to the UnknownSizeFrame when the type being spilled has an unknown size. The current slot recycling system has been adapted to handle TYP_SIMD and TYP_MASK as special cases. --- src/coreclr/jit/codegenarmarch.cpp | 15 +++--- src/coreclr/jit/compiler.h | 12 +++++ src/coreclr/jit/compiler.hpp | 42 +++++++++++------ src/coreclr/jit/emitarm64.cpp | 26 ++++++++-- src/coreclr/jit/lclvars.cpp | 43 ++++++++++++++++- src/coreclr/jit/regset.cpp | 76 ++++++++++++++++++++++-------- src/coreclr/jit/regset.h | 14 ++++-- 7 files changed, 179 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 2b23d8b3f7b0af..6e177b0a7ad3d9 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4873,14 +4873,13 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #if defined(TARGET_ARM64) -/***************************************************************************** - * - * Generates code for creating the UnknownSizeFrame stack space. - * - * See Compiler::UnknownSizeFrame for implementation details. The space contains - * stack allocations for Vector<T>. - */ - +//---------------------------------------------------------------------------- +// +// genUnknownSizeFrame: Generates code for creating the UnknownSizeFrame stack space. +// +// See Compiler::UnknownSizeFrame for implementation details. The space contains +// stack allocations for Vector<T>. 
+// void CodeGen::genUnknownSizeFrame() { assert(m_compiler->compLocallocUsed && m_compiler->compUsesUnknownSizeFrame); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 875106b746944b..b5f4a002ef7a6a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1375,6 +1375,10 @@ class TempDsc } void tdAdjustTempOffs(int offs) { +#ifdef TARGET_ARM64 + // Cannot adjust temporary offsets on the UnknownSizeFrame. + assert(!varTypeHasUnknownSize(tdType)); +#endif tdOffs += offs; assert(tdLegalOffset()); } @@ -4320,6 +4324,7 @@ class Compiler void lvaAlignFrame(); void lvaAssignFrameOffsetsToPromotedStructs(); int lvaAllocateTemps(int stkOffs, bool mustDoubleAlign); + void lvaAllocateUnknownSizeTemp(TempDsc* temp); #ifdef DEBUG void lvaDumpRegLocation(unsigned lclNum); @@ -4521,6 +4526,13 @@ class Compiler return GetOffset(varDsc->GetUnknownSizeFrameIndex(), varDsc->TypeIs(TYP_MASK)); } + int GetAddressingOffset(TempDsc* tmpDsc) + { + assert(tmpDsc->tdTempOffs() >= 0); + assert(varTypeHasUnknownSize(tmpDsc->tdTempType())); + return GetOffset((unsigned)tmpDsc->tdTempOffs(), tmpDsc->tdTempType() == TYP_MASK); + } + // This system ensures we don't try and generate an address on the frame // without finishing all allocations. void Finalize() diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index cf012b39d2e855..18b443b7b7acea 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -2773,13 +2773,7 @@ inline FPbased = isFramePointerUsed(); if (lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); - // The temp might be in use, since this might be during code generation. 
- if (tmpDsc == nullptr) - { - tmpDsc = codeGen->regSet.tmpFindNum(varNum, RegSet::TEMP_USAGE_USED); - } - assert(tmpDsc != nullptr); + TempDsc* tmpDsc = codeGen->regSet.tmpGetNum(varNum); assert(!varTypeHasUnknownSize(tmpDsc->tdTempType())); varOffset = tmpDsc->tdTempOffs(); } @@ -3449,14 +3443,34 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -/* static */ inline unsigned RegSet::tmpSlot(unsigned size) +/* static */ inline unsigned RegSet::tmpSlot(var_types type) { - noway_assert(size >= sizeof(int)); - noway_assert(size <= TEMP_MAX_SIZE); - assert((size % sizeof(int)) == 0); - - assert(size < UINT32_MAX); - return size / sizeof(int) - 1; + unsigned slot = UINT32_MAX; + switch (type) + { +#if defined(FEATURE_SIMD) && defined(TARGET_ARM64) + // Special slots are allocated for TYP_SIMD and TYP_MASK, because they + // have unknown size and therefore can't share slots with other types. 
+ case TYP_SIMD: + slot = TEMP_SLOT_COUNT - 1; + break; + case TYP_MASK: + slot = TEMP_SLOT_COUNT - 2; + break; +#endif + default: + { + assert(!varTypeHasUnknownSize(type)); + unsigned size = genTypeSize(type); + noway_assert(size >= sizeof(int)); + noway_assert(size <= TEMP_MAX_SIZE); + assert((size % sizeof(int)) == 0); + slot = size / sizeof(int) - 1; + } + break; + } + assert(slot < TEMP_SLOT_COUNT); + return slot; } /***************************************************************************** diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index a0711a07f3cd7d..da56820d919774 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8209,14 +8209,23 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va assert(offs >= 0); - if (varx >= 0 && m_compiler->lvaIsUnknownSizeLocal(varx)) + if ((varx >= 0 && m_compiler->lvaIsUnknownSizeLocal(varx)) || + (varx < 0 && codeGen->regSet.tmpIsUnknownSizeTemp(varx))) { // SVE locals are TYP_SIMD or TYP_MASK, both should be placed on the UnknownSizeFrame. // The base address of these locals should be REG_UNKBASE (x19). assert(offs == 0); isSimple = false; reg2 = REG_UNKBASE; - imm = m_compiler->unkSizeFrame.GetAddressingOffset(m_compiler->lvaGetDesc(varx)); + + if (varx >= 0) + { + imm = m_compiler->unkSizeFrame.GetAddressingOffset(m_compiler->lvaGetDesc(varx)); + } + else + { + imm = m_compiler->unkSizeFrame.GetAddressingOffset(codeGen->regSet.tmpGetNum(varx)); + } switch (ins) { @@ -8522,7 +8531,8 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va regNumber reg2 = REG_NA; ssize_t imm = 0; - if (varx >= 0 && m_compiler->lvaIsUnknownSizeLocal(varx)) + if ((varx >= 0 && m_compiler->lvaIsUnknownSizeLocal(varx)) || + (varx < 0 && codeGen->regSet.tmpIsUnknownSizeTemp(varx))) { // SVE locals are TYP_SIMD or TYP_MASK, both should be placed on the UnknownSizeFrame. 
// The base address of these locals should be REG_UNKBASE (x19). @@ -8531,10 +8541,18 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va assert(attr == EA_SCALABLE); reg2 = REG_UNKBASE; - imm = m_compiler->unkSizeFrame.GetAddressingOffset(m_compiler->lvaGetDesc(varx)); fmt = isPredicateRegister(reg1) ? IF_SVE_JG_2A : IF_SVE_JH_2A; isSimple = false; + if (varx >= 0) + { + imm = m_compiler->unkSizeFrame.GetAddressingOffset(m_compiler->lvaGetDesc(varx)); + } + else + { + imm = m_compiler->unkSizeFrame.GetAddressingOffset(codeGen->regSet.tmpGetNum(varx)); + } + // TODO-SVE: Handle generation of base address for large immediate scaled by VL/PL. assert(isValidSimm<9>(imm)); } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 33cb583fcfb3a5..94f295c1971863 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1051,7 +1051,7 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) // ; callee saved regs bool startsAtR0 = (doubleAlignMask & 1) == 1; bool r2XorR3 = ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R2) == 0) != - ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0); + ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0); if (startsAtR0 && r2XorR3) { codeGen->regSet.rsMaskPreSpillAlign = @@ -4564,6 +4564,11 @@ void Compiler::lvaFixVirtualFrameOffsets() assert(codeGen->regSet.tmpAllFree()); for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp)) { + if (varTypeHasUnknownSize(temp->tdTempType())) + { + continue; + } + temp->tdAdjustTempOffs(delta + frameLocalsDelta); } @@ -6126,6 +6131,13 @@ int Compiler::lvaAllocateTemps(int stkOffs, bool mustDoubleAlign) var_types tempType = temp->tdTempType(); unsigned size = temp->tdTempSize(); + if (varTypeHasUnknownSize(tempType)) + { + // This temp will be allocated on the unknown size frame, get the offset from there. 
+ lvaAllocateUnknownSizeTemp(temp); + continue; + } + /* Figure out and record the stack offset of the temp */ /* Need to align the offset? */ @@ -6183,6 +6195,35 @@ int Compiler::lvaAllocateTemps(int stkOffs, bool mustDoubleAlign) return stkOffs; } +//------------------------------------------------------------------------------- +// lvaAllocateUnknownSizeTemp: Allocate a slot for a temp on the UnknownSizeFrame +// +// Arguments: +// temp - The temp to allocate. varTypeHasUnknownSize() must be true for this +// temp. +void Compiler::lvaAllocateUnknownSizeTemp(TempDsc* temp) +{ + assert(varTypeHasUnknownSize(temp->tdTempType())); + + int offset = 0; + switch (temp->tdTempType()) + { +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + case TYP_SIMD: + offset = unkSizeFrame.AllocVector(); + break; + case TYP_MASK: + offset = unkSizeFrame.AllocMask(); + break; +#endif + default: + unreached(); + } + assert(offset >= 0); + + temp->tdSetTempOffs(offset); +} + #ifdef DEBUG /***************************************************************************** diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 58434e6a7912f5..2fcd275641bf46 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -625,12 +625,9 @@ TempDsc* RegSet::tmpGetTemp(var_types type) type = tmpNormalizeType(type); unsigned size = genTypeSize(type); - // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1) - noway_assert(size >= sizeof(int) && size != SIZE_UNKNOWN); - /* Find the slot to search for a free temp of the right size */ - unsigned slot = tmpSlot(size); + unsigned slot = tmpSlot(type); /* Look for a temp with a matching type */ @@ -687,19 +684,22 @@ void RegSet::tmpPreAllocateTemps(var_types type, unsigned count) assert(type == tmpNormalizeType(type)); unsigned size = genTypeSize(type); - // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1) - noway_assert(size >= sizeof(int) && size != SIZE_UNKNOWN); - // Find the 
slot to search for a free temp of the right size. - // Note that slots are shared by types of the identical size (e.g., TYP_REF and TYP_LONG on AMD64), + // Note that slots can be shared by types of the identical size (e.g., TYP_REF and TYP_LONG on AMD64), // so we can't assert that the slot is empty when we get here. - unsigned slot = tmpSlot(size); + unsigned slot = tmpSlot(type); for (unsigned i = 0; i < count; i++) { tmpCount++; - tmpSize += size; + + if (size != SIZE_UNKNOWN) + { + // We don't count temps that have unknown size, because they will be allocated in a different + // part of the frame to temps that have a known size. + tmpSize += size; + } #ifdef TARGET_ARM if (type == TYP_DOUBLE) @@ -738,7 +738,7 @@ void RegSet::tmpRlsTemp(TempDsc* temp) /* Add the temp to the 'free' list */ - slot = tmpSlot(temp->tdTempSize()); + slot = tmpSlot(temp->tdTempType()); #ifdef DEBUG if (m_compiler->verbose) @@ -795,6 +795,40 @@ TempDsc* RegSet::tmpFindNum(int tnum, TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_ return nullptr; } +//---------------------------------------------------------------------------- +// tmpGetNum: Given a temp number, get the corresponding temp. +// +// This looks for temps in the free list and the used list, meaning it can only be used after code +// generation. +// +// It will assert that the temp is found. This should be called for a temp that is known to exist. +// +TempDsc* RegSet::tmpGetNum(int tnum) const +{ + TempDsc* tmp = tmpFindNum(tnum, TEMP_USAGE_FREE); + if (tmp == nullptr) + { + tmp = tmpFindNum(tnum, TEMP_USAGE_USED); + } + assert(tmp != nullptr); + return tmp; +} + +//---------------------------------------------------------------------------- +// tmpIsUnknownSizeTemp: Given a temp number, does the corresponding temp have an unknown size? +// +// It will assert that the temp is found. This should be called for a temp that is known to exist. 
+// +// Arguments: +// tnum - Temp number to test +// +// Returns: +// true when the temp has an unknown size at compile-time. +bool RegSet::tmpIsUnknownSizeTemp(int tnum) const +{ + return varTypeHasUnknownSize(tmpGetNum(tnum)->tdTempType()); +} + /***************************************************************************** * * A helper function is used to iterate over all the temps. @@ -832,12 +866,13 @@ TempDsc* RegSet::tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType /* = TEM assert(curTemp != nullptr); TempDsc* temp = curTemp->tdNext; + unsigned size = curTemp->tdTempSize(); + if (temp == nullptr) { - unsigned size = curTemp->tdTempSize(); - // If there are no more temps in the list, check if there are more - // slots (for bigger sized temps) to walk. + // slots (for bigger sized temps) to walk. This is only possible if + // the temps have a known size. TempDsc* const* tmpLists; if (usageType == TEMP_USAGE_FREE) @@ -849,14 +884,17 @@ TempDsc* RegSet::tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType /* = TEM tmpLists = tmpUsed; } - while (size < TEMP_MAX_SIZE && temp == nullptr) + unsigned slot = tmpSlot(curTemp->tdTempType()) + 1; + while (slot < TEMP_SLOT_COUNT && temp == nullptr) { - size += sizeof(int); - unsigned slot = tmpSlot(size); - temp = tmpLists[slot]; + temp = tmpLists[slot]; + slot++; } - assert((temp == nullptr) || (temp->tdTempSize() == size)); + if (temp == nullptr) + { + assert(slot == TEMP_SLOT_COUNT); + } } return temp; diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index c35d706d476dec..d20a9fb4ce6d23 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -212,6 +212,8 @@ class RegSet TempDsc* tmpGetTemp(var_types type); // get temp for the given type void tmpRlsTemp(TempDsc* temp); TempDsc* tmpFindNum(int temp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const; + TempDsc* tmpGetNum(int temp) const; + bool tmpIsUnknownSizeTemp(int tnum) const; void tmpEnd(); TempDsc* 
tmpListBeg(TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const; @@ -246,7 +248,7 @@ class RegSet // Used by RegSet::rsSpillChk() unsigned tmpGetCount; // Temps which haven't been released yet #endif - static unsigned tmpSlot(unsigned size); // which slot in tmpFree[] or tmpUsed[] to use + static unsigned tmpSlot(var_types type); // which slot in tmpFree[] or tmpUsed[] to use enum TEMP_CONSTANTS : unsigned { @@ -259,11 +261,17 @@ class RegSet #else // !FEATURE_SIMD TEMP_MAX_SIZE = sizeof(double), #endif // !FEATURE_SIMD + +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + // There are two extra slots for temps with unknown size (TYP_SIMD/TYP_MASK) + TEMP_SLOT_COUNT = (TEMP_MAX_SIZE / sizeof(int)) + 2 +#else TEMP_SLOT_COUNT = (TEMP_MAX_SIZE / sizeof(int)) +#endif }; - TempDsc* tmpFree[TEMP_MAX_SIZE / sizeof(int)]; - TempDsc* tmpUsed[TEMP_MAX_SIZE / sizeof(int)]; + TempDsc* tmpFree[TEMP_SLOT_COUNT]; + TempDsc* tmpUsed[TEMP_SLOT_COUNT]; }; #endif // _REGSET_H From 985de7e8a0e146960dfb6af93a343cd0e33614eb Mon Sep 17 00:00:00 2001 From: Sebastian Nickolls Date: Fri, 8 May 2026 09:13:29 +0000 Subject: [PATCH 2/2] Formatting --- src/coreclr/jit/lclvars.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 94f295c1971863..93a0d85532c97e 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1051,7 +1051,7 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) // ; callee saved regs bool startsAtR0 = (doubleAlignMask & 1) == 1; bool r2XorR3 = ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R2) == 0) != - ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0); + ((codeGen->regSet.rsMaskPreSpillRegArg & RBM_R3) == 0); if (startsAtR0 && r2XorR3) { codeGen->regSet.rsMaskPreSpillAlign =