From 7fd0bb5923295e8b67686d75c96b2aa6185968a8 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 28 May 2026 17:55:38 -0700 Subject: [PATCH 1/5] [x64] Add optional secondary frame pointer for large frames On x64, addressing modes only encode a signed 8-bit displacement cheaply (disp8, -128..+127); larger offsets require a 4-byte disp32. Methods with large stack frames therefore emit many oversized stack-local references. This adds an optional secondary frame/stack pointer, reserved in a callee-saved register (RBX), offset by a configurable number of bytes from the primary base. Stack locals that fall outside disp8 range of the primary base but inside disp8 range of the secondary pointer are rewritten to use the secondary pointer, shrinking those references from disp32 to disp8. Gated behind the default-off config JitSecondFramePtr (the byte offset; 0x100 = 256) and restricted to OptimizationDisabled() (MinOpts/Tier0) on x64 only. LSRA reserves the register; codegen sets it up in the prolog after unwindEndProlog(); the emitter rewrites eligible stack-variable (SV) refs to [rbx+disp8]. EH/funclet support: EH methods always use RBP frames on x64. The secondary pointer is re-established (lea rbx,[rbp-offset]) only in filter funclet prologs, since the VM's CallEHFilterFunclet restores only RBP. Catch/finally/fault funclets need no re-establishment because CallEHFunclet restores all nonvolatiles (including RBX) from the establisher context. SuperPMI asmdiffs across all x64 collections (JitSecondFramePtr=0x100): overall -7,353,695 bytes, 100% in MinOpts, FullOpts unchanged (0 diffs). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/codegencommon.cpp | 28 +++++++++- src/coreclr/jit/codegenxarch.cpp | 15 ++++++ src/coreclr/jit/compiler.h | 12 +++++ src/coreclr/jit/emitxarch.cpp | 85 ++++++++++++++++++++++++++++++- src/coreclr/jit/emitxarch.h | 3 ++ src/coreclr/jit/jitconfigvalues.h | 5 ++ src/coreclr/jit/lsra.cpp | 29 +++++++++++ src/coreclr/jit/targetamd64.h | 5 ++ 8 files changed, 179 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 6cc7cb9874d6c8..8303aef1544304 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4791,6 +4791,15 @@ void CodeGen::genFinalizeFrame() } #endif // TARGET_ARM +#if defined(TARGET_AMD64) + // x64 spike: the secondary frame-pointer register (if reserved) must be saved/restored as a + // callee-save, so mark it modified before compCalleeRegsPushed is computed below. + if (regSet.rsMaskResvd != RBM_NONE) + { + regSet.rsSetRegsModified(regSet.rsMaskResvd); + } +#endif // TARGET_AMD64 + #ifdef TARGET_ARM64 if (m_compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform) { @@ -5511,9 +5520,8 @@ void CodeGen::genFnProlog() } #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 +#if defined(TARGET_AMD64) // For x64 OSR we have to finish saving callee saves. - // -#ifdef TARGET_AMD64 if (inheritsCalleeSaves) { genOSRSaveRemainingCalleeSavedRegisters(); @@ -5554,6 +5562,22 @@ void CodeGen::genFnProlog() // //------------------------------------------------------------------------- +#if defined(TARGET_AMD64) + // x64 spike: establish the secondary frame-pointer register. This runs after the frame pointer + // (if any) is established and after SP is final, so both candidate bases are live. 
It is placed + // after the OS-reported prolog because the register was already saved (with its own unwind code) + // by genPushCalleeSavedRegisters; this lea merely loads a derived address and needs no unwind + // data. The register is excluded from allocation, so it stays live for the method body. + if (m_compiler->compSecondFramePtrReg != REG_NA) + { + const regNumber base = m_compiler->compSecondFramePtrFPbased ? REG_FPBASE : REG_SPBASE; + const int disp = m_compiler->compSecondFramePtrFPbased ? -m_compiler->compSecondFramePtrOffset + : m_compiler->compSecondFramePtrOffset; + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, m_compiler->compSecondFramePtrReg, base, disp); + regSet.verifyRegUsed(m_compiler->compSecondFramePtrReg); + } +#endif // TARGET_AMD64 + #ifdef TARGET_ARM64 if (m_compiler->compUsesUnknownSizeFrame) { diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 4cc36d3133fde0..a5cf344492efbd 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10948,6 +10948,21 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding m_compiler->unwindEndProlog(); + // x64 spike (see JitSecondFramePtr): re-establish the secondary frame-pointer register inside + // FILTER funclets only. For catch/finally/fault funclets the runtime helper CallEHFunclet + // restores all nonvolatile registers (including RBX) from the establisher frame's CONTEXT before + // invoking the funclet, so RBX already holds RBP - offset and no work is needed here. Filter + // funclets are invoked via CallEHFilterFunclet, which restores ONLY RBP, so RBX must be recomputed. + // The funclet shares the parent (establisher) frame via RBP, parent locals live at the same + // RBP-relative offsets, and EH methods always use an RBP frame, so the base is always RBP. 
+ if ((m_compiler->compSecondFramePtrReg != REG_NA) && (m_compiler->funCurrentFunc()->funKind == FUNC_FILTER)) + { + assert(m_compiler->compSecondFramePtrFPbased); + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, m_compiler->compSecondFramePtrReg, REG_FPBASE, + -m_compiler->compSecondFramePtrOffset); + regSet.verifyRegUsed(m_compiler->compSecondFramePtrReg); + } + genClearAvxStateInProlog(); } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5dd03596daf24f..a944db88a241c7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10918,6 +10918,18 @@ class Compiler bool compMaskConvertUsed = false; // Does the method have Convert Mask To Vector nodes. bool compUsesThrowHelper = false; // There is a call to a THROW_HELPER for the compiled method. +#ifdef TARGET_AMD64 + // x64 spike (see JitSecondFramePtr): when enabled, a callee-saved register is reserved to + // act as a secondary stack base pointer. It holds (primaryBase +/- compSecondFramePtrOffset), + // used to address far locals with a short (disp8) displacement. REG_NA means the feature is + // off. compSecondFramePtrFPbased records whether the secondary register shadows the frame + // pointer (RBP, locals at negative offsets) or the stack pointer (RSP, locals at positive + // offsets); only accesses matching that base are redirected. + regNumber compSecondFramePtrReg = REG_NA; + int compSecondFramePtrOffset = 0; + bool compSecondFramePtrFPbased = false; +#endif // TARGET_AMD64 + // NOTE: These values are only reliable after // the importing is completely finished. 
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index eadf46bb988a87..10c2e5a0e771b4 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5252,6 +5252,66 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) } //------------------------------------------------------------------------ +#if defined(TARGET_AMD64) +//------------------------------------------------------------------------ +// emitIsSecondFramePtrCandidate: Determine whether a stack access should be +// redirected through the reserved secondary frame-pointer register (an x64 +// spike enabled by JitConfig.JitSecondFramePtr). +// +// Arguments: +// ins -- the instruction +// EBPbased -- whether the canonical access is frame-pointer (RBP) based +// dsp -- the canonical (full effective) displacement off the base +// pAdjustedDsp - [out] the displacement off the secondary register +// +// Return Value: +// true if the access can use [REG_OPT_RSVD2 + disp8] instead of a disp32 +// form off the canonical base; false to use the canonical base/displacement. +// +// Notes: +// Must be deterministic between size estimation and output. Redirects an access whose +// canonical displacement does not fit in a disp8 but does once shifted by the secondary +// offset, and only when its base (RBP vs RSP) matches the base the secondary register +// shadows. Instructions with special displacement/encoding handling (EVEX, APX extended +// EVEX, SSE 0F38/0F3A, crc32) are excluded so the simple [reg+disp8] form always applies. +// +bool emitter::emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int dsp, int* pAdjustedDsp) +{ + if (m_compiler->compSecondFramePtrReg == REG_NA) + { + return false; + } + + // Only redirect accesses whose canonical base matches the base the secondary register shadows + // (RBP for FP-based frames, RSP otherwise). Mixing bases would compute a wrong displacement. 
+ if (EBPbased != m_compiler->compSecondFramePtrFPbased) + { + return false; + } + + if (IsEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins) || EncodedBySSE38orSSE3A(ins) || + (ins == INS_crc32)) + { + return false; + } + + // The secondary register holds (base - offset) for RBP frames (locals at negative offsets) and + // (base + offset) for RSP frames (locals at positive offsets); invert accordingly. + const int adjusted = EBPbased ? (dsp + m_compiler->compSecondFramePtrOffset) + : (dsp - m_compiler->compSecondFramePtrOffset); + const bool rawFits = ((signed char)dsp == (ssize_t)dsp); + const bool adjFits = ((signed char)adjusted == (ssize_t)adjusted); + + if (rawFits || !adjFits) + { + return false; + } + + *pAdjustedDsp = adjusted; + return true; +} +#endif // TARGET_AMD64 + // emitInsSizeSVCalcDisp: Calculate instruction size. // // Arguments: @@ -5276,6 +5336,16 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code, adr = m_compiler->lvaFrameAddress(var, &EBPbased); dsp = adr + id->idAddr()->iiaLclVar.lvaOffset(); +#if defined(TARGET_AMD64) + // x64 spike: when the access can be redirected through the secondary frame pointer it uses a + // [reg+disp8] form with no SIB byte, regardless of EVEX/zero-disp handling below. + int secondDsp; + if (emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) + { + return size + sizeof(char); + } +#endif // TARGET_AMD64 + dspIsZero = (dsp == 0); bool tryCompress = true; @@ -15778,7 +15848,20 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // for stack variables the dsp should never be a reloc assert(id->idIsDspReloc() == 0); - if (EBPbased) +#if defined(TARGET_AMD64) + int secondDsp; + if (emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) + { + // Redirect through the secondary frame pointer (a low callee-saved register, e.g. 
RBX): + // modrm mod=01, rm=base => low byte 0x40 | (reg & 7); no SIB byte and no REX.B since the + // register is < R8 and is not RSP/R12. + assert((unsigned)REG_OPT_RSVD2 < 8); + dst += emitOutputWord(dst, code | (((0x40 | (REG_OPT_RSVD2 & 0x07))) << 8)); + dst += emitOutputByte(dst, secondDsp); + } + else +#endif // TARGET_AMD64 + if (EBPbased) { // EBP-based variable: does the offset fit in a byte? if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index db536ab58ce677..6de51723e4f3d8 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -65,6 +65,9 @@ struct CnsVal UNATIVE_OFFSET emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize); UNATIVE_OFFSET emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp); +#if defined(TARGET_AMD64) + bool emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int dsp, int* pAdjustedDsp); +#endif // TARGET_AMD64 UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val); UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 6e36a55a67ae02..574700232f8d52 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -699,6 +699,11 @@ RELEASE_CONFIG_INTEGER(JitInlinePolicyProfile, "JitInlinePolicyProfile", 0) RELEASE_CONFIG_INTEGER(JitInlinePolicyProfileThreshold, "JitInlinePolicyProfileThreshold", 40) CONFIG_STRING(JitObjectStackAllocationRange, "JitObjectStackAllocationRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocation, "JitObjectStackAllocation", 1) + +// x64 spike: when non-zero, reserve a callee-saved register as a secondary stack base +// pointer set to the primary base +/- this value (RBP - value or RSP + value), used to address far locals with a short (disp8) displacement. +// 0 disables the feature. 
Typical experimental value: 256. +RELEASE_CONFIG_INTEGER(JitSecondFramePtr, "JitSecondFramePtr", 0) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationRefClass, "JitObjectStackAllocationRefClass", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationBoxedValueClass, "JitObjectStackAllocationBoxedValueClass", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStackAllocationConditionalEscape", 1) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 3c8a99a354bae8..e0b6f7d2b9300a 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2588,6 +2588,35 @@ void LinearScan::setFrameType() } #endif // TARGET_ARMARCH || TARGET_RISCV64 +#if defined(TARGET_AMD64) + // x64 spike (see JitSecondFramePtr): reserve a callee-saved register to act as a secondary + // stack base pointer for large frames, extending cheap disp8 addressing. SuperPMI asmdiffs + // showed this is a net code-size win when optimizations are disabled (accurate frame estimate, + // no register-pressure cost) but a loss with optimizations on (enregistration makes the + // pre-allocation frame estimate a poor predictor and removing a register raises spill + // pressure), so restrict it to the optimizations-disabled path. Bail on localloc/OSR to keep + // the spike contained. Methods with EH are supported only on RBP frames: funclets re-establish + // the secondary pointer from RBP (the establisher frame pointer), so an SP base would not be + // recoverable inside a funclet. 
+ { + const int secondOffset = (int)JitConfig.JitSecondFramePtr(); + if ((secondOffset != 0) && m_compiler->opts.OptimizationDisabled() && + ((frameType == FT_ESP_FRAME) || (frameType == FT_EBP_FRAME)) && !m_compiler->compLocallocUsed && + !m_compiler->opts.IsOSR() && ((m_compiler->compHndBBtabCount == 0) || (frameType == FT_EBP_FRAME)) && + (m_compiler->lvaFrameSize(Compiler::REGALLOC_FRAME_LAYOUT) > 256)) + { + m_compiler->compSecondFramePtrReg = REG_OPT_RSVD2; + m_compiler->compSecondFramePtrOffset = secondOffset; + m_compiler->compSecondFramePtrFPbased = (frameType == FT_EBP_FRAME); + m_compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD2; + removeMask |= RBM_OPT_RSVD2.GetIntRegSet(); + JITDUMP(" Reserved REG_OPT_RSVD2 (%s) as secondary frame pointer (%s%s%d)\n", + getRegName(REG_OPT_RSVD2), m_compiler->compSecondFramePtrFPbased ? "RBP" : "RSP", + m_compiler->compSecondFramePtrFPbased ? "-" : "+", secondOffset); + } + } +#endif // TARGET_AMD64 + #ifdef TARGET_ARM if (m_compiler->compLocallocUsed) { diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 55c0dae697d7b6..6dba0ba5b25943 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -417,6 +417,11 @@ #define RBM_SPBASE RBM_ESP #define STR_SPBASE "rsp" +// x64 spike: secondary stack base pointer register (see JitSecondFramePtr). +// A low callee-saved register (RBX) is used to avoid REX.B / SIB encoding complications. 
+#define REG_OPT_RSVD2 REG_EBX +#define RBM_OPT_RSVD2 RBM_EBX + #define FIRST_ARG_STACK_OFFS (REGSIZE_BYTES) // return address #ifdef UNIX_AMD64_ABI From 91029e46815c04b3d5689e039cc6a95bbea0ee7a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 29 May 2026 09:32:26 -0700 Subject: [PATCH 2/5] JIT: make disasm faithful for secondary frame-pointer redirects When an access is redirected through the secondary frame pointer (REG_OPT_RSVD2/RBX) the emitted bytes use [rbx+disp8], but emitDispFrameRef previously still printed the canonical [rbp/rsp+disp], so the listing did not match the encoded instruction. Print the actual [rbx+disp8] operand and append the canonical reference as a parenthesized suffix, e.g. mov qword ptr [rbx+0x78] (rbp-0x88), rax Plumb the instruction through emitDispFrameRef so it can reuse emitIsSecondFramePtrCandidate (the same decision emitOutputSV makes), keeping display and emitted bytes in lockstep. Display-only change; no codegen impact. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/emitarm.cpp | 2 +- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/emitloongarch64.cpp | 2 +- src/coreclr/jit/emitriscv64.cpp | 2 +- src/coreclr/jit/emitxarch.cpp | 86 +++++++++++++++++++++++------ 6 files changed, 74 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 014d5f962e3013..acbcb73aa1d724 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2628,7 +2628,7 @@ class emitter void emitDispGCinfo(); void emitDispJumpList(); void emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc = false); - void emitDispFrameRef(int varx, int disp, int offs, bool asmfm); + void emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins = INS_none); void emitDispInsAddr(const BYTE* code); void emitDispInsOffs(unsigned offs, bool doffs); void emitDispInsHex(instrDesc* id, BYTE* code, size_t sz); diff --git 
a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index f0696f010d3331..72320b3f069c3e 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -7810,7 +7810,7 @@ void emitter::emitDispIns( * Display a stack frame reference. */ -void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins) { #ifdef DEBUG printf("["); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ec30c012bd5ee5..1929e78dd27137 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -14908,7 +14908,7 @@ void emitter::emitDispInsHelp( * Display a stack frame reference. */ -void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins) { #ifdef DEBUG printf("["); diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 716e15f392627f..a48d94d004520b 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -4627,7 +4627,7 @@ void emitter::emitDispIns( * * Display a stack frame reference. */ -void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins) { NYI_LOONGARCH64("emitDispFrameRef-----unused on LoongArch64."); } diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index f1de046359846e..7222d12e64882c 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -4917,7 +4917,7 @@ void emitter::emitDispIns( * Display a stack frame reference. 
*/ -void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins) { NYI_RISCV64("emitDispFrameRef-----unimplemented/unused on RISCV64 yet----"); } diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 10c2e5a0e771b4..15c040ef4e0460 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -12295,10 +12295,10 @@ void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool rel * Display a stack frame reference. */ -void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins) { - int addr; - bool bEBP; + int addr = 0; + bool bEBP = false; printf("["); @@ -12323,6 +12323,20 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) } } +#if defined(TARGET_AMD64) + // x64 spike: when an access is redirected through the secondary frame pointer the emitted bytes + // use [REG_OPT_RSVD2 + disp8] rather than the canonical [rbp/rsp + disp]. Print what is actually + // encoded so the listing matches the bytes, then append the logical reference as a comment. 
+ bool secondRedirected = false; + int secondDsp = 0; + if ((m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) && asmfm) + { + bool bEBPcand = false; + int rawDsp = m_compiler->lvaFrameAddress(varx, &bEBPcand) + disp; + secondRedirected = emitIsSecondFramePtrCandidate(ins, bEBPcand, rawDsp, &secondDsp); + } +#endif // TARGET_AMD64 + if (m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) { if (!asmfm) @@ -12332,7 +12346,23 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) addr = m_compiler->lvaFrameAddress(varx, &bEBP) + disp; - if (bEBP) +#if defined(TARGET_AMD64) + if (secondRedirected) + { + printf("%s", emitRegName(REG_OPT_RSVD2, EA_PTRSIZE)); + + if (secondDsp < 0) + { + printf("-0x%02X", -secondDsp); + } + else if (secondDsp > 0) + { + printf("+0x%02X", secondDsp); + } + } + else +#endif // TARGET_AMD64 + if (bEBP) { printf(STR_FPBASE); @@ -12370,6 +12400,28 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) } printf("]"); + +#if defined(TARGET_AMD64) + if (secondRedirected) + { + // Show the canonical frame reference the redirected access stands in for. Use a parenthesized + // suffix (mirroring the varName annotation below) rather than a ';' comment, since operands + // may still follow on the same line. + printf(" (%s", bEBP ? 
STR_FPBASE : STR_SPBASE); + + if (addr < 0) + { + printf("-0x%02X", -addr); + } + else if (addr > 0) + { + printf("+0x%02X", addr); + } + + printf(")"); + } +#endif // TARGET_AMD64 + #ifdef DEBUG if ((varx >= 0) && m_compiler->opts.varNames && (((IL_OFFSET)offs) != BAD_IL_OFFSET)) { @@ -13305,7 +13357,7 @@ void emitter::emitDispIns( #endif emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_pop) @@ -13324,7 +13376,7 @@ void emitter::emitDispIns( printf("%s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); @@ -13339,7 +13391,7 @@ void emitter::emitDispIns( printf("%s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbMasking(id); emitDispConstant(id); break; @@ -13353,7 +13405,7 @@ void emitter::emitDispIns( printf("%s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); @@ -13387,7 +13439,7 @@ void emitter::emitDispIns( emitDispEmbMasking(id); printf(", %s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbBroadcastCount(id); break; @@ -13401,7 +13453,7 @@ void emitter::emitDispIns( emitDispEmbMasking(id); printf(", %s", sstr); 
emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbBroadcastCount(id); emitDispConstant(id); break; @@ -13421,7 +13473,7 @@ void emitter::emitDispIns( printf("%s", emitRegName(id->idReg1(), attr)); printf(", %s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); printf(", %s", emitRegName(id->idReg2(), attr)); break; } @@ -13430,7 +13482,7 @@ void emitter::emitDispIns( emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbBroadcastCount(id); break; } @@ -13441,7 +13493,7 @@ void emitter::emitDispIns( emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbBroadcastCount(id); emitDispConstant(id); break; @@ -13464,7 +13516,7 @@ void emitter::emitDispIns( printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbBroadcastCount(id); if (!hasMaskReg) @@ -13483,7 +13535,7 @@ void emitter::emitDispIns( printf(", %s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); 
emitDispEmbBroadcastCount(id); printf(", %s", emitRegName(id->idReg2(), attr)); break; @@ -13493,7 +13545,7 @@ void emitter::emitDispIns( { printf("%s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + id->idDebugOnlyInfo()->idVarRefOffs, asmfm, ins); emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr)); break; @@ -14191,7 +14243,7 @@ void emitter::emitDispIns( assert(id->idInsFmt() == IF_SWR_LABEL); instrDescLbl* idlbl = (instrDescLbl*)id; - emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm); + emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm, ins); printf(", "); } From fe7aa81453e8d058d93f3517aa9fb2e75ab40768 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 29 May 2026 09:45:17 -0700 Subject: [PATCH 3/5] Show secondary frame-pointer canonical ref as trailing disasm comment When a stack access is redirected through the secondary frame pointer (REG_OPT_RSVD2), the disassembly now prints the real [rbx+disp8] operand and emits the canonical frame reference (e.g. rbp-0x88) as an end-of-line ';' comment, rather than an inline parenthetical that could be misread as sitting between operands. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/emitxarch.cpp | 44 +++++++++++++++++++++++------------ src/coreclr/jit/emitxarch.h | 9 +++++++ 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 15c040ef4e0460..42148918cc5f7d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -12404,21 +12404,11 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruc #if defined(TARGET_AMD64) if (secondRedirected) { - // Show the canonical frame reference the redirected access stands in for. 
Use a parenthesized - // suffix (mirroring the varName annotation below) rather than a ';' comment, since operands - // may still follow on the same line. - printf(" (%s", bEBP ? STR_FPBASE : STR_SPBASE); - - if (addr < 0) - { - printf("-0x%02X", -addr); - } - else if (addr > 0) - { - printf("+0x%02X", addr); - } - - printf(")"); + // Defer the canonical frame reference to a trailing comment at the end of the instruction + // line (operands may still follow the memory operand on the same line). + emitDispSecondFramePtrPending = true; + emitDispSecondFramePtrFPbased = bEBP; + emitDispSecondFramePtrAddr = addr; } #endif // TARGET_AMD64 @@ -12964,6 +12954,10 @@ void emitter::emitDispIns( instruction ins = id->idIns(); +#if defined(TARGET_AMD64) + emitDispSecondFramePtrPending = false; +#endif // TARGET_AMD64 + #ifdef DEBUG if (m_compiler->verbose) { @@ -14317,6 +14311,26 @@ void emitter::emitDispIns( } #endif +#if defined(TARGET_AMD64) + if (emitDispSecondFramePtrPending) + { + // The memory operand was emitted as [REG_OPT_RSVD2 + disp8]; show the canonical frame + // reference it stands in for as a trailing comment. + printf(" ; %s", emitDispSecondFramePtrFPbased ? 
STR_FPBASE : STR_SPBASE); + + if (emitDispSecondFramePtrAddr < 0) + { + printf("-0x%02X", -emitDispSecondFramePtrAddr); + } + else if (emitDispSecondFramePtrAddr > 0) + { + printf("+0x%02X", emitDispSecondFramePtrAddr); + } + + emitDispSecondFramePtrPending = false; + } +#endif // TARGET_AMD64 + printf("\n"); } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 6de51723e4f3d8..e8e909cca64ee1 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -817,6 +817,15 @@ void emitDispReloc(ssize_t value) const; void emitDispAddrMode(instrDesc* id, bool noDetail = false) const; void emitDispShift(instruction ins, int cnt = 0) const; +#if defined(TARGET_AMD64) +// Display state for secondary frame-pointer redirects (see emitDispFrameRef): when a stack access +// is redirected through REG_OPT_RSVD2 the operand shows [rbx+disp8] and the canonical frame +// reference is emitted as a trailing comment finalized at the end of the instruction line. +bool emitDispSecondFramePtrPending = false; +bool emitDispSecondFramePtrFPbased = false; +int emitDispSecondFramePtrAddr = 0; +#endif // TARGET_AMD64 + const char* emitXMMregName(unsigned reg) const; const char* emitYMMregName(unsigned reg) const; const char* emitZMMregName(unsigned reg) const; From 04d786daac81bb230e292f32146cb88fc54b9324 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 29 May 2026 10:36:34 -0700 Subject: [PATCH 4/5] x64 secondary frame pointer: defer reservation decision to FINAL layout The LSRA-time reservation keyed only off total frame size, so ~27% of methods reserved RBX (push/lea/pop plus unwind data) but never used it: no local actually landed in the secondary disp8 band. The band can't be tested at LSRA, since REGALLOC-layout offsets have no base-register flag and are inflated by an over-estimated callee-save area. 
Reserve RBX at LSRA only as a cheap candidate (out of allocation), then make the precise band-occupancy decision in genFinalizeFrame once FINAL offsets are known. If nothing lands in the band, cancel the reservation so no push/lea/unwind is emitted; otherwise mark the register modified and redo the frame layout to account for the push. aspnet2 asmdiffs: unused-RBX setups drop from 13 to 0; size win improves from -3864 to -3982 bytes. No replay failures. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/codegen.h | 3 +- src/coreclr/jit/codegencommon.cpp | 132 +++++++++++++++--- src/coreclr/jit/codegeninterface.h | 10 ++ src/coreclr/jit/codegenxarch.cpp | 23 ++-- src/coreclr/jit/compiler.h | 12 -- src/coreclr/jit/emitxarch.cpp | 212 ++++++++++++++--------------- src/coreclr/jit/emitxarch.h | 8 +- src/coreclr/jit/jitconfigvalues.h | 9 +- src/coreclr/jit/lsra.cpp | 64 ++++++--- src/coreclr/jit/targetamd64.h | 4 +- 10 files changed, 293 insertions(+), 184 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 30df97543bec50..d407407bba814a 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -600,8 +600,9 @@ class CodeGen final : public CodeGenInterface #ifdef TARGET_AMD64 void genPushCalleeSavedRegistersFromMaskAPX(regMaskTP rsPushRegs); unsigned genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs); + bool genSecondFramePtrIsProfitable(); #endif // TARGET_AMD64 -#endif // !defined(TARGET_XARCH) +#endif // defined(TARGET_XARCH) #endif // !defined(TARGET_ARM64) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8303aef1544304..2c863b0285cd76 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4792,12 +4792,9 @@ void CodeGen::genFinalizeFrame() #endif // TARGET_ARM #if defined(TARGET_AMD64) - // x64 spike: the secondary frame-pointer register (if reserved) must be saved/restored as a - // callee-save, 
so mark it modified before compCalleeRegsPushed is computed below. - if (regSet.rsMaskResvd != RBM_NONE) - { - regSet.rsSetRegsModified(regSet.rsMaskResvd); - } + // The secondary frame-pointer register is reserved as a candidate during LSRA (it is in + // regSet.rsMaskResvd). Unlike ARM, do NOT mark it modified here: whether it is actually pushed is + // decided below by genSecondFramePtrIsProfitable, once FINAL offsets are known. #endif // TARGET_AMD64 #ifdef TARGET_ARM64 @@ -5002,6 +4999,37 @@ void CodeGen::genFinalizeFrame() m_compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT); +#if defined(TARGET_AMD64) + // A secondary frame pointer was reserved as a candidate during LSRA. Now that FINAL offsets are + // known, decide whether establishing it pays off. These offsets assume the register is NOT pushed; + // pushing it only shifts RBP-relative locals deeper and leaves RSP-relative locals unchanged, so + // scanning them is a sound, conservative test. If profitable, mark it modified (so prolog/epilog + // save and restore it) and redo the layout to account for the push; otherwise cancel the + // reservation so no push/lea or unwind data is emitted. + if (genSecondFramePtrReg != REG_NA) + { + if (genSecondFramePtrIsProfitable()) + { + // Reset the layout state first: rsSetRegsModified forbids adding a callee-saved register + // once FINAL layout is complete, and we are about to redo the layout for the push anyway. 
+ m_compiler->lvaDoneFrameLayout = Compiler::REGALLOC_FRAME_LAYOUT; + + regSet.rsSetRegsModified(genRegMask(genSecondFramePtrReg)); + + regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedCalleeSavedRegsMask(); + maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; + m_compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); + + m_compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT); + } + else + { + JITDUMP("Cancelling secondary frame pointer: no local lands in the secondary disp8 band\n"); + genSecondFramePtrReg = REG_NA; + } + } +#endif // TARGET_AMD64 + #ifdef DEBUG if (m_compiler->opts.dspCode || m_compiler->opts.disAsm || m_compiler->opts.disAsm2 || verbose) { @@ -5010,6 +5038,72 @@ void CodeGen::genFinalizeFrame() #endif } +#if defined(TARGET_AMD64) +//------------------------------------------------------------------------ +// genSecondFramePtrIsProfitable: decide whether the reserved secondary frame-pointer register is +// worth establishing, given FINAL frame offsets. +// +// Return Value: +// true if some on-frame local has an access that needs a disp32 off the primary base but fits a +// disp8 off the secondary base; false otherwise. +// +// Notes: +// Mirrors emitter::emitIsSecondFramePtrCandidate's band test, so must run after FINAL offsets are +// assigned. This is necessary but not sufficient: it finds at least one redirectable access but does +// not count them, so a method with only one or two redirects can still regress a few bytes (each +// redirect saves 3 bytes, while setup costs a push + lea, a pop per epilog, and an unwind code). +// Counting sites would need an IR walk (MinOpts has no precise ref counts), not worth the Tier0 cost. 
+// +bool CodeGen::genSecondFramePtrIsProfitable() +{ + assert(genSecondFramePtrReg != REG_NA); + assert(m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); + + const bool wantFPbased = genSecondFramePtrFPbased; + const int offset = genSecondFramePtrOffset; + + // A redirect applies when the raw displacement does NOT fit a disp8 but the adjusted displacement + // (dsp +/- offset) does. Precompute the range of raw displacements that fit a disp8 once adjusted. + const int adjFitLo = wantFPbased ? (-128 - offset) : (-128 + offset); + const int adjFitHi = wantFPbased ? (127 - offset) : (127 + offset); + + for (unsigned varNum = 0; varNum < m_compiler->lvaCount; varNum++) + { + const LclVarDsc* const varDsc = m_compiler->lvaGetDesc(varNum); + if (!varDsc->lvOnFrame || m_compiler->lvaIsUnknownSizeLocal(varNum)) + { + continue; + } + + bool fpBased; + const int loDsp = m_compiler->lvaFrameAddress((int)varNum, &fpBased); + if (fpBased != wantFPbased) + { + continue; + } + + // Accesses to this local span [loDsp, hiDsp] (base slot plus any field/element offset). + const int hiDsp = loDsp + (int)m_compiler->lvaLclStackHomeSize(varNum) - 1; + + // Intersect that range with the adjusted-fits-disp8 range. + const int interLo = (loDsp > adjFitLo) ? loDsp : adjFitLo; + const int interHi = (hiDsp < adjFitHi) ? hiDsp : adjFitHi; + if (interLo > interHi) + { + continue; + } + + // The redirect only helps where the raw displacement itself needs a disp32 (|dsp| > 127). + if ((interLo < -128) || (interHi > 127)) + { + return true; + } + } + + return false; +} +#endif // TARGET_AMD64 + /***************************************************************************** * * Generates code for a function prolog. @@ -5520,8 +5614,9 @@ void CodeGen::genFnProlog() } #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 -#if defined(TARGET_AMD64) // For x64 OSR we have to finish saving callee saves. 
+ // +#ifdef TARGET_AMD64 if (inheritsCalleeSaves) { genOSRSaveRemainingCalleeSavedRegisters(); @@ -5563,18 +5658,17 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- #if defined(TARGET_AMD64) - // x64 spike: establish the secondary frame-pointer register. This runs after the frame pointer - // (if any) is established and after SP is final, so both candidate bases are live. It is placed - // after the OS-reported prolog because the register was already saved (with its own unwind code) - // by genPushCalleeSavedRegisters; this lea merely loads a derived address and needs no unwind - // data. The register is excluded from allocation, so it stays live for the method body. - if (m_compiler->compSecondFramePtrReg != REG_NA) - { - const regNumber base = m_compiler->compSecondFramePtrFPbased ? REG_FPBASE : REG_SPBASE; - const int disp = m_compiler->compSecondFramePtrFPbased ? -m_compiler->compSecondFramePtrOffset - : m_compiler->compSecondFramePtrOffset; - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, m_compiler->compSecondFramePtrReg, base, disp); - regSet.verifyRegUsed(m_compiler->compSecondFramePtrReg); + // Establish the secondary frame-pointer register. This runs after the frame pointer (if any) and + // after SP is final, so both candidate bases are live. It sits after the OS-reported prolog: the + // register was already saved (with its own unwind code) by genPushCalleeSavedRegisters, so this lea + // just loads a derived address and needs no unwind data. The register is out of allocation, so it + // stays live for the method body. + if (genSecondFramePtrReg != REG_NA) + { + const regNumber base = genSecondFramePtrFPbased ? REG_FPBASE : REG_SPBASE; + const int disp = genSecondFramePtrFPbased ? 
-genSecondFramePtrOffset : genSecondFramePtrOffset; + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, genSecondFramePtrReg, base, disp); + regSet.verifyRegUsed(genSecondFramePtrReg); } #endif // TARGET_AMD64 diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index e696c886cd9e28..fc02b9a1d2bcee 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -282,6 +282,16 @@ class CodeGenInterface m_cgFrameRequired = value; } +#ifdef TARGET_AMD64 + // Secondary stack base pointer (see JitSecondFramePtr). When set, this callee-saved register holds + // (primaryBase +/- genSecondFramePtrOffset) and addresses far locals with a disp8 displacement; + // REG_NA means off. genSecondFramePtrFPbased tells whether it shadows RBP (locals at negative + // offsets) or RSP (positive); only accesses on that base are redirected. + regNumber genSecondFramePtrReg = REG_NA; + int genSecondFramePtrOffset = 0; + bool genSecondFramePtrFPbased = false; +#endif // TARGET_AMD64 + #if !HAS_FIXED_REGISTER_SET void SetStackPointerReg(unsigned funcletIndex, regNumber reg); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index a5cf344492efbd..877b4a16faf689 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10948,19 +10948,16 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding m_compiler->unwindEndProlog(); - // x64 spike (see JitSecondFramePtr): re-establish the secondary frame-pointer register inside - // FILTER funclets only. For catch/finally/fault funclets the runtime helper CallEHFunclet - // restores all nonvolatile registers (including RBX) from the establisher frame's CONTEXT before - // invoking the funclet, so RBX already holds RBP - offset and no work is needed here. Filter - // funclets are invoked via CallEHFilterFunclet, which restores ONLY RBP, so RBX must be recomputed. 
- // The funclet shares the parent (establisher) frame via RBP, parent locals live at the same - // RBP-relative offsets, and EH methods always use an RBP frame, so the base is always RBP. - if ((m_compiler->compSecondFramePtrReg != REG_NA) && (m_compiler->funCurrentFunc()->funKind == FUNC_FILTER)) - { - assert(m_compiler->compSecondFramePtrFPbased); - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, m_compiler->compSecondFramePtrReg, REG_FPBASE, - -m_compiler->compSecondFramePtrOffset); - regSet.verifyRegUsed(m_compiler->compSecondFramePtrReg); + // Re-establish the secondary frame-pointer register, but only in FILTER funclets. Catch/finally/ + // fault funclets are entered via CallEHFunclet, which restores all nonvolatiles (including RBX) from + // the establisher CONTEXT, so RBX already holds RBP - offset. Filter funclets use CallEHFilterFunclet, + // which restores only RBP, so RBX must be recomputed. EH methods always use an RBP frame and the + // funclet shares the parent frame via RBP, so the base is always RBP. + if ((genSecondFramePtrReg != REG_NA) && (m_compiler->funCurrentFunc()->funKind == FUNC_FILTER)) + { + assert(genSecondFramePtrFPbased); + GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, genSecondFramePtrReg, REG_FPBASE, -genSecondFramePtrOffset); + regSet.verifyRegUsed(genSecondFramePtrReg); } genClearAvxStateInProlog(); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a944db88a241c7..5dd03596daf24f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10918,18 +10918,6 @@ class Compiler bool compMaskConvertUsed = false; // Does the method have Convert Mask To Vector nodes. bool compUsesThrowHelper = false; // There is a call to a THROW_HELPER for the compiled method. -#ifdef TARGET_AMD64 - // x64 spike (see JitSecondFramePtr): when enabled, a callee-saved register is reserved to - // act as a secondary stack base pointer. 
It holds (primaryBase +/- compSecondFramePtrOffset), - // used to address far locals with a short (disp8) displacement. REG_NA means the feature is - // off. compSecondFramePtrFPbased records whether the secondary register shadows the frame - // pointer (RBP, locals at negative offsets) or the stack pointer (RSP, locals at positive - // offsets); only accesses matching that base are redirected. - regNumber compSecondFramePtrReg = REG_NA; - int compSecondFramePtrOffset = 0; - bool compSecondFramePtrFPbased = false; -#endif // TARGET_AMD64 - // NOTE: These values are only reliable after // the importing is completely finished. diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 42148918cc5f7d..8d4ea9cf05bc21 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5251,12 +5251,10 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) return sz; } -//------------------------------------------------------------------------ #if defined(TARGET_AMD64) //------------------------------------------------------------------------ -// emitIsSecondFramePtrCandidate: Determine whether a stack access should be -// redirected through the reserved secondary frame-pointer register (an x64 -// spike enabled by JitConfig.JitSecondFramePtr). +// emitIsSecondFramePtrCandidate: should this stack access be redirected through the secondary +// frame-pointer register (see JitConfig.JitSecondFramePtr)? // // Arguments: // ins -- the instruction @@ -5265,26 +5263,26 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) // pAdjustedDsp - [out] the displacement off the secondary register // // Return Value: -// true if the access can use [REG_OPT_RSVD2 + disp8] instead of a disp32 -// form off the canonical base; false to use the canonical base/displacement. +// true if the access can use [REG_OPT_RSVD2 + disp8] instead of a disp32 form off the canonical +// base; false to keep the canonical base/displacement. 
// // Notes: -// Must be deterministic between size estimation and output. Redirects an access whose -// canonical displacement does not fit in a disp8 but does once shifted by the secondary -// offset, and only when its base (RBP vs RSP) matches the base the secondary register -// shadows. Instructions with special displacement/encoding handling (EVEX, APX extended -// EVEX, SSE 0F38/0F3A, crc32) are excluded so the simple [reg+disp8] form always applies. +// Must be deterministic between size estimation and output. Redirects an access whose canonical +// displacement needs a disp32 but fits a disp8 once shifted by the secondary offset, and only when +// its base (RBP vs RSP) matches the base the secondary register shadows. Instructions with special +// displacement/encoding handling (EVEX, APX extended EVEX, SSE 0F38/0F3A, crc32) are excluded so the +// plain [reg+disp8] form always applies. // bool emitter::emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int dsp, int* pAdjustedDsp) { - if (m_compiler->compSecondFramePtrReg == REG_NA) + if (codeGen->genSecondFramePtrReg == REG_NA) { return false; } // Only redirect accesses whose canonical base matches the base the secondary register shadows // (RBP for FP-based frames, RSP otherwise). Mixing bases would compute a wrong displacement. - if (EBPbased != m_compiler->compSecondFramePtrFPbased) + if (EBPbased != codeGen->genSecondFramePtrFPbased) { return false; } @@ -5297,10 +5295,9 @@ bool emitter::emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int // The secondary register holds (base - offset) for RBP frames (locals at negative offsets) and // (base + offset) for RSP frames (locals at positive offsets); invert accordingly. - const int adjusted = EBPbased ? 
(dsp + m_compiler->compSecondFramePtrOffset) - : (dsp - m_compiler->compSecondFramePtrOffset); - const bool rawFits = ((signed char)dsp == (ssize_t)dsp); - const bool adjFits = ((signed char)adjusted == (ssize_t)adjusted); + const int adjusted = EBPbased ? (dsp + codeGen->genSecondFramePtrOffset) : (dsp - codeGen->genSecondFramePtrOffset); + const bool rawFits = ((signed char)dsp == (ssize_t)dsp); + const bool adjFits = ((signed char)adjusted == (ssize_t)adjusted); if (rawFits || !adjFits) { @@ -5312,6 +5309,7 @@ bool emitter::emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int } #endif // TARGET_AMD64 +//------------------------------------------------------------------------ // emitInsSizeSVCalcDisp: Calculate instruction size. // // Arguments: @@ -5337,8 +5335,8 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code, dsp = adr + id->idAddr()->iiaLclVar.lvaOffset(); #if defined(TARGET_AMD64) - // x64 spike: when the access can be redirected through the secondary frame pointer it uses a - // [reg+disp8] form with no SIB byte, regardless of EVEX/zero-disp handling below. + // A redirected access uses a [reg+disp8] form with no SIB byte, regardless of the EVEX/zero-disp + // handling below. int secondDsp; if (emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) { @@ -12324,15 +12322,15 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruc } #if defined(TARGET_AMD64) - // x64 spike: when an access is redirected through the secondary frame pointer the emitted bytes - // use [REG_OPT_RSVD2 + disp8] rather than the canonical [rbp/rsp + disp]. Print what is actually - // encoded so the listing matches the bytes, then append the logical reference as a comment. + // A redirected access is emitted as [REG_OPT_RSVD2 + disp8] rather than the canonical + // [rbp/rsp + disp]. 
Print what is actually encoded so the listing matches the bytes, then append + // the logical reference as a trailing comment. bool secondRedirected = false; int secondDsp = 0; if ((m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) && asmfm) { - bool bEBPcand = false; - int rawDsp = m_compiler->lvaFrameAddress(varx, &bEBPcand) + disp; + bool bEBPcand = false; + int rawDsp = m_compiler->lvaFrameAddress(varx, &bEBPcand) + disp; secondRedirected = emitIsSecondFramePtrCandidate(ins, bEBPcand, rawDsp, &secondDsp); } #endif // TARGET_AMD64 @@ -12363,40 +12361,40 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruc else #endif // TARGET_AMD64 if (bEBP) - { - printf(STR_FPBASE); - - if (addr < 0) { - printf("-0x%02X", -addr); + printf(STR_FPBASE); + + if (addr < 0) + { + printf("-0x%02X", -addr); + } + else if (addr > 0) + { + printf("+0x%02X", addr); + } } - else if (addr > 0) + else { - printf("+0x%02X", addr); - } - } - else - { - /* Adjust the offset by amount currently pushed on the stack */ + /* Adjust the offset by amount currently pushed on the stack */ - printf(STR_SPBASE); + printf(STR_SPBASE); - if (addr < 0) - { - printf("-0x%02X", -addr); - } - else if (addr > 0) - { - printf("+0x%02X", addr); - } + if (addr < 0) + { + printf("-0x%02X", -addr); + } + else if (addr > 0) + { + printf("+0x%02X", addr); + } #if !FEATURE_FIXED_OUT_ARGS - if (emitCurStackLvl) - printf("+0x%02X", emitCurStackLvl); + if (emitCurStackLvl) + printf("+0x%02X", emitCurStackLvl); #endif // !FEATURE_FIXED_OUT_ARGS - } + } } printf("]"); @@ -12404,8 +12402,8 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruc #if defined(TARGET_AMD64) if (secondRedirected) { - // Defer the canonical frame reference to a trailing comment at the end of the instruction - // line (operands may still follow the memory operand on the same line). 
+ // Defer the canonical frame reference to a trailing comment: operands may still follow the + // memory operand on the same line. emitDispSecondFramePtrPending = true; emitDispSecondFramePtrFPbased = bEBP; emitDispSecondFramePtrAddr = addr; @@ -15928,97 +15926,97 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) else #endif // TARGET_AMD64 if (EBPbased) - { - // EBP-based variable: does the offset fit in a byte? - if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { - if (dspInByte) + // EBP-based variable: does the offset fit in a byte? + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) + { + if (dspInByte) + { + dst += emitOutputByte(dst, code | 0x45); + dst += emitOutputByte(dst, dsp); + } + else + { + dst += emitOutputByte(dst, code | 0x85); + dst += emitOutputLong(dst, dsp); + } + } + else if (dspInByte) { - dst += emitOutputByte(dst, code | 0x45); + dst += emitOutputWord(dst, code | 0x4500); dst += emitOutputByte(dst, dsp); } else { - dst += emitOutputByte(dst, code | 0x85); + dst += emitOutputWord(dst, code | 0x8500); dst += emitOutputLong(dst, dsp); } } - else if (dspInByte) - { - dst += emitOutputWord(dst, code | 0x4500); - dst += emitOutputByte(dst, dsp); - } else { - dst += emitOutputWord(dst, code | 0x8500); - dst += emitOutputLong(dst, dsp); - } - } - else - { #if !FEATURE_FIXED_OUT_ARGS - // Adjust the offset by the amount currently pushed on the CPU stack - dsp += emitCurStackLvl; + // Adjust the offset by the amount currently pushed on the CPU stack + dsp += emitCurStackLvl; - if (IsEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins)) - { - // We cannot reliably predict the encoding size up front so we shouldn't - // have encountered a scenario marked with compressed displacement. 
We - // did predict cases that could use the small encoding for VEX scenarios + if (IsEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins)) + { + // We cannot reliably predict the encoding size up front so we shouldn't + // have encountered a scenario marked with compressed displacement. We + // did predict cases that could use the small encoding for VEX scenarios - assert(!HasCompressedDisplacement(id)); + assert(!HasCompressedDisplacement(id)); - if (!TakesEvexPrefix(id)) + if (!TakesEvexPrefix(id)) + { + dspInByte = ((signed char)dsp == (ssize_t)dsp); + } + } + else { dspInByte = ((signed char)dsp == (ssize_t)dsp); } - } - else - { - dspInByte = ((signed char)dsp == (ssize_t)dsp); - } - dspIsZero = (dsp == 0); + dspIsZero = (dsp == 0); #endif // !FEATURE_FIXED_OUT_ARGS - // Does the offset fit in a byte? - if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) - { - if (dspIsZero) + // Does the offset fit in a byte? + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) + { + if (dspIsZero) + { + dst += emitOutputByte(dst, code | 0x04); + dst += emitOutputByte(dst, 0x24); + } + else if (dspInByte) + { + dst += emitOutputByte(dst, code | 0x44); + dst += emitOutputByte(dst, 0x24); + dst += emitOutputByte(dst, dsp); + } + else + { + dst += emitOutputByte(dst, code | 0x84); + dst += emitOutputByte(dst, 0x24); + dst += emitOutputLong(dst, dsp); + } + } + else if (dspIsZero) { - dst += emitOutputByte(dst, code | 0x04); + dst += emitOutputWord(dst, code | 0x0400); dst += emitOutputByte(dst, 0x24); } else if (dspInByte) { - dst += emitOutputByte(dst, code | 0x44); + dst += emitOutputWord(dst, code | 0x4400); dst += emitOutputByte(dst, 0x24); dst += emitOutputByte(dst, dsp); } else { - dst += emitOutputByte(dst, code | 0x84); + dst += emitOutputWord(dst, code | 0x8400); dst += emitOutputByte(dst, 0x24); dst += emitOutputLong(dst, dsp); } } - else if (dspIsZero) - { - dst += emitOutputWord(dst, code | 0x0400); - dst += emitOutputByte(dst, 0x24); - } - else 
if (dspInByte) - { - dst += emitOutputWord(dst, code | 0x4400); - dst += emitOutputByte(dst, 0x24); - dst += emitOutputByte(dst, dsp); - } - else - { - dst += emitOutputWord(dst, code | 0x8400); - dst += emitOutputByte(dst, 0x24); - dst += emitOutputLong(dst, dsp); - } - } // Now generate the constant value, if present if (addc) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index e8e909cca64ee1..6e81aacef9e014 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -66,7 +66,7 @@ struct CnsVal UNATIVE_OFFSET emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize); UNATIVE_OFFSET emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp); #if defined(TARGET_AMD64) - bool emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int dsp, int* pAdjustedDsp); +bool emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int dsp, int* pAdjustedDsp); #endif // TARGET_AMD64 UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val); @@ -818,9 +818,9 @@ void emitDispAddrMode(instrDesc* id, bool noDetail = false) const; void emitDispShift(instruction ins, int cnt = 0) const; #if defined(TARGET_AMD64) -// Display state for secondary frame-pointer redirects (see emitDispFrameRef): when a stack access -// is redirected through REG_OPT_RSVD2 the operand shows [rbx+disp8] and the canonical frame -// reference is emitted as a trailing comment finalized at the end of the instruction line. +// Display state for secondary frame-pointer redirects (see emitDispFrameRef): the operand shows +// [rbx+disp8] and the canonical frame reference is emitted as a trailing comment, finalized at the +// end of the instruction line. 
bool emitDispSecondFramePtrPending = false; bool emitDispSecondFramePtrFPbased = false; int emitDispSecondFramePtrAddr = 0; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 574700232f8d52..a768eda8066580 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -700,10 +700,11 @@ RELEASE_CONFIG_INTEGER(JitInlinePolicyProfileThreshold, "JitInlinePolicyProfileT CONFIG_STRING(JitObjectStackAllocationRange, "JitObjectStackAllocationRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocation, "JitObjectStackAllocation", 1) -// x64 spike: when non-zero, reserve a callee-saved register as a secondary stack base -// pointer set to SP + , used to address far locals with a short (disp8) displacement. -// 0 disables the feature. Typical experimental value: 256. -RELEASE_CONFIG_INTEGER(JitSecondFramePtr, "JitSecondFramePtr", 0) +// When non-zero, reserve a callee-saved register as a secondary stack base pointer, offset by this +// many bytes from the primary base, to address far locals with a disp8 displacement. 0 disables. +// 256 is canonical: it tiles the two disp8 windows contiguously for a 512-byte cheap range; other +// values overlap (less reach) or leave a gap. x64 only. 
+RELEASE_CONFIG_INTEGER(JitSecondFramePtr, "JitSecondFramePtr", 0x100) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationRefClass, "JitObjectStackAllocationRefClass", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationBoxedValueClass, "JitObjectStackAllocationBoxedValueClass", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStackAllocationConditionalEscape", 1) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index e0b6f7d2b9300a..916b6bf1bf1319 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2589,30 +2589,50 @@ void LinearScan::setFrameType() #endif // TARGET_ARMARCH || TARGET_RISCV64 #if defined(TARGET_AMD64) - // x64 spike (see JitSecondFramePtr): reserve a callee-saved register to act as a secondary - // stack base pointer for large frames, extending cheap disp8 addressing. SuperPMI asmdiffs - // showed this is a net code-size win when optimizations are disabled (accurate frame estimate, - // no register-pressure cost) but a loss with optimizations on (enregistration makes the - // pre-allocation frame estimate a poor predictor and removing a register raises spill - // pressure), so restrict it to the optimizations-disabled path. Bail on localloc/OSR to keep - // the spike contained. Methods with EH are supported only on RBP frames: funclets re-establish - // the secondary pointer from RBP (the establisher frame pointer), so an SP base would not be - // recoverable inside a funclet. 
- { - const int secondOffset = (int)JitConfig.JitSecondFramePtr(); - if ((secondOffset != 0) && m_compiler->opts.OptimizationDisabled() && - ((frameType == FT_ESP_FRAME) || (frameType == FT_EBP_FRAME)) && !m_compiler->compLocallocUsed && - !m_compiler->opts.IsOSR() && ((m_compiler->compHndBBtabCount == 0) || (frameType == FT_EBP_FRAME)) && - (m_compiler->lvaFrameSize(Compiler::REGALLOC_FRAME_LAYOUT) > 256)) - { - m_compiler->compSecondFramePtrReg = REG_OPT_RSVD2; - m_compiler->compSecondFramePtrOffset = secondOffset; - m_compiler->compSecondFramePtrFPbased = (frameType == FT_EBP_FRAME); + // Consider reserving a callee-saved register as a secondary stack base pointer holding + // (primaryBase +/- offset), so far locals in a large frame can use cheap disp8 addressing + // (see JitSecondFramePtr). + { + const int secondFramePtrOffset = (int)JitConfig.JitSecondFramePtr(); + + // Only a win with optimizations disabled: with opts on, enregistration makes the pre-layout + // frame estimate a poor predictor and removing a register from allocation raises spill pressure. + const bool optDisabled = m_compiler->opts.OptimizationDisabled(); + + // We need a single base register (RBP or RSP) that is fixed for the whole method body. Other + // frame types (e.g. double-aligned) are not handled. + const bool haveFixedBase = (frameType == FT_EBP_FRAME) || (frameType == FT_ESP_FRAME); + + // The frame must be large enough that some local can fall outside the primary disp8 window. + // Necessary but not sufficient; the precise band test is deferred to genSecondFramePtrIsProfitable + // (FINAL offsets are not available yet). + const bool frameLargeEnough = m_compiler->lvaFrameSize(Compiler::REGALLOC_FRAME_LAYOUT) > 256; + + // With EH we require an RBP frame: filter funclets re-establish the pointer from RBP (the + // establisher frame pointer), so an RSP base would not be recoverable inside a funclet. 
+ const bool ehCompatible = (m_compiler->compHndBBtabCount == 0) || (frameType == FT_EBP_FRAME); + + // OSR reuses the Tier0 frame with a bespoke callee-save/SP setup the secondary-pointer prolog + // does not handle. OSR is normally optimized, so optDisabled already excludes it; this guard only + // matters under stress modes that force MinOpts on an OSR method. + const bool notOsr = !m_compiler->opts.IsOSR(); + + if ((secondFramePtrOffset != 0) && optDisabled && haveFixedBase && frameLargeEnough && ehCompatible && notOsr) + { + // Reserve the register only as a candidate: remove it from allocation now, but defer the + // real decision -- does any local land in the secondary disp8 band -- to genFinalizeFrame + // (genSecondFramePtrIsProfitable), which can still cancel the reservation. The REGALLOC-layout + // offsets available here are unreliable (base-register flag not yet set, offsets inflated by + // an over-estimated callee-save area) so cannot drive the band test. + const bool wantFPbased = (frameType == FT_EBP_FRAME); + m_compiler->codeGen->genSecondFramePtrReg = REG_OPT_RSVD2; + m_compiler->codeGen->genSecondFramePtrOffset = secondFramePtrOffset; + m_compiler->codeGen->genSecondFramePtrFPbased = wantFPbased; m_compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD2; removeMask |= RBM_OPT_RSVD2.GetIntRegSet(); - JITDUMP(" Reserved REG_OPT_RSVD2 (%s) as secondary frame pointer (%s%s%d)\n", - getRegName(REG_OPT_RSVD2), m_compiler->compSecondFramePtrFPbased ? "RBP" : "RSP", - m_compiler->compSecondFramePtrFPbased ? "-" : "+", secondOffset); + JITDUMP(" Reserved REG_OPT_RSVD2 (%s) as candidate secondary frame pointer (%s%s%d)\n", + getRegName(REG_OPT_RSVD2), wantFPbased ? "RBP" : "RSP", wantFPbased ? 
"-" : "+", + secondFramePtrOffset); } } #endif // TARGET_AMD64 diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 6dba0ba5b25943..309d58b1e8c276 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -417,8 +417,8 @@ #define RBM_SPBASE RBM_ESP #define STR_SPBASE "rsp" -// x64 spike: secondary stack base pointer register (see JitSecondFramePtr). -// A low callee-saved register (RBX) is used to avoid REX.B / SIB encoding complications. +// Secondary stack base pointer register (see JitSecondFramePtr). A low callee-saved register +// (RBX) avoids REX.B / SIB encoding complications. #define REG_OPT_RSVD2 REG_EBX #define RBM_OPT_RSVD2 RBM_EBX From 33bd2fcb016ae3a4efc4464035d921eaee14d3e3 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 30 May 2026 09:56:25 -0700 Subject: [PATCH 5/5] JIT: cut throughput cost of x64 secondary frame pointer The reservation gate ran lvaFrameSize(REGALLOC_FRAME_LAYOUT) eagerly for every method -- a full layout pass even in fullopts, where the feature never engages. Replace it with a cheap O(numLocals) estimate (outgoing-arg space plus stack-home sizes, bailing once past the disp8 window); the precise band test in genSecondFramePtrIsProfitable still re-checks with FINAL offsets. Also skip the per-stack-access candidate check in the common case via a cached emitter flag, set once in emitBegFN, instead of dereferencing codeGen on every access. No code-size change; recovers the throughput regressions flagged in CI. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/emit.cpp | 3 +++ src/coreclr/jit/emitxarch.cpp | 6 +++--- src/coreclr/jit/emitxarch.h | 3 +++ src/coreclr/jit/lsra.cpp | 27 +++++++++++++++++++++------ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 7744c36d4b9e13..7822c699c59189 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1341,6 +1341,9 @@ void emitter::emitBegFN(bool hasFramePtr emitHasFramePtr = hasFramePtr; +#if defined(TARGET_AMD64) + emitSecondFramePtrActive = (codeGen->genSecondFramePtrReg != REG_NA); +#endif #ifdef DEBUG emitChkAlign = chkAlign; #endif diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 8d4ea9cf05bc21..81e276f14ca1de 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5338,7 +5338,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code, // A redirected access uses a [reg+disp8] form with no SIB byte, regardless of the EVEX/zero-disp // handling below. int secondDsp; - if (emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) + if (emitSecondFramePtrActive && emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) { return size + sizeof(char); } @@ -12327,7 +12327,7 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruc // the logical reference as a trailing comment. 
bool secondRedirected = false; int secondDsp = 0; - if ((m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) && asmfm) + if (emitSecondFramePtrActive && (m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) && asmfm) { bool bEBPcand = false; int rawDsp = m_compiler->lvaFrameAddress(varx, &bEBPcand) + disp; @@ -15914,7 +15914,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) #if defined(TARGET_AMD64) int secondDsp; - if (emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) + if (emitSecondFramePtrActive && emitIsSecondFramePtrCandidate(ins, EBPbased, dsp, &secondDsp)) { // Redirect through the secondary frame pointer (a low callee-saved register, e.g. RBX): // modrm mod=01, rm=base => low byte 0x40 | (reg & 7); no SIB byte and no REX.B since the diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 6e81aacef9e014..40bc2ef0f43378 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -66,6 +66,9 @@ struct CnsVal UNATIVE_OFFSET emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize); UNATIVE_OFFSET emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp); #if defined(TARGET_AMD64) +// Set once per method in emitBegFN: true only when codegen reserved a secondary frame pointer. Lets the +// per-stack-access fast path skip the candidate check (and its codeGen deref) in the common case. +bool emitSecondFramePtrActive; bool emitIsSecondFramePtrCandidate(instruction ins, bool EBPbased, int dsp, int* pAdjustedDsp); #endif // TARGET_AMD64 UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 916b6bf1bf1319..411854a6cf84e9 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2603,11 +2603,6 @@ void LinearScan::setFrameType() // frame types (e.g. double-aligned) are not handled. 
const bool haveFixedBase = (frameType == FT_EBP_FRAME) || (frameType == FT_ESP_FRAME); - // The frame must be large enough that some local can fall outside the primary disp8 window. - // Necessary but not sufficient; the precise band test is deferred to genSecondFramePtrIsProfitable - // (FINAL offsets are not available yet). - const bool frameLargeEnough = m_compiler->lvaFrameSize(Compiler::REGALLOC_FRAME_LAYOUT) > 256; - // With EH we require an RBP frame: filter funclets re-establish the pointer from RBP (the // establisher frame pointer), so an RSP base would not be recoverable inside a funclet. const bool ehCompatible = (m_compiler->compHndBBtabCount == 0) || (frameType == FT_EBP_FRAME); @@ -2617,7 +2612,27 @@ void LinearScan::setFrameType() // matters under stress modes that force MinOpts on an OSR method. const bool notOsr = !m_compiler->opts.IsOSR(); - if ((secondFramePtrOffset != 0) && optDisabled && haveFixedBase && frameLargeEnough && ehCompatible && notOsr) + // The frame must be large enough that some local can fall outside the primary disp8 window. + // x64 has no REGALLOC-time layout to consult, so rather than run a full lvaFrameSize pass just to + // gate this (opt-disabled-only) reservation, estimate the local area cheaply: locals sit above the + // outgoing-arg space, so sum that plus the stack-home sizes until the window is exceeded. The estimate + // need not be exact (it omits temps/callee-saves): genSecondFramePtrIsProfitable re-checks with FINAL + // offsets and cancels the reservation if no local lands in the secondary band. 
+ auto frameLikelyLargeEnough = [this]() -> bool { + unsigned size = m_compiler->lvaOutgoingArgSpaceSize; + for (unsigned lclNum = 0; lclNum < m_compiler->lvaCount; lclNum++) + { + size += m_compiler->lvaLclStackHomeSize(lclNum); + if (size > 256) + { + return true; + } + } + return false; + }; + + if ((secondFramePtrOffset != 0) && optDisabled && haveFixedBase && ehCompatible && notOsr && + frameLikelyLargeEnough()) { // Reserve the register only as a candidate: remove it from allocation now, but defer the // real decision -- does any local land in the secondary disp8 band -- to genFinalizeFrame