Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,9 @@ class CodeGen final : public CodeGenInterface
#ifdef TARGET_AMD64
void genPushCalleeSavedRegistersFromMaskAPX(regMaskTP rsPushRegs);
unsigned genPopCalleeSavedRegistersFromMaskAPX(regMaskTP rsPopRegs);
bool genSecondFramePtrIsProfitable();
#endif // TARGET_AMD64
#endif // !defined(TARGET_XARCH)
#endif // defined(TARGET_XARCH)

#endif // !defined(TARGET_ARM64)

Expand Down
118 changes: 118 additions & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4791,6 +4791,12 @@ void CodeGen::genFinalizeFrame()
}
#endif // TARGET_ARM

#if defined(TARGET_AMD64)
// The secondary frame-pointer register is reserved as a candidate during LSRA (it is in
// regSet.rsMaskResvd). Unlike ARM, do NOT mark it modified here: whether it is actually pushed is
// decided below by genSecondFramePtrIsProfitable, once FINAL offsets are known.
#endif // TARGET_AMD64

#ifdef TARGET_ARM64
if (m_compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform)
{
Expand Down Expand Up @@ -4993,6 +4999,37 @@ void CodeGen::genFinalizeFrame()

m_compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);

#if defined(TARGET_AMD64)
// A secondary frame pointer was reserved as a candidate during LSRA. Now that FINAL offsets are
// known, decide whether establishing it pays off. These offsets assume the register is NOT pushed;
// pushing it only shifts RBP-relative locals deeper and leaves RSP-relative locals unchanged, so
// scanning them is a sound, conservative test. If profitable, mark it modified (so prolog/epilog
// save and restore it) and redo the layout to account for the push; otherwise cancel the
// reservation so no push/lea or unwind data is emitted.
if (genSecondFramePtrReg != REG_NA)
{
if (genSecondFramePtrIsProfitable())
{
// Reset the layout state first: rsSetRegsModified forbids adding a callee-saved register
// once FINAL layout is complete, and we are about to redo the layout for the push anyway.
m_compiler->lvaDoneFrameLayout = Compiler::REGALLOC_FRAME_LAYOUT;

regSet.rsSetRegsModified(genRegMask(genSecondFramePtrReg));

regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedCalleeSavedRegsMask();
maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
m_compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);

m_compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
}
else
{
JITDUMP("Cancelling secondary frame pointer: no local lands in the secondary disp8 band\n");
genSecondFramePtrReg = REG_NA;
}
}
#endif // TARGET_AMD64

#ifdef DEBUG
if (m_compiler->opts.dspCode || m_compiler->opts.disAsm || m_compiler->opts.disAsm2 || verbose)
{
Expand All @@ -5001,6 +5038,72 @@ void CodeGen::genFinalizeFrame()
#endif
}

#if defined(TARGET_AMD64)
//------------------------------------------------------------------------
// genSecondFramePtrIsProfitable: decide whether the reserved secondary frame-pointer register is
// worth establishing, given FINAL frame offsets.
//
// Return Value:
// true if some on-frame local has an access that needs a disp32 off the primary base but fits a
// disp8 off the secondary base; false otherwise.
//
// Notes:
// Mirrors emitter::emitIsSecondFramePtrCandidate's band test, so must run after FINAL offsets are
// assigned. This is necessary but not sufficient: it finds at least one redirectable access but does
// not count them, so a method with only one or two redirects can still regress a few bytes (each
// redirect saves 3 bytes, while setup costs a push + lea, a pop per epilog, and an unwind code).
// Counting sites would need an IR walk (MinOpts has no precise ref counts), not worth the Tier0 cost.
//
bool CodeGen::genSecondFramePtrIsProfitable()
{
assert(genSecondFramePtrReg != REG_NA);
assert(m_compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);

const bool wantFPbased = genSecondFramePtrFPbased;
const int offset = genSecondFramePtrOffset;

// A redirect applies when the raw displacement does NOT fit a disp8 but the adjusted displacement
// (dsp +/- offset) does. Precompute the range of raw displacements that fit a disp8 once adjusted.
const int adjFitLo = wantFPbased ? (-128 - offset) : (-128 + offset);
const int adjFitHi = wantFPbased ? (127 - offset) : (127 + offset);

for (unsigned varNum = 0; varNum < m_compiler->lvaCount; varNum++)
{
const LclVarDsc* const varDsc = m_compiler->lvaGetDesc(varNum);
if (!varDsc->lvOnFrame || m_compiler->lvaIsUnknownSizeLocal(varNum))
{
continue;
}

bool fpBased;
const int loDsp = m_compiler->lvaFrameAddress((int)varNum, &fpBased);
if (fpBased != wantFPbased)
{
continue;
}

// Accesses to this local span [loDsp, hiDsp] (base slot plus any field/element offset).
const int hiDsp = loDsp + (int)m_compiler->lvaLclStackHomeSize(varNum) - 1;

// Intersect that range with the adjusted-fits-disp8 range.
const int interLo = (loDsp > adjFitLo) ? loDsp : adjFitLo;
const int interHi = (hiDsp < adjFitHi) ? hiDsp : adjFitHi;
if (interLo > interHi)
{
continue;
}

// The redirect only helps where the raw displacement itself needs a disp32 (|dsp| > 127).
if ((interLo < -128) || (interHi > 127))
{
return true;
}
}

return false;
}
#endif // TARGET_AMD64

/*****************************************************************************
*
* Generates code for a function prolog.
Expand Down Expand Up @@ -5554,6 +5657,21 @@ void CodeGen::genFnProlog()
//
//-------------------------------------------------------------------------

#if defined(TARGET_AMD64)
// Establish the secondary frame-pointer register. This runs after the frame pointer (if any) and
// after SP is final, so both candidate bases are live. It sits after the OS-reported prolog: the
// register was already saved (with its own unwind code) by genPushCalleeSavedRegisters, so this lea
// just loads a derived address and needs no unwind data. The register is out of allocation, so it
// stays live for the method body.
if (genSecondFramePtrReg != REG_NA)
{
const regNumber base = genSecondFramePtrFPbased ? REG_FPBASE : REG_SPBASE;
const int disp = genSecondFramePtrFPbased ? -genSecondFramePtrOffset : genSecondFramePtrOffset;
GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, genSecondFramePtrReg, base, disp);
regSet.verifyRegUsed(genSecondFramePtrReg);
}
#endif // TARGET_AMD64

#ifdef TARGET_ARM64
if (m_compiler->compUsesUnknownSizeFrame)
{
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,16 @@ class CodeGenInterface
m_cgFrameRequired = value;
}

#ifdef TARGET_AMD64
// Secondary stack base pointer (see JitSecondFramePtr). When set, this callee-saved register holds
// (primaryBase +/- genSecondFramePtrOffset) and addresses far locals with a disp8 displacement;
// REG_NA means off. genSecondFramePtrFPbased tells whether it shadows RBP (locals at negative
// offsets) or RSP (positive); only accesses on that base are redirected.
regNumber genSecondFramePtrReg = REG_NA;
int genSecondFramePtrOffset = 0;
bool genSecondFramePtrFPbased = false;
#endif // TARGET_AMD64

#if !HAS_FIXED_REGISTER_SET

void SetStackPointerReg(unsigned funcletIndex, regNumber reg);
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10948,6 +10948,18 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// This is the end of the OS-reported prolog for purposes of unwinding
m_compiler->unwindEndProlog();

// Re-establish the secondary frame-pointer register, but only in FILTER funclets. Catch/finally/
// fault funclets are entered via CallEHFunclet, which restores all nonvolatiles (including the
// secondary frame-pointer register) from the establisher CONTEXT, so that register already holds
// RBP - genSecondFramePtrOffset. Filter funclets use CallEHFilterFunclet, which restores only RBP,
// so the register must be recomputed here. EH methods always use an RBP frame and the funclet
// shares the parent frame via RBP, so the base is always RBP.
if ((genSecondFramePtrReg != REG_NA) && (m_compiler->funCurrentFunc()->funKind == FUNC_FILTER))
{
assert(genSecondFramePtrFPbased);
GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, genSecondFramePtrReg, REG_FPBASE, -genSecondFramePtrOffset);
regSet.verifyRegUsed(genSecondFramePtrReg);
}

genClearAvxStateInProlog();
}

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,9 @@ void emitter::emitBegFN(bool hasFramePtr

emitHasFramePtr = hasFramePtr;

#if defined(TARGET_AMD64)
emitSecondFramePtrActive = (codeGen->genSecondFramePtrReg != REG_NA);
#endif
#ifdef DEBUG
emitChkAlign = chkAlign;
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2628,7 +2628,7 @@ class emitter
void emitDispGCinfo();
void emitDispJumpList();
void emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc = false);
void emitDispFrameRef(int varx, int disp, int offs, bool asmfm);
void emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins = INS_none);
void emitDispInsAddr(const BYTE* code);
void emitDispInsOffs(unsigned offs, bool doffs);
void emitDispInsHex(instrDesc* id, BYTE* code, size_t sz);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7810,7 +7810,7 @@ void emitter::emitDispIns(
* Display a stack frame reference.
*/

void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins)
{
#ifdef DEBUG
printf("[");
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14908,7 +14908,7 @@ void emitter::emitDispInsHelp(
* Display a stack frame reference.
*/

void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins)
{
#ifdef DEBUG
printf("[");
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4627,7 +4627,7 @@ void emitter::emitDispIns(
*
* Display a stack frame reference.
*/
void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins)
{
NYI_LOONGARCH64("emitDispFrameRef-----unused on LoongArch64.");
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4917,7 +4917,7 @@ void emitter::emitDispIns(
* Display a stack frame reference.
*/

void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm, instruction ins)
{
NYI_RISCV64("emitDispFrameRef-----unimplemented/unused on RISCV64 yet----");
}
Expand Down
Loading
Loading