Skip to content

Commit

Permalink
Enable EVEX feature: Embedded Rounding for Avx512F.Add() (#94684)
Browse files Browse the repository at this point in the history
* some workaround with embedded rounding in compiler backend.

* extend _idEvexbContext to 2bit to distinguish embedded broadcast and embedded rounding

* Expose APIs with rounding mode.

* Apply format patch

* Do not include the third parameter in Avx512.Add(left, right)

* split _idEvexbContext bits and made an explicit conversion function from uint8_t to insOpts for embedded rounding mode.

* Remove unexpected comment-out

* Fix unexpected deletion

* resolve comments:
removed redundant bits in instDesc for EVEX.b context.

Introduced `emitDispEmbRounding` to display the embedded rounding feature in the disassembly.

* bug fix:
fix un-needed assertion check.

* Apply format patch.

* Resolve comments:
merge INS_OPTS_EVEX_b and INS_OPTS_EVEX_er_rd
Do a pre-check for embedded rounding before lowering.

* Add a helper function to generalize the logic when lowering the embedded rounding intrinsics.

* Resolve comments:
1. fix typo in comments
2. Add SetEvexBroadcastIfNeeded
3. Added mask in insOpts

* 1. Add unit case for non-default rounding mode
2. removed round-to-even, the default option from InsOpts as it will be handled on the default path.

* formatting

* 1. Create a fallback jump table for embedded rounding APIs when control byte is not constant.
2. Create a template to generate the unit tests for embedded rounding APIs.
3. nit: fix naming.

* remove hand-written unit tests for embedded rounding.

* formatting

* Resolve comments.

* formatting

* revert changes:
let SetEmbRoundingMode accept unexpected values to accommodate the jump table generation logic.
  • Loading branch information
Ruihan-Yin authored Jan 19, 2024
1 parent 5a6135e commit 2d751ca
Show file tree
Hide file tree
Showing 19 changed files with 760 additions and 45 deletions.
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp);
void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
void genHWIntrinsic_R_R_RM(
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTree* op2);
void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
Expand Down
53 changes: 39 additions & 14 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -774,8 +774,12 @@ class emitter
unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr
unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables
#if defined(TARGET_XARCH)
unsigned _idEvexbContext : 1; // does EVEX.b need to be set.
#endif // TARGET_XARCH
// EVEX.b can indicate several contexts: embedded broadcast or embedded rounding.
// For normal and embedded broadcast intrinsics, EVEX.L'L has the same semantics: the vector length.
// For embedded rounding, the semantics of EVEX.L'L change to indicate the rounding mode.
// Multiple bits in _idEvexbContext are used to tell the emitter to handle the EVEX.L'L bits specially.
unsigned _idEvexbContext : 2;
#endif // TARGET_XARCH

#ifdef TARGET_ARM64

Expand Down Expand Up @@ -808,8 +812,8 @@ class emitter

////////////////////////////////////////////////////////////////////////
// Space taken up to here:
// x86: 47 bits
// amd64: 47 bits
// x86: 48 bits
// amd64: 48 bits
// arm: 48 bits
// arm64: 53 bits
// loongarch64: 46 bits
Expand All @@ -828,7 +832,7 @@ class emitter
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#define ID_EXTRA_BITFIELD_BITS (14)
#elif defined(TARGET_XARCH)
#define ID_EXTRA_BITFIELD_BITS (15)
#define ID_EXTRA_BITFIELD_BITS (16)
#else
#error Unsupported or unset target architecture
#endif
Expand Down Expand Up @@ -863,8 +867,8 @@ class emitter

////////////////////////////////////////////////////////////////////////
// Space taken up to here (with/without prev offset, assuming host==target):
// x86: 53/49 bits
// amd64: 54/49 bits
// x86: 54/50 bits
// amd64: 55/50 bits
// arm: 54/50 bits
// arm64: 60/55 bits
// loongarch64: 53/48 bits
Expand All @@ -880,8 +884,8 @@ class emitter

////////////////////////////////////////////////////////////////////////
// Small constant size (with/without prev offset, assuming host==target):
// x86: 11/15 bits
// amd64: 10/15 bits
// x86: 10/14 bits
// amd64: 9/14 bits
// arm: 10/14 bits
// arm64: 4/9 bits
// loongarch64: 11/16 bits
Expand Down Expand Up @@ -1578,15 +1582,35 @@ class emitter
}

#ifdef TARGET_XARCH
bool idIsEvexbContext() const
// Reports whether an EVEX.b context has been recorded on this descriptor,
// i.e. whether the _idEvexbContext bitfield holds a non-zero value.
bool idIsEvexbContextSet() const
{
return _idEvexbContext != 0;
}
void idSetEvexbContext()

void idSetEvexbContext(insOpts instOptions)
{
assert(_idEvexbContext == 0);
_idEvexbContext = 1;
assert(_idEvexbContext == 1);
if (instOptions == INS_OPTS_EVEX_eb_er_rd)
{
_idEvexbContext = 1;
}
else if (instOptions == INS_OPTS_EVEX_er_ru)
{
_idEvexbContext = 2;
}
else if (instOptions == INS_OPTS_EVEX_er_rz)
{
_idEvexbContext = 3;
}
else
{
unreached();
}
}

// Returns the raw value stored in the _idEvexbContext bitfield.
// Zero means no EVEX.b context is set; idSetEvexbContext stores 1, 2 or 3
// depending on the instruction option it is given.
unsigned idGetEvexbContext() const
{
return _idEvexbContext;
}
#endif

Expand Down Expand Up @@ -2166,6 +2190,7 @@ class emitter
void emitDispInsOffs(unsigned offs, bool doffs);
void emitDispInsHex(instrDesc* id, BYTE* code, size_t sz);
void emitDispEmbBroadcastCount(instrDesc* id);
void emitDispEmbRounding(instrDesc* id);
void emitDispIns(instrDesc* id,
bool isNew,
bool doffs,
Expand Down Expand Up @@ -3814,7 +3839,7 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
//
emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
{
if (id->idIsEvexbContext())
if (id->idIsEvexbContextSet())
{
// should have the assumption that Evex.b now stands for the embedded broadcast context.
// reference: Section 2.7.5 in Intel 64 and ia-32 architectures software developer's manual volume 2.
Expand Down
128 changes: 106 additions & 22 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1139,6 +1139,30 @@ static bool isLowSimdReg(regNumber reg)
#endif
}

//------------------------------------------------------------------------
// GetEmbRoundingMode: Translate an embedded rounding control value into the
//                     matching instruction option.
//
// Arguments:
//    mode -- the flag from the corresponding GenTree node indicating the mode;
//            only the values 1, 2 and 3 are handled.
//
// Return Value:
//    INS_OPTS_EVEX_eb_er_rd for 1, INS_OPTS_EVEX_er_ru for 2 and
//    INS_OPTS_EVEX_er_rz for 3. Any other value reaches unreached().
//
insOpts emitter::GetEmbRoundingMode(uint8_t mode) const
{
    if (mode == 1)
    {
        return INS_OPTS_EVEX_eb_er_rd;
    }

    if (mode == 2)
    {
        return INS_OPTS_EVEX_er_ru;
    }

    if (mode == 3)
    {
        return INS_OPTS_EVEX_er_rz;
    }

    // No other control value maps to an embedded rounding option.
    unreached();
}

//------------------------------------------------------------------------
// encodeRegAsIval: Encodes a register as an ival for use by a SIMD instruction
//
Expand Down Expand Up @@ -1309,18 +1333,50 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt

if (attr == EA_32BYTE)
{
// Set L bit to 1 in case of instructions that operate on 256-bits.
// Set EVEX.L'L bits to 01 in case of instructions that operate on 256-bits.
code |= LBIT_IN_BYTE_EVEX_PREFIX;
}
else if (attr == EA_64BYTE)
{
// Set L' bits to 11 in case of instructions that operate on 512-bits.
// Set EVEX.L'L bits to 10 in case of instructions that operate on 512-bits.
code |= LPRIMEBIT_IN_BYTE_EVEX_PREFIX;
}

if (id->idIsEvexbContext())
if (id->idIsEvexbContextSet())
{
code |= EVEX_B_BIT;

if (!id->idHasMem())
{
// embedded rounding case.
unsigned roundingMode = id->idGetEvexbContext();
if (roundingMode == 1)
{
// {rd-sae}
code &= ~(LPRIMEBIT_IN_BYTE_EVEX_PREFIX);
code |= LBIT_IN_BYTE_EVEX_PREFIX;
}
else if (roundingMode == 2)
{
// {ru-sae}
code |= LPRIMEBIT_IN_BYTE_EVEX_PREFIX;
code &= ~(LBIT_IN_BYTE_EVEX_PREFIX);
}
else if (roundingMode == 3)
{
// {rz-sae}
code |= LPRIMEBIT_IN_BYTE_EVEX_PREFIX;
code |= LBIT_IN_BYTE_EVEX_PREFIX;
}
else
{
unreached();
}
}
else
{
assert(id->idGetEvexbContext() == 1);
}
}

regNumber maskReg = REG_NA;
Expand Down Expand Up @@ -6742,11 +6798,7 @@ void emitter::emitIns_R_R_A(
id->idIns(ins);
id->idReg1(reg1);
id->idReg2(reg2);
if (instOptions == INS_OPTS_EVEX_b)
{
assert(UseEvexEncoding());
id->idSetEvexbContext();
}
SetEvexBroadcastIfNeeded(id, instOptions);

emitHandleMemOp(indir, id, (ins == INS_mulx) ? IF_RWR_RWR_ARD : emitInsModeFormat(ins, IF_RRD_RRD_ARD), ins);

Expand Down Expand Up @@ -6871,11 +6923,7 @@ void emitter::emitIns_R_R_C(instruction ins,
id->idReg1(reg1);
id->idReg2(reg2);
id->idAddr()->iiaFieldHnd = fldHnd;
if (instOptions == INS_OPTS_EVEX_b)
{
assert(UseEvexEncoding());
id->idSetEvexbContext();
}
SetEvexBroadcastIfNeeded(id, instOptions);

UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
id->idCodeSize(sz);
Expand All @@ -6889,7 +6937,8 @@ void emitter::emitIns_R_R_C(instruction ins,
* Add an instruction with three register operands.
*/

void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
void emitter::emitIns_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, insOpts instOptions)
{
assert(IsAvx512OrPriorInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins));
Expand All @@ -6901,6 +6950,13 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg,
id->idReg2(reg1);
id->idReg3(reg2);

if ((instOptions & INS_OPTS_b_MASK) != INS_OPTS_NONE)
{
// if EVEX.b needs to be set in this path, then it should be embedded rounding.
assert(UseEvexEncoding());
id->idSetEvexbContext(instOptions);
}

UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins));
id->idCodeSize(sz);

Expand All @@ -6921,12 +6977,8 @@ void emitter::emitIns_R_R_S(
id->idReg1(reg1);
id->idReg2(reg2);
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
SetEvexBroadcastIfNeeded(id, instOptions);

if (instOptions == INS_OPTS_EVEX_b)
{
assert(UseEvexEncoding());
id->idSetEvexbContext();
}
#ifdef DEBUG
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
#endif
Expand Down Expand Up @@ -8224,11 +8276,11 @@ void emitter::emitIns_SIMD_R_R_C(instruction ins,
// op2Reg -- The register of the second operand
//
void emitter::emitIns_SIMD_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg)
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, insOpts instOptions)
{
if (UseSimdEncoding())
{
emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, instOptions);
}
else
{
Expand Down Expand Up @@ -10656,7 +10708,7 @@ void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz)
//
void emitter::emitDispEmbBroadcastCount(instrDesc* id)
{
if (!id->idIsEvexbContext())
if (!id->idIsEvexbContextSet())
{
return;
}
Expand All @@ -10665,6 +10717,37 @@ void emitter::emitDispEmbBroadcastCount(instrDesc* id)
printf(" {1to%d}", vectorSize / baseSize);
}

//------------------------------------------------------------------------
// emitDispEmbRounding: Display the tag where embedded rounding is activated
//
// Arguments:
//    id - The instruction descriptor
//
// Notes:
//    Nothing is printed when the EVEX.b context of `id` is not set.
//    Otherwise the descriptor is asserted to have no memory operand and the
//    stored context value selects the tag:
//    1 -> {rd-sae}, 2 -> {ru-sae}, 3 -> {rz-sae}.
//
void emitter::emitDispEmbRounding(instrDesc* id)
{
    if (id->idIsEvexbContextSet())
    {
        assert(!id->idHasMem());

        switch (id->idGetEvexbContext())
        {
            case 1:
                printf(" {rd-sae}");
                break;

            case 2:
                printf(" {ru-sae}");
                break;

            case 3:
                printf(" {rz-sae}");
                break;

            default:
                unreached();
        }
    }
}

//--------------------------------------------------------------------
// emitDispIns: Dump the given instruction to jitstdout.
//
Expand Down Expand Up @@ -11533,6 +11616,7 @@ void emitter::emitDispIns(
printf("%s, ", emitRegName(id->idReg1(), attr));
printf("%s, ", emitRegName(reg2, attr));
printf("%s", emitRegName(reg3, attr));
emitDispEmbRounding(id);
break;
}

Expand Down
Loading

0 comments on commit 2d751ca

Please sign in to comment.