From d7ae8c61f0937df4dcffe88ba96eae65fa523a78 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 13 Jun 2024 14:45:50 -0700 Subject: [PATCH] Add basic support for folding and normalizing hwintrinsic trees in morph (#103143) * Add basic support for folding hwintrinsic trees in morph * Reduce the amount of copying required to evaluated vector constants * Have gtFoldExprHWIntrinsic handle side effects --- src/coreclr/jit/compiler.h | 4 + src/coreclr/jit/gentree.cpp | 1476 +++++++++++++++++- src/coreclr/jit/gentree.h | 369 ++++- src/coreclr/jit/hwintrinsicarm64.cpp | 4 + src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 1 + src/coreclr/jit/hwintrinsiclistarm64.h | 1 + src/coreclr/jit/hwintrinsiclistxarch.h | 1 + src/coreclr/jit/hwintrinsicxarch.cpp | 4 + src/coreclr/jit/importercalls.cpp | 22 +- src/coreclr/jit/lowerxarch.cpp | 57 +- src/coreclr/jit/lsraarm64.cpp | 1 + src/coreclr/jit/lsraxarch.cpp | 1 + src/coreclr/jit/morph.cpp | 73 +- src/coreclr/jit/simd.h | 180 ++- src/coreclr/jit/simdashwintrinsic.cpp | 25 +- src/coreclr/jit/valuenum.cpp | 1494 ++++++++++--------- src/coreclr/jit/valuenum.h | 90 +- 18 files changed, 2917 insertions(+), 887 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ff1fa2e20c44b0..3b17dfa13638d3 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3649,6 +3649,10 @@ class Compiler GenTree* gtFoldTypeCompare(GenTree* tree); GenTree* gtFoldTypeEqualityCall(bool isEq, GenTree* op1, GenTree* op2); +#if defined(FEATURE_HW_INTRINSICS) + GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree); +#endif // FEATURE_HW_INTRINSICS + // Options to control behavior of gtTryRemoveBoxUpstreamEffects enum BoxRemovalOptions { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 3afbbf15266003..aeddaf3e32142e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13599,6 +13599,14 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree) { return gtFoldExprConditional(tree); } + +#if defined(FEATURE_HW_INTRINSICS) + if (tree->OperIsHWIntrinsic()) + { + return gtFoldExprHWIntrinsic(tree->AsHWIntrinsic()); + } +#endif // FEATURE_HW_INTRINSICS + return tree; } @@ -18610,6 +18618,330 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t return true; } + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateUnaryInPlace: Evaluates this constant using the given operation +// +// Arguments: +// oper - the operation to use in the evaluation +// scalar - true if this is a scalar operation; otherwise, false +// baseType - the base type of the constant being checked +// +void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType) +{ + switch (gtType) + { + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd8Val); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd12Val); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd16Val); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd32Val); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + 
EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd64Val); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateUnaryInPlace: Evaluates this constant using the given operation +// +// Arguments: +// oper - the operation to use in the evaluation +// scalar - true if this is a scalar operation; otherwise, false +// baseType - the base type of the constant being checked +// other - the other vector constant to use in the evaluation +// +void GenTreeVecCon::EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeVecCon* other) +{ + switch (gtType) + { + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd8Val, other->gtSimd8Val); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd12Val, other->gtSimd12Val); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd16Val, other->gtSimd16Val); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd32Val, other->gtSimd32Val); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd64Val, other->gtSimd64Val); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateBroadcastInPlace: Evaluates this constant using a broadcast +// +// Arguments: +// baseType - the base type of the constant being checked +// scalar - the value to broadcast as part of the evaluation +// +void GenTreeVecCon::EvaluateBroadcastInPlace(var_types baseType, double scalar) +{ + switch (baseType) + { + case TYP_FLOAT: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_DOUBLE: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateBroadcastInPlace: Evaluates this constant using a broadcast +// +// Arguments: +// baseType - the base type of the constant being checked +// scalar - the value to broadcast as part of the evaluation +// +void GenTreeVecCon::EvaluateBroadcastInPlace(var_types baseType, int64_t scalar) +{ + switch (baseType) + { + case TYP_BYTE: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_UBYTE: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_SHORT: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_USHORT: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_INT: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_UINT: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_LONG: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + case TYP_ULONG: + { + EvaluateBroadcastInPlace(static_cast(scalar)); + break; + } + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// 
GenTreeVecCon::IsBroadcast: Determines if this vector constant is a broadcast +// +// Arguments: +// simdBaseType - the base type of the constant being checked +// +// Returns: +// true if the constant represents a broadcast value; otherwise, false +// +bool GenTreeVecCon::IsBroadcast(var_types simdBaseType) const +{ + assert(varTypeIsSIMD(gtType)); + assert(varTypeIsArithmetic(simdBaseType)); + + int elementCount = ElementCount(genTypeSize(gtType), simdBaseType); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + return ElementsAreSame(>SimdVal.u8[0], elementCount); + } + + case TYP_SHORT: + case TYP_USHORT: + { + return ElementsAreSame(>SimdVal.u16[0], elementCount); + } + + case TYP_FLOAT: + case TYP_INT: + case TYP_UINT: + { + return ElementsAreSame(>SimdVal.u32[0], elementCount); + } + + case TYP_DOUBLE: + case TYP_LONG: + case TYP_ULONG: + { + return ElementsAreSame(>SimdVal.u64[0], elementCount); + } + + default: + { + return false; + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::IsNaN: Determines if this vector constant has all elements being NaN +// +// Arguments: +// simdBaseType - the base type of the constant being checked +// +// Returns: +// true if the constant has all elements being NaN; otherwise, false +// +bool GenTreeVecCon::IsNaN(var_types simdBaseType) const +{ + assert(varTypeIsFloating(simdBaseType)); + uint32_t elementCount = ElementCount(genTypeSize(gtType), simdBaseType); + + for (uint32_t i = 0; i < elementCount; i++) + { + double element = GetElementFloating(simdBaseType, i); + + if (!FloatingPointUtils::isNaN(element)) + { + return false; + } + } + + return true; +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::IsNaN: Determines if this vector constant has all elements being -0 +// +// Arguments: +// simdBaseType - the base type of the constant being checked +// +// Returns: +// true if the constant has all elements being -0; otherwise, false +// +bool GenTreeVecCon::IsNegativeZero(var_types simdBaseType) const +{ + assert(varTypeIsFloating(simdBaseType)); + uint32_t elementCount = ElementCount(genTypeSize(gtType), simdBaseType); + + for (uint32_t i = 0; i < elementCount; i++) + { + double element = GetElementFloating(simdBaseType, i); + + if (!FloatingPointUtils::isNegativeZero(element)) + { + return false; + } + } + + return true; +} #endif // FEATURE_HW_INTRINSICS*/ //------------------------------------------------------------------------ @@ -25682,8 +26014,6 @@ GenTree* Compiler::gtNewSimdShuffleNode( if (simdSize == 16) { lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup; - - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128, simdBaseJitType, simdSize); } // VectorTableLookup is only valid on byte/sbyte @@ -27485,7 +27815,7 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const // bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(genTreeOps oper) { - return (oper == GT_AND) || (oper == GT_AND_NOT) || (oper == GT_OR) || (oper == GT_XOR); + return (oper == GT_AND) || (oper == GT_AND_NOT) || (oper == GT_NOT) || (oper == GT_OR) || (oper == GT_XOR); } //------------------------------------------------------------------------ @@ -27496,7 +27826,8 @@ bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(genTreeOps oper) // bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const { - genTreeOps oper = HWOperGet(); + bool isScalar = false; + genTreeOps oper = HWOperGet(&isScalar); return 
OperIsBitwiseHWIntrinsic(oper); } @@ -27847,8 +28178,10 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) //------------------------------------------------------------------------------ // HWOperGet : Returns Oper based on the HWIntrinsicId // -genTreeOps GenTreeHWIntrinsic::HWOperGet() const +genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const { + *isScalar = false; + switch (GetHWIntrinsicId()) { #if defined(TARGET_XARCH) @@ -27911,20 +28244,257 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet() const case NI_AVX512F_AndNot: case NI_AVX512DQ_AndNot: case NI_AVX10v1_V512_AndNot: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_BitwiseClear: +#endif { return GT_AND_NOT; } -#endif - // TODO: Handle other cases - default: +#if defined(TARGET_XARCH) + case NI_SSE_Add: + case NI_SSE2_Add: + case NI_AVX_Add: + case NI_AVX2_Add: + case NI_AVX512F_Add: + case NI_AVX512BW_Add: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Add: + case NI_AdvSimd_Arm64_Add: +#endif { - return GT_NONE; + return GT_ADD; } - } -} - -#endif // FEATURE_HW_INTRINSICS + +#if defined(TARGET_XARCH) + case NI_SSE_AddScalar: + case NI_SSE2_AddScalar: + case NI_AVX512F_AddScalar: + case NI_AVX10v1_AddScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_AddScalar: +#endif + { + *isScalar = true; + return GT_ADD; + } + +#if defined(TARGET_XARCH) + case NI_SSE_Divide: + case NI_SSE2_Divide: + case NI_AVX_Divide: + case NI_AVX512F_Divide: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Arm64_Divide: +#endif + { + return GT_DIV; + } + +#if defined(TARGET_XARCH) + case NI_SSE_DivideScalar: + case NI_SSE2_DivideScalar: + case NI_AVX512F_DivideScalar: + case NI_AVX10v1_DivideScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_DivideScalar: +#endif + { + *isScalar = true; + return GT_DIV; + } + +#if defined(TARGET_XARCH) + case NI_SSE_Multiply: + case NI_SSE2_MultiplyLow: + case NI_SSE41_MultiplyLow: + case NI_AVX_Multiply: + case NI_AVX2_MultiplyLow: + case NI_AVX512F_MultiplyLow: + case NI_AVX512BW_MultiplyLow: + case NI_AVX512DQ_MultiplyLow: + case NI_AVX512DQ_VL_MultiplyLow: + case NI_AVX10v1_MultiplyLow: + case NI_AVX10v1_V512_MultiplyLow: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Multiply: + case NI_AdvSimd_Arm64_Multiply: +#endif + { + return GT_MUL; + } + +#if defined(TARGET_XARCH) + case NI_SSE2_Multiply: + case NI_AVX512F_Multiply: + { + if (varTypeIsFloating(GetSimdBaseType())) + { + return GT_MUL; + } + return GT_NONE; + } +#endif + +#if defined(TARGET_XARCH) + case NI_SSE_MultiplyScalar: + case NI_SSE2_MultiplyScalar: + case NI_AVX512F_MultiplyScalar: + case NI_AVX10v1_MultiplyScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_MultiplyScalar: +#endif + { + *isScalar = true; + return GT_MUL; + } + +#if defined(TARGET_ARM64) + case NI_AdvSimd_Negate: + case NI_AdvSimd_Arm64_Negate: + { + return GT_NEG; + } + + case NI_AdvSimd_NegateScalar: + case NI_AdvSimd_Arm64_NegateScalar: + { + *isScalar = true; + return GT_NEG; + } +#endif + +#if defined(TARGET_XARCH) + case NI_AVX512F_RotateLeft: + case NI_AVX512F_RotateLeftVariable: + case NI_AVX512F_VL_RotateLeft: + case NI_AVX512F_VL_RotateLeftVariable: + case NI_AVX10v1_RotateLeft: + case NI_AVX10v1_RotateLeftVariable: + { + return GT_ROL; + } + + case NI_AVX512F_RotateRight: + case NI_AVX512F_RotateRightVariable: + case NI_AVX512F_VL_RotateRight: + case NI_AVX512F_VL_RotateRightVariable: + case NI_AVX10v1_RotateRight: + case NI_AVX10v1_RotateRightVariable: + { + return GT_ROR; + } +#endif // TARGET_XARCH + +#ifdef TARGET_ARM64 + case 
NI_AdvSimd_ShiftLeftLogical: +#else + case NI_SSE2_ShiftLeftLogical: + case NI_AVX2_ShiftLeftLogical: + case NI_AVX2_ShiftLeftLogicalVariable: + case NI_AVX512F_ShiftLeftLogical: + case NI_AVX512F_ShiftLeftLogicalVariable: + case NI_AVX512BW_ShiftLeftLogical: + case NI_AVX512BW_ShiftLeftLogicalVariable: +#endif + { + return GT_LSH; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftLeftLogicalScalar: + { + *isScalar = true; + return GT_LSH; + } +#endif + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightArithmetic: +#else + case NI_SSE2_ShiftRightArithmetic: + case NI_AVX2_ShiftRightArithmetic: + case NI_AVX2_ShiftRightArithmeticVariable: + case NI_AVX512F_ShiftRightArithmetic: + case NI_AVX512F_ShiftRightArithmeticVariable: + case NI_AVX512F_VL_ShiftRightArithmetic: + case NI_AVX512F_VL_ShiftRightArithmeticVariable: + case NI_AVX512BW_ShiftRightArithmetic: + case NI_AVX512BW_ShiftRightArithmeticVariable: + case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX10v1_ShiftRightArithmeticVariable: +#endif + { + return GT_RSH; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightArithmeticScalar: + { + *isScalar = true; + return GT_RSH; + } +#endif + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightLogical: +#else + case NI_SSE2_ShiftRightLogical: + case NI_AVX2_ShiftRightLogical: + case NI_AVX2_ShiftRightLogicalVariable: + case NI_AVX512F_ShiftRightLogical: + case NI_AVX512F_ShiftRightLogicalVariable: + case NI_AVX512BW_ShiftRightLogical: + case NI_AVX512BW_ShiftRightLogicalVariable: +#endif + { + return GT_RSZ; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightLogicalScalar: + { + *isScalar = true; + return GT_RSZ; + } +#endif + +#if defined(TARGET_XARCH) + case NI_SSE_Subtract: + case NI_SSE2_Subtract: + case NI_AVX_Subtract: + case NI_AVX2_Subtract: + case NI_AVX512F_Subtract: + case NI_AVX512BW_Subtract: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Subtract: + case NI_AdvSimd_Arm64_Subtract: +#endif + { + return GT_SUB; + } + +#if defined(TARGET_XARCH) + case NI_SSE_SubtractScalar: + case NI_SSE2_SubtractScalar: + case NI_AVX512F_SubtractScalar: + case NI_AVX10v1_SubtractScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_SubtractScalar: +#endif + { + *isScalar = true; + return GT_SUB; + } + + default: + { + return GT_NONE; + } + } +} + +#endif // FEATURE_HW_INTRINSICS //--------------------------------------------------------------------------------------- // gtNewMustThrowException: @@ -28741,8 +29311,13 @@ uint8_t GenTreeHWIntrinsic::GetTernaryControlByte(GenTreeHWIntrinsic* second) co const uint8_t B = 0xCC; const uint8_t C = 0xAA; - genTreeOps firstOper = HWOperGet(); - genTreeOps secondOper = second->HWOperGet(); + bool isScalar = false; + + genTreeOps firstOper = HWOperGet(&isScalar); + assert(!isScalar); + + genTreeOps secondOper = second->HWOperGet(&isScalar); + assert(!isScalar); uint8_t AB = 0; uint8_t ABC = 0; @@ -28936,3 +29511,874 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const // Not enough known information; therefore we might overflow. return true; } + +#if defined(FEATURE_HW_INTRINSICS) +GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) +{ + assert(tree->OperIsHWIntrinsic()); + + // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. 
+ // To be fixed in https://github.com/dotnet/runtime/pull/77465 + const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); + if (!tier0opts) + { + return tree; + } + + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + size_t opCount = tree->GetOperandCount(); + + switch (opCount) + { + case 3: + { + op3 = tree->Op(3); + FALLTHROUGH; + } + + case 2: + { + op2 = tree->Op(2); + FALLTHROUGH; + } + + case 1: + { + op1 = tree->Op(1); + break; + } + + default: + { + return tree; + } + } + + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + +#if defined(TARGET_XARCH) + if (oper == GT_AND_NOT) + { + // xarch does: ~op1 & op2, we need op1 & ~op2 + std::swap(op1, op2); + } +#endif // TARGET_XARCH + + GenTree* cnsNode = nullptr; + GenTree* otherNode = nullptr; + + if (op1->OperIsConst()) + { + cnsNode = op1; + otherNode = op2; + } + else if ((op2 != nullptr) && op2->OperIsConst()) + { + cnsNode = op2; + otherNode = op1; + } + else + { + // No constants, so nothing to fold + return tree; + } + + GenTree* resultNode = tree; + + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + var_types retType = tree->TypeGet(); + var_types simdBaseType = tree->GetSimdBaseType(); + CorInfoType simdBaseJitType = tree->GetSimdBaseJitType(); + unsigned int simdSize = tree->GetSimdSize(); + + if (otherNode == nullptr) + { + assert(op2 == nullptr); + assert(op3 == nullptr); + + if (oper != GT_NONE) + { + cnsNode->AsVecCon()->EvaluateUnaryInPlace(oper, isScalar, simdBaseType); + resultNode = cnsNode; + } + else + { + switch (ni) + { +#ifdef TARGET_ARM64 + case NI_ArmBase_LeadingZeroCount: +#else + case NI_LZCNT_LeadingZeroCount: +#endif + { + assert(!varTypeIsSmall(retType) && !varTypeIsLong(retType)); + + int32_t value = static_cast(cnsNode->AsIntConCommon()->IconValue()); + uint32_t result = BitOperations::LeadingZeroCount(static_cast(value)); + + cnsNode->AsIntConCommon()->SetIconValue(static_cast(result)); + resultNode = cnsNode; + break; + } + +#ifdef TARGET_ARM64 + case NI_ArmBase_Arm64_LeadingZeroCount: + { + assert(varTypeIsInt(retType)); + + int64_t value = cnsNode->AsIntConCommon()->IntegralValue(); + uint32_t result = BitOperations::LeadingZeroCount(static_cast(value)); + + cnsNode->AsIntConCommon()->SetIconValue(static_cast(result)); + resultNode = cnsNode; + break; + } +#else + case NI_LZCNT_X64_LeadingZeroCount: + { + assert(varTypeIsLong(retType)); + + int64_t value = cnsNode->AsIntConCommon()->IntegralValue(); + uint32_t result = BitOperations::LeadingZeroCount(static_cast(value)); + + cnsNode->AsIntConCommon()->SetIntegralValue(static_cast(result)); + resultNode = cnsNode; + break; + } +#endif + + case NI_Vector128_AsVector3: + case NI_Vector128_AsVector128Unsafe: +#ifdef TARGET_ARM64 + case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_GetLower: +#else + case NI_Vector128_AsVector2: + case NI_Vector128_ToVector256Unsafe: + case NI_Vector256_GetLower: + case NI_Vector256_ToVector512Unsafe: + case NI_Vector512_GetLower: + case NI_Vector512_GetLower128: +#endif + { + // These are all going to a smaller type taking the lowest bits + // or are unsafely going to a larger type, so we just need to retype + // the constant and we're good to go. 
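// Illustrative sketch (not from the patch; simd8_stub/simd16_stub are simplified
// stand-ins for the JIT's simd8_t/simd16_t): why a narrowing reinterpretation
// such as Vector128.GetLower on a constant only needs a retype -- the low bytes
// of the existing payload already are the answer, so no data has to move.
#include <cstdint>
#include <cstring>
#include <cstdio>

struct simd8_stub  { uint8_t u8[8];  };
struct simd16_stub { uint8_t u8[16]; };

// GetLower on a constant: the result is just the first 8 bytes of the payload.
simd8_stub GetLowerConstant(const simd16_stub& value)
{
    simd8_stub result;
    std::memcpy(&result, &value, sizeof(result));
    return result;
}

int main()
{
    simd16_stub cns = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}};
    simd8_stub  lo  = GetLowerConstant(cns);
    printf("%d %d\n", lo.u8[0], lo.u8[7]); // prints "1 8"
    return 0;
}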
+ + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + +#ifdef TARGET_ARM64 + case NI_Vector64_ToVector128: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD8); + cnsNode->AsVecCon()->gtSimd16Val.v64[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#else + case NI_Vector128_ToVector256: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd32Val.v128[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + + case NI_Vector128_ToVector512: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd64Val.v128[1] = {}; + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + + case NI_Vector256_ToVector512: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#endif + +#ifdef TARGET_ARM64 + case NI_Vector128_GetUpper: + { + assert(retType == TYP_SIMD8); + assert(cnsNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd8Val = cnsNode->AsVecCon()->gtSimd16Val.v64[1]; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#else + case NI_Vector256_GetUpper: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd16Val = cnsNode->AsVecCon()->gtSimd32Val.v128[1]; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + + case NI_Vector512_GetUpper: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD64); + cnsNode->AsVecCon()->gtSimd32Val = cnsNode->AsVecCon()->gtSimd64Val.v256[1]; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#endif + + case NI_Vector128_ToScalar: +#ifdef TARGET_ARM64 + case NI_Vector64_ToScalar: +#else + case NI_Vector256_ToScalar: + case NI_Vector512_ToScalar: +#endif + { + var_types simdType = getSIMDTypeForSize(simdSize); + + if (varTypeIsFloating(retType)) + { + double result = cnsNode->AsVecCon()->ToScalarFloating(simdBaseType); + + resultNode = gtNewDconNode(result, retType); + } + else + { + assert(varTypeIsIntegral(retType)); + int64_t result = cnsNode->AsVecCon()->ToScalarIntegral(simdBaseType); + + if (varTypeIsLong(retType)) + { + resultNode = gtNewLconNode(result); + } + else + { + resultNode = gtNewIconNode(static_cast(result), retType); + } + } + break; + } + + default: + { + break; + } + } + } + } + else if (otherNode->OperIsConst()) + { + if (oper != GT_NONE) + { + assert(op3 == nullptr); + +#if defined(TARGET_XARCH) + if ((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)) + { + if (otherNode->TypeIs(TYP_SIMD16)) + { + if ((ni != NI_AVX2_ShiftLeftLogicalVariable) && (ni != NI_AVX2_ShiftRightArithmeticVariable) && + (ni != NI_AVX512F_VL_ShiftRightArithmeticVariable) && + (ni != NI_AVX10v1_ShiftRightArithmeticVariable) && (ni != NI_AVX2_ShiftRightLogicalVariable)) + { + // The xarch shift instructions support taking the shift amount as + // a simd16, in which case they take the shift amount from the lower + // 64-bits. 
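// Illustrative sketch (not from the patch; the struct and helper below are
// simplified stand-ins): how a simd16 shift-count operand reduces to one scalar
// amount. Non-variable xarch vector shifts read only the low 64 bits of the
// count, and any count at or above the element bit width is an overshift. The
// code above only rewrites the amount to -1 when broadcasting it into a lane
// narrower than 64 bits could otherwise truncate away the overshift.
#include <cstdint>

struct simd16_stub { uint64_t u64[2]; };

// Returns the effective count for an element of `elementBits` bits, normalizing
// every out-of-range amount to -1 so it still reads as an overshift after being
// broadcast into narrower lanes for constant evaluation.
int64_t EffectiveShiftCount(const simd16_stub& count, unsigned elementBits)
{
    int64_t amount = static_cast<int64_t>(count.u64[0]); // low 64 bits only

    if ((amount < 0) || (amount >= static_cast<int64_t>(elementBits)))
    {
        amount = -1; // overshift: hardware produces 0 (or sign fill for arithmetic shifts)
    }
    return amount;
}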
+ + int64_t shiftAmount = otherNode->AsVecCon()->GetElementIntegral(TYP_LONG, 0); + + if ((genTypeSize(simdBaseType) != 8) && (shiftAmount > INT_MAX)) + { + // Ensure we don't lose track the the amount is an overshift + shiftAmount = -1; + } + otherNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, shiftAmount); + } + } + } +#endif // TARGET_XARCH + + if (otherNode->IsIntegralConst()) + { + int64_t scalar = otherNode->AsIntConCommon()->IntegralValue(); + + otherNode = gtNewVconNode(retType); + otherNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, scalar); + } + + cnsNode->AsVecCon()->EvaluateBinaryInPlace(oper, isScalar, simdBaseType, otherNode->AsVecCon()); + resultNode = cnsNode; + } + else + { + switch (ni) + { + case NI_Vector128_GetElement: +#ifdef TARGET_ARM64 + case NI_Vector64_GetElement: +#else + case NI_Vector256_GetElement: + case NI_Vector512_GetElement: +#endif + { + assert(op3 == nullptr); + uint32_t index = static_cast(otherNode->AsIntConCommon()->IconValue()); + + if (index >= GenTreeVecCon::ElementCount(simdSize, simdBaseType)) + { + // Nothing to fold for out of range indexes + break; + } + + var_types simdType = getSIMDTypeForSize(simdSize); + + if (varTypeIsFloating(retType)) + { + double result = cnsNode->AsVecCon()->GetElementFloating(simdBaseType, index); + + resultNode = gtNewDconNode(result, retType); + } + else + { + assert(varTypeIsIntegral(retType)); + int64_t result = cnsNode->AsVecCon()->GetElementIntegral(simdBaseType, index); + + if (varTypeIsLong(retType)) + { + resultNode = gtNewLconNode(result); + } + else + { + resultNode = gtNewIconNode(static_cast(result), retType); + } + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: + { + assert(op3 == nullptr); + + // MultiplyByScalar takes a vector as the second operand but only utilizes element 0 + // We need to extract it and then functionally broadcast it up for the evaluation to + // work as expected. 
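// Illustrative sketch (not from the patch; simplified stand-ins for the JIT's
// simd types and EvaluateBroadcastInPlace/EvaluateBinaryInPlace): folding a
// multiply-by-scalar by first splatting element 0 of the second operand, after
// which an ordinary element-wise multiply yields the right constant.
#include <cstdint>

struct simd16i_stub { int32_t i32[4]; };

simd16i_stub MultiplyByScalarConstant(const simd16i_stub& a, const simd16i_stub& b)
{
    int32_t scalar = b.i32[0]; // only lane 0 of the second operand is used

    simd16i_stub splat;
    for (int i = 0; i < 4; i++) // functional broadcast of the scalar
    {
        splat.i32[i] = scalar;
    }

    simd16i_stub result;
    for (int i = 0; i < 4; i++) // ordinary element-wise GT_MUL evaluation
    {
        result.i32[i] = a.i32[i] * splat.i32[i];
    }
    return result;
}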
+ + if (varTypeIsFloating(simdBaseType)) + { + double scalar = otherNode->AsVecCon()->ToScalarFloating(simdBaseType); + otherNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, scalar); + } + else + { + assert(varTypeIsIntegral(simdBaseType)); + int64_t scalar = otherNode->AsVecCon()->ToScalarIntegral(simdBaseType); + otherNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, scalar); + } + + cnsNode->AsVecCon()->EvaluateBinaryInPlace(GT_MUL, isScalar, simdBaseType, otherNode->AsVecCon()); + resultNode = cnsNode; + break; + } +#endif + + case NI_Vector128_WithElement: +#ifdef TARGET_ARM64 + case NI_Vector64_WithElement: +#else + case NI_Vector256_WithElement: + case NI_Vector512_WithElement: +#endif + { + if (!op3->OperIsConst()) + { + break; + } + + uint32_t index = static_cast(op2->AsIntConCommon()->IconValue()); + + if (index >= GenTreeVecCon::ElementCount(simdSize, simdBaseType)) + { + // Nothing to fold for out of range indexes + break; + } + + var_types simdType = getSIMDTypeForSize(simdSize); + + if (varTypeIsFloating(simdBaseType)) + { + double value = op3->AsDblCon()->DconValue(); + cnsNode->AsVecCon()->SetElementFloating(simdBaseType, index, value); + resultNode = cnsNode; + } + else + { + assert(varTypeIsIntegral(simdBaseType)); + int64_t value = op3->AsIntConCommon()->IntegralValue(); + cnsNode->AsVecCon()->SetElementIntegral(simdBaseType, index, value); + resultNode = cnsNode; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_Vector128_WithLower: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD16); + assert(otherNode->gtType == TYP_SIMD8); + cnsNode->AsVecCon()->gtSimd16Val.v64[0] = otherNode->AsVecCon()->gtSimd8Val; + + resultNode = cnsNode; + break; + } +#else + case NI_Vector256_WithLower: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD32); + assert(otherNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd32Val.v128[0] = otherNode->AsVecCon()->gtSimd16Val; + + resultNode = cnsNode; + break; + } + + case NI_Vector512_WithLower: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD64); + assert(otherNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd64Val.v256[0] = otherNode->AsVecCon()->gtSimd32Val; + + resultNode = cnsNode; + break; + } +#endif + +#ifdef TARGET_ARM64 + case NI_Vector128_WithUpper: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD16); + assert(otherNode->gtType == TYP_SIMD8); + cnsNode->AsVecCon()->gtSimd16Val.v64[1] = otherNode->AsVecCon()->gtSimd8Val; + + resultNode = cnsNode; + break; + } +#else + case NI_Vector256_WithUpper: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD32); + assert(otherNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd32Val.v128[1] = otherNode->AsVecCon()->gtSimd16Val; + + resultNode = cnsNode; + break; + } + + case NI_Vector512_WithUpper: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD64); + assert(otherNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = otherNode->AsVecCon()->gtSimd32Val; + + resultNode = cnsNode; + break; + } +#endif + + default: + { + break; + } + } + } + } + else if (op3 == nullptr) + { + switch (oper) + { + case GT_ADD: + { + if (varTypeIsFloating(simdBaseType)) + { + // Handle `x + NaN == NaN` and `NaN + x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode->IsVectorNaN(simdBaseType)) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + + // 
Handle `x + -0 == x` and `-0 + x == x` + + if (cnsNode->IsVectorNegativeZero(simdBaseType)) + { + resultNode = otherNode; + break; + } + + // We cannot handle `x + 0 == x` or `0 + x == x` since `-0 + 0 == 0` + break; + } + + // Handle `x + 0 == x` and `0 + x == x` + if (cnsNode->IsVectorZero()) + { + resultNode = otherNode; + } + break; + } + + case GT_AND: + { + // Handle `x & 0 == 0` and `0 & x == 0` + if (cnsNode->IsVectorZero()) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + + // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` + if (cnsNode->IsVectorAllBitsSet()) + { + resultNode = otherNode; + } + break; + } + + case GT_AND_NOT: + { + // Handle `x & ~0 == x` and `0 & ~x == 0` + if (cnsNode->IsVectorZero()) + { + if (cnsNode == op1) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + else + { + resultNode = otherNode; + } + break; + } + + // Handle `x & ~AllBitsSet == 0` + if (cnsNode->IsVectorAllBitsSet() && (cnsNode == op2)) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + } + break; + } + + case GT_DIV: + { + if (varTypeIsFloating(simdBaseType)) + { + // Handle `x / NaN == NaN` and `NaN / x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode->IsVectorNaN(simdBaseType)) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + } + + // Handle `x / 1 == x`. + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode != op2) + { + break; + } + + if (!cnsNode->IsVectorBroadcast(simdBaseType)) + { + break; + } + + if (cnsNode->AsVecCon()->IsScalarOne(simdBaseType)) + { + resultNode = otherNode; + } + break; + } + + case GT_MUL: + { + if (!varTypeIsFloating(simdBaseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + if (cnsNode->IsVectorZero()) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + } + else + { + // Handle `x * NaN == NaN` and `NaN * x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode->IsVectorNaN(simdBaseType)) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + + // We cannot handle `x * 0 == 0` or ` 0 * x == 0` since `-0 * 0 == -0` + // We cannot handle `x * -0 == -0` or `-0 * x == -0` since `-0 * -0 == 0` + } + + // Handle `x * 1 == x` and `1 * x == x` + // This is safe for all floats since we do not fault for sNaN + + if (!cnsNode->IsVectorBroadcast(simdBaseType)) + { + break; + } + + if (cnsNode->AsVecCon()->IsScalarOne(simdBaseType)) + { + resultNode = otherNode; + } + break; + } + + case GT_OR: + { + // Handle `x | 0 == x` and `0 | x == x` + if (cnsNode->IsVectorZero()) + { + resultNode = otherNode; + break; + } + + // Handle `x | AllBitsSet == AllBitsSet` and `AllBitsSet | x == AllBitsSet` + if (cnsNode->IsVectorAllBitsSet()) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + } + break; + } + + case GT_ROL: + case GT_ROR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + { + // Handle `x rol 0 == x` and `0 rol x == 0` + // Handle `x ror 0 == x` and `0 ror x == 0` + // Handle `x << 0 == x` and `0 << x == 0` + // Handle `x >> 0 == x` and `0 >> x == 0` + // Handle `x >>> 0 == x` and `0 >>> x == 0` + + if (cnsNode->IsVectorZero()) + { + if (cnsNode == op2) + { + resultNode = otherNode; + } + else + { + resultNode = gtWrapWithSideEffects(cnsNode, 
otherNode, GTF_ALL_EFFECT); + } + } + else if (cnsNode->IsIntegralConst(0)) + { + assert(cnsNode == op2); + resultNode = otherNode; + } + break; + } + + case GT_SUB: + { + if (varTypeIsFloating(simdBaseType)) + { + // Handle `x - NaN == NaN` and `NaN - x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode->IsVectorNaN(simdBaseType)) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + + // We cannot handle `x - -0 == x` since `-0 - -0 == 0` + } + + // Handle `x - 0 == x` + if ((op2 == cnsNode) && cnsNode->IsVectorZero()) + { + resultNode = otherNode; + } + break; + } + + case GT_XOR: + { + // Handle `x | 0 == x` and `0 | x == x` + if (cnsNode->IsVectorZero()) + { + resultNode = otherNode; + } + break; + } + + default: + { + break; + } + } + + switch (ni) + { +#ifdef TARGET_ARM64 + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: + { + if (!varTypeIsFloating(simdBaseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + if (cnsNode == op1) + { + if (cnsNode->IsVectorZero()) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + } + else + { + assert(cnsNode == op2); + + if (cnsNode->AsVecCon()->IsScalarZero(simdBaseType)) + { + int64_t val = 0; + + cnsNode->gtType = retType; + cnsNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, val); + + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + } + } + else + { + // Handle `x * NaN == NaN` and `NaN * x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode == op1) + { + if (cnsNode->IsVectorNaN(simdBaseType)) + { + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + } + else + { + assert(cnsNode == op2); + double val = cnsNode->AsVecCon()->GetElementFloating(simdBaseType, 0); + + if (FloatingPointUtils::isNaN(val)) + { + cnsNode->gtType = retType; + cnsNode->AsVecCon()->EvaluateBroadcastInPlace(simdBaseType, val); + + resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); + break; + } + } + + // We cannot handle `x * 0 == 0` or ` 0 * x == 0` since `-0 * 0 == -0` + // We cannot handle `x * -0 == -0` or `-0 * x == -0` since `-0 * -0 == 0` + } + + // Handle x * 1 => x, but only if the scalar RHS is <1, ...>. + if ((cnsNode == op2) && cnsNode->AsVecCon()->IsScalarOne(simdBaseType)) + { + resultNode = otherNode; + } + break; + } +#endif + + default: + { + break; + } + } + } + + if (resultNode != tree) + { + if (fgGlobalMorph) + { + // We can sometimes produce a comma over the constant if the original op + // had a side effect or even a new constant node, so just ensure we set + // the flag (which will be already set for the operands otherwise). + INDEBUG(resultNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + + if (resultNode->OperIs(GT_COMMA)) + { + INDEBUG(resultNode->AsOp()->gtGetOp2()->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + } + } + + if (resultNode->OperIsConst()) + { + if (vnStore != nullptr) + { + fgValueNumberTreeConst(resultNode); + } + + // Make sure no side effect flags are set on this constant node. 
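// Illustrative sketch (not from the patch; minimal stand-ins rather than the
// real GenTree API): the shape produced when a fold must keep an operand alive
// for its side effects. Instead of dropping the operand, the result is a comma
// of (side effects, constant), so evaluation order is preserved while the
// overall value is still a constant -- which is why the code above also checks
// for a GT_COMMA result.
#include <memory>
#include <utility>

struct Node { virtual ~Node() = default; };

struct Constant : Node
{
    long long value;
    explicit Constant(long long v) : value(v) {}
};

struct Comma : Node
{
    std::unique_ptr<Node> sideEffects; // evaluated first, value discarded
    std::unique_ptr<Node> result;      // the folded constant
    Comma(std::unique_ptr<Node> s, std::unique_ptr<Node> r)
        : sideEffects(std::move(s)), result(std::move(r)) {}
};

// Fold "x * 0" for integral vectors when x may contain calls or faulting loads:
// keep x for its effects, but make the overall value the constant zero.
std::unique_ptr<Node> FoldMulByZero(std::unique_ptr<Node> x)
{
    return std::make_unique<Comma>(std::move(x), std::make_unique<Constant>(0));
}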
+ resultNode->gtFlags &= ~GTF_ALL_EFFECT; + } + } + return resultNode; +} +#endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index cb4b5584761d94..f7ceedffb1e792 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1763,8 +1763,11 @@ struct GenTree inline bool IsFloatPositiveZero() const; inline bool IsFloatNegativeZero() const; inline bool IsVectorZero() const; + inline bool IsVectorNegativeZero(var_types simdBaseType) const; + inline bool IsVectorNaN(var_types simdBaseType) const; inline bool IsVectorCreate() const; inline bool IsVectorAllBitsSet() const; + inline bool IsVectorBroadcast(var_types simdBaseType) const; inline bool IsMaskAllBitsSet() const; inline bool IsVectorConst(); @@ -6634,7 +6637,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2); - genTreeOps HWOperGet() const; + genTreeOps HWOperGet(bool* isScalar) const; private: void SetHWIntrinsicId(NamedIntrinsic intrinsicId); @@ -6878,6 +6881,178 @@ struct GenTreeVecCon : public GenTree #endif // FEATURE_HW_INTRINSICS + void EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType); + void EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeVecCon* other); + + template + void EvaluateBroadcastInPlace(TBase scalar) + { + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + simd8_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + void EvaluateBroadcastInPlace(var_types baseType, double scalar); + void EvaluateBroadcastInPlace(var_types baseType, int64_t scalar); + + void SetElementFloating(var_types simdBaseType, int32_t index, double value) + { + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd8Val, index, value); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd12Val, index, value); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd16Val, index, value); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd32Val, index, value); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd64Val, index, value); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + void SetElementIntegral(var_types simdBaseType, int32_t index, int64_t value) + { + switch (gtType) + { +#if 
defined(FEATURE_SIMD) + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd8Val, index, value); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd12Val, index, value); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd16Val, index, value); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd32Val, index, value); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd64Val, index, value); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + bool IsAllBitsSet() const { switch (gtType) @@ -6923,6 +7098,8 @@ struct GenTreeVecCon : public GenTree } } + bool IsBroadcast(var_types simdBaseType) const; + static bool Equals(const GenTreeVecCon* left, const GenTreeVecCon* right) { var_types gtType = left->TypeGet(); @@ -6975,6 +7152,10 @@ struct GenTreeVecCon : public GenTree } } + bool IsNaN(var_types simdBaseType) const; + + bool IsNegativeZero(var_types simdBaseType) const; + bool IsZero() const { switch (gtType) @@ -7020,6 +7201,144 @@ struct GenTreeVecCon : public GenTree } } + double GetElementFloating(var_types simdBaseType, int32_t index) const + { + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd8Val, index); + } + + case TYP_SIMD12: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd12Val, index); + } + + case TYP_SIMD16: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd16Val, index); + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd32Val, index); + } + + case TYP_SIMD64: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd64Val, index); + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + int64_t GetElementIntegral(var_types simdBaseType, int32_t index) const + { + switch (gtType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd8Val, index); + } + + case TYP_SIMD12: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd12Val, index); + } + + case TYP_SIMD16: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd16Val, index); + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd32Val, index); + } + + case TYP_SIMD64: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd64Val, index); + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + double ToScalarFloating(var_types simdBaseType) const + { + return GetElementFloating(simdBaseType, 0); + } + + int64_t ToScalarIntegral(var_types simdBaseType) const + { + return GetElementIntegral(simdBaseType, 0); + } + + bool IsElementZero(var_types simdBaseType, int32_t index) const + { + switch (simdBaseType) + { + case TYP_FLOAT: + { + return GetElementIntegral(TYP_INT, index) == 0; + } + + case TYP_DOUBLE: + { + return GetElementIntegral(TYP_LONG, index) == 0; + } + + default: + { + return GetElementIntegral(simdBaseType, index) == 0; + 
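// Illustrative sketch (not from the patch): the float/double cases above test
// zero through the lane's integer bit pattern, so -0.0 (bits 0x80000000 for
// float) is deliberately not treated as zero. That distinction matters because
// several of the folds added in this change are only valid for +0.0 constants.
#include <cstdint>
#include <cstring>

bool IsBitwiseZeroFloat(float value)
{
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits)); // reinterpret, no numeric conversion
    return bits == 0;                         // true for +0.0f, false for -0.0f
}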
} + } + } + + bool IsElementOne(var_types simdBaseType, int32_t index) const + { + switch (simdBaseType) + { + case TYP_FLOAT: + case TYP_DOUBLE: + { + return GetElementFloating(simdBaseType, index) == 1; + } + + default: + { + return GetElementIntegral(simdBaseType, index) == 1; + } + } + } + + bool IsScalarZero(var_types simdBaseType) const + { + return IsElementZero(simdBaseType, 0); + } + + bool IsScalarOne(var_types simdBaseType) const + { + return IsElementOne(simdBaseType, 0); + } + GenTreeVecCon(var_types type) : GenTree(GT_CNS_VEC, type) { @@ -9238,6 +9557,36 @@ inline bool GenTree::IsVectorZero() const return IsCnsVec() && AsVecCon()->IsZero(); } +//------------------------------------------------------------------- +// IsVectorNegativeZero: returns true if this node is a vector constant with all elements negative zero. +// +// Arguments: +// simdBaseType - the base type of the constant being checked +// +// Returns: +// True if this node is a vector constant with all elements negative zero +// +inline bool GenTree::IsVectorNegativeZero(var_types simdBaseType) const +{ + assert(varTypeIsFloating(simdBaseType)); + return IsCnsVec() && AsVecCon()->IsNegativeZero(simdBaseType); +} + +//------------------------------------------------------------------- +// IsVectorZero: returns true if this node is a vector constant with all bits zero. +// +// Arguments: +// simdBaseType - the base type of the constant being checked +// +// Returns: +// True if this node is a vector constant with all bits zero +// +inline bool GenTree::IsVectorNaN(var_types simdBaseType) const +{ + assert(varTypeIsFloating(simdBaseType)); + return IsCnsVec() && AsVecCon()->IsNaN(simdBaseType); +} + //------------------------------------------------------------------- // IsVectorCreate: returns true if this node is the creation of a vector. // Does not include "Unsafe" method calls. @@ -9288,6 +9637,24 @@ inline bool GenTree::IsVectorAllBitsSet() const return false; } +//------------------------------------------------------------------- +// IsVectorBroadcast: returns true if this node is a vector constant with the same value in all elements. +// +// Returns: +// True if this node is a vector constant with the same value in all elements. 
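// Illustrative sketch (not from the patch; simplified stand-in for the
// ElementsAreSame check used by GenTreeVecCon::IsBroadcast): a constant is a
// broadcast when every lane's bit pattern matches lane 0. The real code picks
// the u8/u16/u32/u64 view based on the SIMD base type.
#include <cstdint>

bool AllLanesEqual(const uint32_t* lanes, unsigned laneCount)
{
    for (unsigned i = 1; i < laneCount; i++)
    {
        if (lanes[i] != lanes[0])
        {
            return false; // some lane differs, so this is not a broadcast
        }
    }
    return true;
}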
+// +inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const +{ +#ifdef FEATURE_SIMD + if (IsCnsVec()) + { + return AsVecCon()->IsBroadcast(simdBaseType); + } +#endif // FEATURE_SIMD + + return false; +} + inline bool GenTree::IsMaskAllBitsSet() const { #ifdef TARGET_ARM64 diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 5fbc00b5997120..3a481e29345172 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -664,6 +664,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector64_ToVector128Unsafe, simdBaseJitType, 8); + GenTree* idx = gtNewIconNode(2, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); @@ -690,6 +692,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 12); + GenTree* idx = gtNewIconNode(3, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 41e6385e169b7f..5f82b9e3ebcb79 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1521,6 +1521,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_GetLower: GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ true); break; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 57ee811ef8be73..d96690c4003601 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1766,6 +1766,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector2: case NI_Vector128_AsVector3: case NI_Vector128_ToScalar: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index a98380e8687627..4de6a416516e1d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -130,6 +130,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector2, HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, 
HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index f69b4b93d8758b..33ececc04ff76b 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -49,6 +49,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector2, HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment) HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 61a7890e5051f4..b44c9cf0785e71 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1214,6 +1214,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 8); + GenTree* idx = gtNewIconNode(2, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); @@ -1240,6 +1242,8 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 12); + GenTree* idx = gtNewIconNode(3, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index f52fe739f11c00..c640105ec3a065 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3070,12 +3070,17 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig R2RARG(entryPoint), mustExpand); - if (mustExpand && (hwintrinsic == nullptr)) + if (hwintrinsic == nullptr) { - return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand); + if (mustExpand) + { + return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand); + } + return nullptr; } - return hwintrinsic; + // Fold result, if possible + return gtFoldExpr(hwintrinsic); } else { @@ -3083,7 +3088,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (isIntrinsic) { - return impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand); + GenTree* hwintrinsic = impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand); + + if (hwintrinsic == nullptr) + { + assert(!mustExpand); + return nullptr; + } + + // Fold result, if possible + return gtFoldExpr(hwintrinsic); } } } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 2675303454bebe..2f903a22a60278 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2202,7 +2202,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT) + bool isScalar = false; + if ((second->AsHWIntrinsic()->HWOperGet(&isScalar) == GT_AND_NOT) || isScalar) { // currently ANDNOT logic cannot be optimized by the ternary node. 
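// Illustrative sketch (not from the patch): how the ternary-logic control byte
// referenced above is formed for the commutative operators. Each input gets a
// truth-table mask (A = 0xF0, B = 0xCC, C = 0xAA, as in GetTernaryControlByte
// earlier in this patch) and the byte is the tree's operators applied to those
// masks. AND_NOT and the scalar forms are skipped by the lowering check above,
// as its comment notes.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint8_t A = 0xF0;
    const uint8_t B = 0xCC;
    const uint8_t C = 0xAA;

    unsigned andOr  = (A & B) | C; // control byte for (a & b) | c
    unsigned xorAnd = (A ^ B) & C; // control byte for (a ^ b) & c

    printf("0x%02X 0x%02X\n", andOr, xorAnd); // prints "0xEA 0x28"
    return 0;
}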
break; @@ -9266,50 +9267,22 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, { simdType = Compiler::getSIMDTypeForSize(simdSize); } - int elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType); - switch (simdBaseType) + if (varTypeIsSmall(simdBaseType)) { - case TYP_FLOAT: - case TYP_INT: - case TYP_UINT: - { - uint32_t firstElement = static_cast(childNode->gtSimdVal.u32[0]); - for (int i = 1; i < elementCount; i++) - { - uint32_t elementToCheck = static_cast(childNode->gtSimdVal.u32[i]); - if (firstElement != elementToCheck) - { - isCreatedFromScalar = false; - break; - } - } - break; - } - - case TYP_DOUBLE: -#if defined(TARGET_AMD64) - case TYP_LONG: - case TYP_ULONG: -#endif // TARGET_AMD64 - { - uint64_t firstElement = static_cast(childNode->gtSimdVal.u64[0]); - for (int i = 1; i < elementCount; i++) - { - uint64_t elementToCheck = static_cast(childNode->gtSimdVal.u64[i]); - if (firstElement != elementToCheck) - { - isCreatedFromScalar = false; - break; - } - } - break; - } - - default: - isCreatedFromScalar = false; - break; + isCreatedFromScalar = false; + } +#ifndef TARGET_64BIT + else if (varTypeIsLong(simdBaseType)) + { + isCreatedFromScalar = false; } +#endif // TARGET_64BIT + else + { + isCreatedFromScalar = childNode->IsBroadcast(simdBaseType); + } + if (isCreatedFromScalar) { NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index fc8fec26e41923..8a1816f4c9ec7a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1542,6 +1542,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector3: case NI_Vector128_GetLower: { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index a834eb7ad9acea..5d8e0e07262309 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2272,6 +2272,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector2: case NI_Vector128_AsVector3: case NI_Vector128_ToVector256: diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1dc6cb7af0e881..c99c8c84a93e91 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10679,10 +10679,16 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) default: { - genTreeOps oper = node->HWOperGet(); + bool isScalar = false; + genTreeOps oper = node->HWOperGet(&isScalar); if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)) { + if (oper == GT_NOT) + { + break; + } + GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -10861,11 +10867,19 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } } - // Transforms: - // 1.(~v1 & v2) to VectorXxx.AndNot(v1, v2) - // 2.(v1 & (~v2)) to VectorXxx.AndNot(v2, v1) - switch (node->HWOperGet()) + bool isScalar = false; + genTreeOps oper = node->HWOperGet(&isScalar); + + if (isScalar) { + return node; + } + + switch (oper) + { + // Transforms: + // 1.(~v1 & v2) to VectorXxx.AndNot(v1, v2) + // 2.(v1 & (~v2)) to VectorXxx.AndNot(v2, v1) case GT_AND: { GenTree* op1 = node->Op(1); @@ -10877,7 +10891,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) { // Try handle: ~op1 & op2 GenTreeHWIntrinsic* hw = op1->AsHWIntrinsic(); - genTreeOps 
hwOper = hw->HWOperGet(); + genTreeOps hwOper = hw->HWOperGet(&isScalar); + + if (isScalar) + { + return node; + } if (hwOper == GT_NOT) { @@ -10906,7 +10925,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) { // Try handle: op1 & ~op2 GenTreeHWIntrinsic* hw = op2->AsHWIntrinsic(); - genTreeOps hwOper = hw->HWOperGet(); + genTreeOps hwOper = hw->HWOperGet(&isScalar); + + if (isScalar) + { + return node; + } if (hwOper == GT_NOT) { @@ -11930,8 +11954,6 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD // GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) { - gtUpdateNodeOperSideEffects(multiOp); - bool dontCseConstArguments = false; #if defined(FEATURE_HW_INTRINSICS) // Opportunistically, avoid unexpected CSE for hw intrinsics with IMM arguments @@ -11954,12 +11976,10 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) for (GenTree** use : multiOp->UseEdges()) { - *use = fgMorphTree(*use); - + *use = fgMorphTree(*use); GenTree* operand = *use; - multiOp->gtFlags |= (operand->gtFlags & GTF_ALL_EFFECT); - if (dontCseConstArguments && operand->OperIsConst()) + if (dontCseConstArguments && operand->IsCnsIntOrI()) { operand->SetDoNotCSE(); } @@ -11978,10 +11998,33 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) } } + gtUpdateNodeOperSideEffects(multiOp); + + for (GenTree** use : multiOp->UseEdges()) + { + GenTree* operand = *use; + multiOp->AddAllEffectsFlags(operand); + } + #if defined(FEATURE_HW_INTRINSICS) - if (opts.OptimizationEnabled() && multiOp->OperIs(GT_HWINTRINSIC)) + if (opts.OptimizationEnabled() && multiOp->OperIsHWIntrinsic()) { - GenTreeHWIntrinsic* hw = multiOp->AsHWIntrinsic(); + // Try to fold it, maybe we get lucky, + GenTree* foldedTree = gtFoldExpr(multiOp); + + if (foldedTree != multiOp) + { + assert(!fgIsCommaThrow(foldedTree)); + INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return foldedTree; + } + else if (!foldedTree->OperIsHWIntrinsic()) + { + INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return foldedTree; + } + + GenTreeHWIntrinsic* hw = foldedTree->AsHWIntrinsic(); // Move constant vectors from op1 to op2 for commutative and compare operations if ((hw->GetOperandCount() == 2) && hw->Op(1)->IsVectorConst() && diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index a388eecebdf3c1..8364861cf09d4e 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -415,7 +415,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0) } template -void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0) +void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0) { uint32_t count = sizeof(TSimd) / sizeof(TBase); @@ -445,7 +445,7 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0) } template -void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, TSimd arg0) +void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, const TSimd& arg0) { switch (baseType) { @@ -532,7 +532,8 @@ TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1) } #else // Other platforms enforce masking in their encoding - assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8))); + unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; + arg1 &= shiftCountMask; #endif return arg0 >> arg1; @@ -608,7 +609,8 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) } #else // Other platforms enforce masking in their 
encoding - assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8))); + unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; + arg1 &= shiftCountMask; #endif return arg0 << arg1; } @@ -647,7 +649,8 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) } #else // Other platforms enforce masking in their encoding - assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8))); + unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; + arg1 &= shiftCountMask; #endif return arg0 >> arg1; } @@ -722,7 +725,7 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1) } template -void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0, TSimd arg1) +void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0, const TSimd& arg1) { uint32_t count = sizeof(TSimd) / sizeof(TBase); @@ -755,7 +758,8 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0, } template -void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, TSimd arg0, TSimd arg1) +void EvaluateBinarySimd( + genTreeOps oper, bool scalar, var_types baseType, TSimd* result, const TSimd& arg0, const TSimd& arg1) { switch (baseType) { @@ -826,6 +830,168 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* } } +template +double EvaluateGetElementFloating(var_types simdBaseType, const TSimd& arg0, int32_t arg1) +{ + switch (simdBaseType) + { + case TYP_FLOAT: + { + return arg0.f32[arg1]; + } + + case TYP_DOUBLE: + { + return arg0.f64[arg1]; + } + + default: + { + unreached(); + } + } +} + +template +int64_t EvaluateGetElementIntegral(var_types simdBaseType, const TSimd& arg0, int32_t arg1) +{ + switch (simdBaseType) + { + case TYP_BYTE: + { + return arg0.i8[arg1]; + } + + case TYP_UBYTE: + { + return arg0.u8[arg1]; + } + + case TYP_SHORT: + { + return arg0.i16[arg1]; + } + + case TYP_USHORT: + { + return arg0.u16[arg1]; + } + + case TYP_INT: + { + return arg0.i32[arg1]; + } + + case TYP_UINT: + { + return arg0.u32[arg1]; + } + + case TYP_LONG: + { + return arg0.i64[arg1]; + } + + case TYP_ULONG: + { + return static_cast(arg0.u64[arg1]); + } + + default: + { + unreached(); + } + } +} + +template +void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, const TSimd& arg0, int32_t arg1, double arg2) +{ + *result = arg0; + + switch (simdBaseType) + { + case TYP_FLOAT: + { + result->f32[arg1] = static_cast(arg2); + break; + } + + case TYP_DOUBLE: + { + result->f64[arg1] = static_cast(arg2); + break; + } + + default: + { + unreached(); + } + } +} + +template +void EvaluateWithElementIntegral(var_types simdBaseType, TSimd* result, const TSimd& arg0, int32_t arg1, int64_t arg2) +{ + *result = arg0; + + switch (simdBaseType) + { + case TYP_BYTE: + { + result->i8[arg1] = static_cast(arg2); + break; + } + + case TYP_UBYTE: + { + result->u8[arg1] = static_cast(arg2); + break; + } + + case TYP_SHORT: + { + result->i16[arg1] = static_cast(arg2); + break; + } + + case TYP_USHORT: + { + result->u16[arg1] = static_cast(arg2); + break; + } + + case TYP_INT: + { + result->i32[arg1] = static_cast(arg2); + break; + } + + case TYP_UINT: + { + result->u32[arg1] = static_cast(arg2); + break; + } + + case TYP_LONG: + { + result->i64[arg1] = static_cast(arg2); + break; + } + + case TYP_ULONG: + { + result->u64[arg1] = static_cast(arg2); + break; + } + + default: + { + unreached(); + } + } +} + template void BroadcastConstantToSimd(TSimd* result, TBase arg0) { diff --git 
a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 2bf103c2ba2f79..b15a9988195e13 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1733,7 +1733,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { assert(retType == TYP_VOID); assert(simdBaseType == TYP_FLOAT); - assert((simdSize == 12) || (simdSize == 16)); + assert(simdSize == 12); + assert(simdType == TYP_SIMD12); // TODO-CQ: We should be able to check for contiguous args here after // the relevant methods are updated to support more than just float @@ -1743,21 +1744,19 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTreeVecCon* vecCon = op2->AsVecCon(); vecCon->gtType = simdType; - if (simdSize == 12) - { - vecCon->gtSimdVal.f32[2] = static_cast(op3->AsDblCon()->DconValue()); - } - else - { - vecCon->gtSimdVal.f32[3] = static_cast(op3->AsDblCon()->DconValue()); - } - - copyBlkSrc = vecCon; + vecCon->gtSimdVal.f32[2] = static_cast(op3->AsDblCon()->DconValue()); + copyBlkSrc = vecCon; } else { - GenTree* idx = gtNewIconNode((simdSize == 12) ? 2 : 3, TYP_INT); - copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, idx, op3, simdBaseJitType, simdSize); + GenTree* idx = gtNewIconNode(2, TYP_INT); + + op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_AsVector128Unsafe, simdBaseJitType, + 12); + op2 = gtNewSimdWithElementNode(TYP_SIMD16, op2, idx, op3, simdBaseJitType, 16); + + copyBlkSrc = + gtNewSimdHWIntrinsicNode(TYP_SIMD12, op2, NI_Vector128_AsVector3, simdBaseJitType, 16); } copyBlkDst = op1; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 8e7875ea269ef9..17b45702776c80 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -1856,33 +1856,33 @@ ValueNum ValueNumStore::VNForByrefCon(target_size_t cnsVal) } #if defined(FEATURE_SIMD) -ValueNum ValueNumStore::VNForSimd8Con(simd8_t cnsVal) +ValueNum ValueNumStore::VNForSimd8Con(const simd8_t& cnsVal) { return VnForConst(cnsVal, GetSimd8CnsMap(), TYP_SIMD8); } -ValueNum ValueNumStore::VNForSimd12Con(simd12_t cnsVal) +ValueNum ValueNumStore::VNForSimd12Con(const simd12_t& cnsVal) { return VnForConst(cnsVal, GetSimd12CnsMap(), TYP_SIMD12); } -ValueNum ValueNumStore::VNForSimd16Con(simd16_t cnsVal) +ValueNum ValueNumStore::VNForSimd16Con(const simd16_t& cnsVal) { return VnForConst(cnsVal, GetSimd16CnsMap(), TYP_SIMD16); } #if defined(TARGET_XARCH) -ValueNum ValueNumStore::VNForSimd32Con(simd32_t cnsVal) +ValueNum ValueNumStore::VNForSimd32Con(const simd32_t& cnsVal) { return VnForConst(cnsVal, GetSimd32CnsMap(), TYP_SIMD32); } -ValueNum ValueNumStore::VNForSimd64Con(simd64_t cnsVal) +ValueNum ValueNumStore::VNForSimd64Con(const simd64_t& cnsVal) { return VnForConst(cnsVal, GetSimd64CnsMap(), TYP_SIMD64); } -ValueNum ValueNumStore::VNForSimdMaskCon(simdmask_t cnsVal) +ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& cnsVal) { return VnForConst(cnsVal, GetSimdMaskCnsMap(), TYP_MASK); } @@ -2217,121 +2217,182 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ) } #ifdef FEATURE_SIMD -ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseType) +template +TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum argVN) { - assert(varTypeIsSIMD(simdType)); + assert(vns->IsVNConstant(argVN)); + assert(!varTypeIsSIMD(vns->TypeOfVN(argVN))); - simd_t simdVal = {}; - int simdSize = genTypeSize(simdType); + TSimd result = {}; - switch 
(simdBaseType) + switch (baseType) { + case TYP_FLOAT: + { + float arg = vns->GetConstantSingle(argVN); + BroadcastConstantToSimd(&result, arg); + break; + } + + case TYP_DOUBLE: + { + double arg = vns->GetConstantDouble(argVN); + BroadcastConstantToSimd(&result, arg); + break; + } + case TYP_BYTE: case TYP_UBYTE: { - for (int i = 0; i < simdSize; i++) - { - simdVal.u8[i] = 1; - } + uint8_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimd(&result, arg); break; } case TYP_SHORT: case TYP_USHORT: { - for (int i = 0; i < (simdSize / 2); i++) - { - simdVal.u16[i] = 1; - } + uint16_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimd(&result, arg); break; } case TYP_INT: case TYP_UINT: { - for (int i = 0; i < (simdSize / 4); i++) - { - simdVal.u32[i] = 1; - } + uint32_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimd(&result, arg); break; } case TYP_LONG: case TYP_ULONG: { - for (int i = 0; i < (simdSize / 8); i++) - { - simdVal.u64[i] = 1; - } + uint64_t arg = static_cast(vns->GetConstantInt64(argVN)); + BroadcastConstantToSimd(&result, arg); break; } - case TYP_FLOAT: + default: { - for (int i = 0; i < (simdSize / 4); i++) - { - simdVal.f32[i] = 1.0f; - } - break; + unreached(); } + } - case TYP_DOUBLE: + return result; +} + +ValueNum ValueNumStore::VNBroadcastForSimdType(var_types simdType, var_types simdBaseType, ValueNum valVN) +{ + assert(varTypeIsSIMD(simdType)); + + switch (simdType) + { + case TYP_SIMD8: { - for (int i = 0; i < (simdSize / 8); i++) - { - simdVal.f64[i] = 1.0; - } - break; + simd8_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); + return VNForSimd8Con(result); + } + + case TYP_SIMD12: + { + simd12_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); + return VNForSimd12Con(result); + } + + case TYP_SIMD16: + { + simd16_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); + return VNForSimd16Con(result); + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); + return VNForSimd32Con(result); + } + + case TYP_SIMD64: + { + simd64_t result = BroadcastConstantToSimd(this, simdBaseType, valVN); + return VNForSimd64Con(result); + } + + case TYP_MASK: + { + unreached(); } +#endif // TARGET_XARCH default: { unreached(); } } +} + +ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseType) +{ + ValueNum oneVN = VNOneForType(simdBaseType); + return VNBroadcastForSimdType(simdType, simdBaseType, oneVN); +} + +ValueNum ValueNumStore::VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType) +{ + ValueNum baseTypeVN = VNForIntCon(INT32(simdBaseJitType)); + ValueNum sizeVN = VNForIntCon(simdSize); + ValueNum simdTypeVN = VNForFunc(TYP_REF, VNF_SimdType, sizeVN, baseTypeVN); + + return simdTypeVN; +} + +bool ValueNumStore::VNIsVectorNaN(var_types simdType, var_types simdBaseType, ValueNum valVN) +{ + assert(varTypeIsSIMD(simdType)); + + simd_t vector = {}; switch (simdType) { case TYP_SIMD8: { - simd8_t simd8Val; - memcpy(&simd8Val, &simdVal, sizeof(simd8_t)); - return VNForSimd8Con(simd8Val); + simd8_t tmp = GetConstantSimd8(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; } case TYP_SIMD12: { - simd12_t simd12Val; - memcpy(&simd12Val, &simdVal, sizeof(simd12_t)); - return VNForSimd12Con(simd12Val); + simd12_t tmp = GetConstantSimd12(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; } case TYP_SIMD16: { - simd16_t simd16Val; - memcpy(&simd16Val, 
&simdVal, sizeof(simd16_t)); - return VNForSimd16Con(simd16Val); + simd16_t tmp = GetConstantSimd16(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; } #if defined(TARGET_XARCH) case TYP_SIMD32: { - simd32_t simd32Val; - memcpy(&simd32Val, &simdVal, sizeof(simd32_t)); - return VNForSimd32Con(simd32Val); + simd32_t tmp = GetConstantSimd32(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; } case TYP_SIMD64: { - simd64_t simd64Val; - memcpy(&simd64Val, &simdVal, sizeof(simd64_t)); - return VNForSimd64Con(simd64Val); + simd64_t tmp = GetConstantSimd64(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; } case TYP_MASK: { - // '1' doesn't make sense for TYP_MASK? - // Or should it be AllBitsSet? unreached(); } #endif // TARGET_XARCH @@ -2341,15 +2402,91 @@ ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseT unreached(); } } + + uint32_t elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType); + + for (uint32_t i = 0; i < elementCount; i++) + { + double element = EvaluateGetElementFloating(simdBaseType, vector, i); + + if (!FloatingPointUtils::isNaN(element)) + { + return false; + } + } + + return true; } -ValueNum ValueNumStore::VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType) +bool ValueNumStore::VNIsVectorNegativeZero(var_types simdType, var_types simdBaseType, ValueNum valVN) { - ValueNum baseTypeVN = VNForIntCon(INT32(simdBaseJitType)); - ValueNum sizeVN = VNForIntCon(simdSize); - ValueNum simdTypeVN = VNForFunc(TYP_REF, VNF_SimdType, sizeVN, baseTypeVN); + assert(varTypeIsSIMD(simdType)); - return simdTypeVN; + simd_t vector = {}; + + switch (simdType) + { + case TYP_SIMD8: + { + simd8_t tmp = GetConstantSimd8(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } + + case TYP_SIMD12: + { + simd12_t tmp = GetConstantSimd12(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } + + case TYP_SIMD16: + { + simd16_t tmp = GetConstantSimd16(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t tmp = GetConstantSimd32(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } + + case TYP_SIMD64: + { + simd64_t tmp = GetConstantSimd64(valVN); + memcpy(&vector, &tmp, genTypeSize(simdType)); + break; + } + + case TYP_MASK: + { + unreached(); + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } + + uint32_t elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType); + + for (uint32_t i = 0; i < elementCount; i++) + { + double element = EvaluateGetElementFloating(simdBaseType, vector, i); + + if (!FloatingPointUtils::isNegativeZero(element)) + { + return false; + } + } + + return true; } #endif // FEATURE_SIMD @@ -7091,71 +7228,6 @@ void ValueNumStore::SetVNIsCheckedBound(ValueNum vn) } #ifdef FEATURE_HW_INTRINSICS -template -TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum argVN) -{ - assert(vns->IsVNConstant(argVN)); - assert(!varTypeIsSIMD(vns->TypeOfVN(argVN))); - - TSimd result = {}; - - switch (baseType) - { - case TYP_FLOAT: - { - float arg = vns->GetConstantSingle(argVN); - BroadcastConstantToSimd(&result, arg); - break; - } - - case TYP_DOUBLE: - { - double arg = vns->GetConstantDouble(argVN); - BroadcastConstantToSimd(&result, arg); - break; - } - - case TYP_BYTE: - case TYP_UBYTE: - { - uint8_t arg = static_cast(vns->GetConstantInt32(argVN)); - BroadcastConstantToSimd(&result, arg); - break; - } - - 
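The two predicates added above, VNIsVectorNaN and VNIsVectorNegativeZero, answer "is every lane NaN / negative zero", which is what makes the floating-point identities later in this patch safe. A standalone sketch of the per-lane test they perform, reusing the simd.h EvaluateGetElementFloating helper from this change (assumes <limits> for quiet_NaN; the values are illustrative):

    // Build a TYP_SIMD16 of float where every lane is NaN, then check each
    // lane the same way VNIsVectorNaN does.
    simd16_t vec = {};
    for (int i = 0; i < 4; i++)
    {
        vec.f32[i] = std::numeric_limits<float>::quiet_NaN();
    }

    bool allNaN = true;
    for (int i = 0; i < 4; i++)
    {
        double element = EvaluateGetElementFloating(TYP_FLOAT, vec, i);
        allNaN = allNaN && FloatingPointUtils::isNaN(element);
    }
    // allNaN == true, so an add against this constant may fold to the constant.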
case TYP_SHORT: - case TYP_USHORT: - { - uint16_t arg = static_cast(vns->GetConstantInt32(argVN)); - BroadcastConstantToSimd(&result, arg); - break; - } - - case TYP_INT: - case TYP_UINT: - { - uint32_t arg = static_cast(vns->GetConstantInt32(argVN)); - BroadcastConstantToSimd(&result, arg); - break; - } - - case TYP_LONG: - case TYP_ULONG: - { - uint64_t arg = static_cast(vns->GetConstantInt64(argVN)); - BroadcastConstantToSimd(&result, arg); - break; - } - - default: - { - unreached(); - } - } - - return result; -} - simd8_t GetConstantSimd8(ValueNumStore* vns, var_types baseType, ValueNum argVN) { assert(vns->IsVNConstant(argVN)); @@ -7347,7 +7419,7 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns, } template -ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd arg0, int arg1) +ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, const TSimd& arg0, int32_t arg1) { switch (baseType) { @@ -7418,9 +7490,14 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd ar } } -ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types baseType, ValueNum arg0VN, int arg1) +ValueNum EvaluateSimdGetElement( + ValueNumStore* vns, var_types simdType, var_types baseType, ValueNum arg0VN, int32_t arg1) { - switch (vns->TypeOfVN(arg0VN)) + assert(vns->IsVNConstant(arg0VN)); + assert(simdType == vns->TypeOfVN(arg0VN)); + assert(static_cast(arg1) < GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)); + + switch (simdType) { case TYP_SIMD8: { @@ -7456,16 +7533,23 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types ba } } -ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - bool encodeResultType, - ValueNum resultTypeVN) +ValueNum ValueNumStore::EvalHWIntrinsicFunUnary( + GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, bool encodeResultType, ValueNum resultTypeVN) { + var_types type = tree->TypeGet(); + var_types baseType = tree->GetSimdBaseType(); + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + if (IsVNConstant(arg0VN)) { + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + + if (oper != GT_NONE) + { + return EvaluateUnarySimd(this, oper, isScalar, type, baseType, arg0VN); + } + switch (ni) { #ifdef TARGET_ARM64 @@ -7525,21 +7609,24 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, return VNForLongCon(static_cast(result)); } - case NI_AdvSimd_Negate: - case NI_AdvSimd_Arm64_Negate: + case NI_Vector64_ToVector128: + case NI_Vector64_ToVector128Unsafe: { - return EvaluateUnarySimd(this, GT_NEG, /* scalar */ false, type, baseType, arg0VN); + simd16_t result = {}; + result.v64[0] = GetConstantSimd8(arg0VN); + return VNForSimd16Con(result); } - case NI_AdvSimd_NegateScalar: - case NI_AdvSimd_Arm64_NegateScalar: + case NI_Vector128_GetLower: { - return EvaluateUnarySimd(this, GT_NEG, /* scalar */ true, type, baseType, arg0VN); + simd8_t result = GetConstantSimd16(arg0VN).v64[0]; + return VNForSimd8Con(result); } - case NI_AdvSimd_Not: + case NI_Vector128_GetUpper: { - return EvaluateUnarySimd(this, GT_NOT, /* scalar */ false, type, baseType, arg0VN); + simd8_t result = GetConstantSimd16(arg0VN).v64[1]; + return VNForSimd8Con(result); } #endif // TARGET_ARM64 @@ -7651,17 +7738,111 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, uint32_t result = BitOperations::BitScanReverse(static_cast(value)); return 
VNForLongCon(static_cast(result)); } -#endif // TARGET_XARCH - case NI_Vector128_ToScalar: -#ifdef TARGET_ARM64 - case NI_Vector64_ToScalar: -#else - case NI_Vector256_ToScalar: - case NI_Vector512_ToScalar: -#endif + case NI_Vector128_AsVector2: { - return EvaluateSimdGetElement(this, type, baseType, arg0VN, 0); + simd8_t result = GetConstantSimd16(arg0VN).v64[0]; + return VNForSimd8Con(result); + } + + case NI_Vector128_ToVector256: + case NI_Vector128_ToVector256Unsafe: + { + simd32_t result = {}; + result.v128[0] = GetConstantSimd16(arg0VN); + return VNForSimd32Con(result); + } + + case NI_Vector128_ToVector512: + { + simd64_t result = {}; + result.v128[0] = GetConstantSimd16(arg0VN); + return VNForSimd64Con(result); + } + + case NI_Vector256_GetLower: + { + simd16_t result = GetConstantSimd32(arg0VN).v128[0]; + return VNForSimd16Con(result); + } + + case NI_Vector256_GetUpper: + { + simd16_t result = GetConstantSimd32(arg0VN).v128[1]; + return VNForSimd16Con(result); + } + + case NI_Vector256_ToVector512: + case NI_Vector256_ToVector512Unsafe: + { + simd64_t result = {}; + result.v256[0] = GetConstantSimd32(arg0VN); + return VNForSimd64Con(result); + } + + case NI_Vector512_GetLower: + { + simd32_t result = GetConstantSimd64(arg0VN).v256[0]; + return VNForSimd32Con(result); + } + + case NI_Vector512_GetUpper: + { + simd32_t result = GetConstantSimd64(arg0VN).v256[1]; + return VNForSimd32Con(result); + } + + case NI_Vector512_GetLower128: + { + simd16_t result = GetConstantSimd64(arg0VN).v128[0]; + return VNForSimd16Con(result); + } +#endif // TARGET_XARCH + + case NI_Vector128_AsVector3: + { + simd12_t result = {}; + simd16_t vector = GetConstantSimd16(arg0VN); + + result.f32[0] = vector.f32[0]; + result.f32[1] = vector.f32[1]; + result.f32[2] = vector.f32[2]; + + return VNForSimd12Con(result); + } + + case NI_Vector128_AsVector128Unsafe: + { + if (TypeOfVN(arg0VN) == TYP_SIMD8) + { + simd16_t result = {}; + result.v64[0] = GetConstantSimd8(arg0VN); + return VNForSimd16Con(result); + } + else + { + assert(TypeOfVN(arg0VN) == TYP_SIMD12); + + simd16_t result = {}; + simd12_t vector = GetConstantSimd12(arg0VN); + + result.f32[0] = vector.f32[0]; + result.f32[1] = vector.f32[1]; + result.f32[2] = vector.f32[2]; + + return VNForSimd16Con(result); + } + } + + case NI_Vector128_ToScalar: +#ifdef TARGET_ARM64 + case NI_Vector64_ToScalar: +#else + case NI_Vector256_ToScalar: + case NI_Vector512_ToScalar: +#endif + { + return EvaluateSimdGetElement(this, TypeOfVN(arg0VN), baseType, arg0VN, 0); } default: @@ -7676,15 +7857,17 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, return VNForFunc(type, func, arg0VN); } -ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - bool encodeResultType, - ValueNum resultTypeVN) +ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + bool encodeResultType, + ValueNum resultTypeVN) { + var_types type = tree->TypeGet(); + var_types baseType = tree->GetSimdBaseType(); + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + ValueNum cnsVN = NoVN; ValueNum argVN = NoVN; @@ -7711,89 +7894,54 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, { assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN)); - switch (ni) - { -#ifdef TARGET_ARM64 - case NI_AdvSimd_Add: - case NI_AdvSimd_Arm64_Add: -#else - case NI_SSE_Add: - case NI_SSE2_Add: - case NI_AVX_Add: - case 
NI_AVX2_Add: - case NI_AVX512F_Add: - case NI_AVX512BW_Add: -#endif - { - return EvaluateBinarySimd(this, GT_ADD, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); -#ifdef TARGET_ARM64 - case NI_AdvSimd_AddScalar: -#else - case NI_SSE_AddScalar: - case NI_SSE2_AddScalar: -#endif + if (oper != GT_NONE) + { +#if defined(TARGET_XARCH) + if (oper == GT_AND_NOT) { - return EvaluateBinarySimd(this, GT_ADD, /* scalar */ true, type, baseType, arg0VN, arg1VN); + // xarch does: ~arg0VN & arg1VN + std::swap(arg0VN, arg1VN); } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_And: -#else - case NI_SSE_And: - case NI_SSE2_And: - case NI_AVX_And: - case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: -#endif + else if ((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)) { - return EvaluateBinarySimd(this, GT_AND, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } + if (TypeOfVN(arg1VN) == TYP_SIMD16) + { + if ((ni != NI_AVX2_ShiftLeftLogicalVariable) && (ni != NI_AVX2_ShiftRightArithmeticVariable) && + (ni != NI_AVX512F_VL_ShiftRightArithmeticVariable) && + (ni != NI_AVX10v1_ShiftRightArithmeticVariable) && (ni != NI_AVX2_ShiftRightLogicalVariable)) + { + // The xarch shift instructions support taking the shift amount as + // a simd16, in which case they take the shift amount from the lower + // 64-bits. -#ifdef TARGET_ARM64 - case NI_AdvSimd_BitwiseClear: - { - return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } -#else - case NI_SSE_AndNot: - case NI_SSE2_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: - { - // xarch does: ~arg0VN & arg1VN - return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg1VN, arg0VN); - } -#endif + uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; -#ifdef TARGET_ARM64 - case NI_AdvSimd_Arm64_Divide: -#else - case NI_SSE_Divide: - case NI_SSE2_Divide: - case NI_AVX_Divide: - case NI_AVX512F_Divide: -#endif - { - return EvaluateBinarySimd(this, GT_DIV, /* scalar */ false, type, baseType, arg0VN, arg1VN); + if (genTypeSize(baseType) != 8) + { + if (shiftAmount > INT_MAX) + { + // Ensure we don't lose track the the amount is an overshift + shiftAmount = -1; + } + arg1VN = VNForIntCon(static_cast(shiftAmount)); + } + else + { + arg1VN = VNForLongCon(static_cast(shiftAmount)); + } + } + } } +#endif // TARGET_XARCH -#ifdef TARGET_ARM64 - case NI_AdvSimd_DivideScalar: -#else - case NI_SSE_DivideScalar: - case NI_SSE2_DivideScalar: -#endif - { - return EvaluateBinarySimd(this, GT_DIV, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } + return EvaluateBinarySimd(this, oper, isScalar, type, baseType, arg0VN, arg1VN); + } + switch (ni) + { case NI_Vector128_GetElement: #ifdef TARGET_ARM64 case NI_Vector64_GetElement: @@ -7802,10 +7950,18 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_Vector512_GetElement: #endif { - return EvaluateSimdGetElement(this, type, baseType, arg0VN, GetConstantInt32(arg1VN)); + var_types simdType = TypeOfVN(arg0VN); + int32_t index = GetConstantInt32(arg1VN); + + if (static_cast(index) >= GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)) + { + // Nothing to fold for out of range indexes + break; + } + return EvaluateSimdGetElement(this, simdType, baseType, arg0VN, index); } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) case 
NI_AdvSimd_MultiplyByScalar: case NI_AdvSimd_Arm64_MultiplyByScalar: { @@ -7813,278 +7969,92 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, // We need to extract it and then functionally broadcast it up for the evaluation to // work as expected. - arg1VN = EvaluateSimdGetElement(this, type, baseType, arg1VN, 0); - FALLTHROUGH; + arg1VN = EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); + return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, arg0VN, arg1VN); } -#endif -#ifdef TARGET_XARCH - case NI_AVX512F_Multiply: + case NI_Vector128_WithLower: { - if (!varTypeIsFloating(baseType)) - { - // We don't support this for integrals since it returns a different size than the input - break; - } - FALLTHROUGH; + simd16_t result = GetConstantSimd16(arg0VN); + result.v64[0] = GetConstantSimd8(arg1VN); + return VNForSimd16Con(result); } -#endif // TARGET_XARCH -#ifdef TARGET_ARM64 - case NI_AdvSimd_Multiply: - case NI_AdvSimd_Arm64_Multiply: -#else - case NI_SSE_Multiply: - case NI_SSE2_Multiply: - case NI_SSE2_MultiplyLow: - case NI_SSE41_MultiplyLow: - case NI_AVX_Multiply: - case NI_AVX2_MultiplyLow: - case NI_AVX512F_MultiplyLow: - case NI_AVX512BW_MultiplyLow: - case NI_AVX512DQ_MultiplyLow: - case NI_AVX512DQ_VL_MultiplyLow: - case NI_AVX10v1_MultiplyLow: - case NI_AVX10v1_V512_MultiplyLow: -#endif + case NI_Vector128_WithUpper: { - return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd16_t result = GetConstantSimd16(arg0VN); + result.v64[1] = GetConstantSimd8(arg1VN); + return VNForSimd16Con(result); } +#endif // TARGET_ARM64 -#ifdef TARGET_ARM64 - case NI_AdvSimd_MultiplyScalar: -#else - case NI_SSE_MultiplyScalar: - case NI_SSE2_MultiplyScalar: -#endif +#if defined(TARGET_XARCH) + case NI_Vector256_WithLower: { - return EvaluateBinarySimd(this, GT_MUL, /* scalar */ true, type, baseType, arg0VN, arg1VN); + simd32_t result = GetConstantSimd32(arg0VN); + result.v128[0] = GetConstantSimd16(arg1VN); + return VNForSimd32Con(result); } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Or: -#else - case NI_SSE_Or: - case NI_SSE2_Or: - case NI_AVX_Or: - case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: -#endif + case NI_Vector256_WithUpper: { - return EvaluateBinarySimd(this, GT_OR, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd32_t result = GetConstantSimd32(arg0VN); + result.v128[1] = GetConstantSimd16(arg1VN); + return VNForSimd32Con(result); } -#ifdef TARGET_XARCH - case NI_AVX512F_RotateLeft: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX10v1_RotateLeft: + case NI_Vector512_WithLower: { - return EvaluateBinarySimd(this, GT_ROL, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd64_t result = GetConstantSimd64(arg0VN); + result.v256[0] = GetConstantSimd32(arg1VN); + return VNForSimd64Con(result); } - case NI_AVX512F_RotateRight: - case NI_AVX512F_VL_RotateRight: - case NI_AVX10v1_RotateRight: + case NI_Vector512_WithUpper: { - return EvaluateBinarySimd(this, GT_ROR, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd64_t result = GetConstantSimd64(arg0VN); + result.v256[1] = GetConstantSimd32(arg1VN); + return VNForSimd64Con(result); } #endif // TARGET_XARCH -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftLeftLogical: -#else - case NI_SSE2_ShiftLeftLogical: - case NI_AVX2_ShiftLeftLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512BW_ShiftLeftLogical: -#endif - { -#ifdef TARGET_XARCH - if (TypeOfVN(arg1VN) == TYP_SIMD16) - { - // The 
xarch shift instructions support taking the shift amount as - // a simd16, in which case they take the shift amount from the lower - // 64-bits. - - uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; - - if (genTypeSize(baseType) != 8) - { - if (shiftAmount > INT_MAX) - { - // Ensure we don't lose track the the amount is an overshift - shiftAmount = -1; - } - arg1VN = VNForIntCon(static_cast(shiftAmount)); - } - else - { - arg1VN = VNForLongCon(static_cast(shiftAmount)); - } - } -#endif // TARGET_XARCH + default: + break; + } + } + else if (cnsVN != NoVN) + { + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); - return EvaluateBinarySimd(this, GT_LSH, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } + if (isScalar) + { + // We don't support folding scalars today + oper = GT_NONE; + } -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftRightArithmetic: -#else - case NI_SSE2_ShiftRightArithmetic: - case NI_AVX2_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX10v1_ShiftRightArithmetic: -#endif + switch (oper) + { + case GT_ADD: { -#ifdef TARGET_XARCH - if (TypeOfVN(arg1VN) == TYP_SIMD16) + if (varTypeIsFloating(baseType)) { - // The xarch shift instructions support taking the shift amount as - // a simd16, in which case they take the shift amount from the lower - // 64-bits. - - uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; + // Handle `x + NaN == NaN` and `NaN + x == NaN` + // This is safe for all floats since we do not fault for sNaN - if (genTypeSize(baseType) != 8) + if (VNIsVectorNaN(type, baseType, cnsVN)) { - if (shiftAmount > INT_MAX) - { - // Ensure we don't lose track the the amount is an overshift - shiftAmount = -1; - } - arg1VN = VNForIntCon(static_cast(shiftAmount)); - } - else - { - arg1VN = VNForLongCon(static_cast(shiftAmount)); + return cnsVN; } - } -#endif // TARGET_XARCH - return EvaluateBinarySimd(this, GT_RSH, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } + // Handle `x + -0 == x` and `-0 + x == x` -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftRightLogical: -#else - case NI_SSE2_ShiftRightLogical: - case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512BW_ShiftRightLogical: -#endif - { -#ifdef TARGET_XARCH - if (TypeOfVN(arg1VN) == TYP_SIMD16) - { - // The xarch shift instructions support taking the shift amount as - // a simd16, in which case they take the shift amount from the lower - // 64-bits. 
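Both the removed per-intrinsic cases and the new oper-based path funnel into EvaluateBinarySimd from simd.h once the operands are constant. A worked sketch of what that helper computes for a TYP_SIMD16 of int (element values are made up for illustration):

    simd16_t left  = {};
    simd16_t right = {};

    for (int i = 0; i < 4; i++)
    {
        left.i32[i]  = i + 1;        // <1, 2, 3, 4>
        right.i32[i] = (i + 1) * 10; // <10, 20, 30, 40>
    }

    simd16_t sum = {};
    EvaluateBinarySimd<simd16_t, int32_t>(GT_ADD, /* scalar */ false, &sum, left, right);
    // sum.i32 == { 11, 22, 33, 44 }; with scalar == true only lane 0 would be
    // computed and the handling of the upper lanes is target-specific.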
- - uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; - - if (genTypeSize(baseType) != 8) - { - if (shiftAmount > INT_MAX) - { - // Ensure we don't lose track the the amount is an overshift - shiftAmount = -1; - } - arg1VN = VNForIntCon(static_cast(shiftAmount)); - } - else + if (VNIsVectorNegativeZero(type, baseType, cnsVN)) { - arg1VN = VNForLongCon(static_cast(shiftAmount)); + return argVN; } - } -#endif // TARGET_XARCH - - return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftLeftLogicalScalar: - { - return EvaluateBinarySimd(this, GT_LSH, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } - - case NI_AdvSimd_ShiftRightArithmeticScalar: - { - return EvaluateBinarySimd(this, GT_RSH, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } - - case NI_AdvSimd_ShiftRightLogicalScalar: - { - return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } -#endif // TARGET_ARM64 - -#ifdef TARGET_ARM64 - case NI_AdvSimd_Subtract: - case NI_AdvSimd_Arm64_Subtract: -#else - case NI_SSE_Subtract: - case NI_SSE2_Subtract: - case NI_AVX_Subtract: - case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: -#endif - { - return EvaluateBinarySimd(this, GT_SUB, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_SubtractScalar: -#else - case NI_SSE_SubtractScalar: - case NI_SSE2_SubtractScalar: -#endif - { - return EvaluateBinarySimd(this, GT_SUB, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Xor: -#else - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: -#endif - { - return EvaluateBinarySimd(this, GT_XOR, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } - - default: - break; - } - } - else if (cnsVN != NoVN) - { - switch (ni) - { -#ifdef TARGET_ARM64 - case NI_AdvSimd_Add: - case NI_AdvSimd_Arm64_Add: -#else - case NI_SSE_Add: - case NI_SSE2_Add: - case NI_AVX_Add: - case NI_AVX2_Add: - case NI_AVX512F_Add: - case NI_AVX512BW_Add: -#endif - { - if (varTypeIsFloating(baseType)) - { - // Not safe for floating-point when x == -0.0 + // We cannot handle `x + 0 == x` or `0 + x == x` since `-0 + 0 == 0` break; } @@ -8098,17 +8068,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_And: -#else - case NI_SSE_And: - case NI_SSE2_And: - case NI_AVX_And: - case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: -#endif + case GT_AND: { // Handle `x & 0 == 0` and `0 & x == 0` ValueNum zeroVN = VNZeroForType(type); @@ -8118,7 +8078,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return zeroVN; } - // Handle `x & ~0 == x` and `~0 & x == x` + // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` ValueNum allBitsVN = VNAllBitsForType(type); if (cnsVN == allBitsVN) @@ -8128,59 +8088,50 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_BitwiseClear: -#else - case NI_SSE_AndNot: - case NI_SSE2_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + case GT_AND_NOT: { -#ifdef TARGET_ARM64 - if (cnsVN == arg0VN) - { - // arm64 preserves the args, so we can only handle `x & ~cns` - break; - } 
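A note on the GT_AND_NOT orientation used throughout this hunk: in GenTree terms GT_AND_NOT evaluates op1 & ~op2, which matches the arm64 BitwiseClear intrinsic directly, while the xarch AndNot intrinsics compute ~op1 & op2 and therefore have their value numbers swapped before the shared code runs. A small sketch of that orientation (values are illustrative only):

    simd16_t value = {};
    simd16_t mask  = {};
    value.u32[0] = 0xF;
    mask.u32[0]  = 0x5;

    simd16_t cleared = {};
    EvaluateBinarySimd<simd16_t, uint32_t>(GT_AND_NOT, /* scalar */ false, &cleared, value, mask);
    // cleared.u32[0] == 0xA, i.e. value & ~mask -- the BIC/BitwiseClear shape.
    // For the xarch AndNot intrinsics the fold swaps the operands first, so the
    // same helper still produces ~value & mask.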
-#else - if (cnsVN == arg1VN) - { - // xarch swaps the args, so we can only handle `~cns & x` - break; - } -#endif +#if defined(TARGET_XARCH) + std::swap(arg0VN, arg1VN); +#endif // TARGET_XARCH - // Handle `x & ~0 == x` + // Handle `x & ~0 == x` and `0 & ~x == 0` ValueNum zeroVN = VNZeroForType(type); if (cnsVN == zeroVN) { + if (cnsVN == arg0VN) + { + return zeroVN; + } return argVN; } - // Handle `x & 0 == 0` + // Handle `x & ~AllBitsSet == 0` ValueNum allBitsVN = VNAllBitsForType(type); if (cnsVN == allBitsVN) { - return zeroVN; + if (cnsVN == arg1VN) + { + return zeroVN; + } + } + break; + } + + case GT_DIV: + { + if (varTypeIsFloating(baseType)) + { + // Handle `x / NaN == NaN` and `NaN / x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (VNIsVectorNaN(type, baseType, cnsVN)) + { + return cnsVN; + } } - break; - } -#endif -#ifdef TARGET_ARM64 - case NI_AdvSimd_Arm64_Divide: -#else - case NI_SSE_Divide: - case NI_SSE2_Divide: - case NI_AVX_Divide: - case NI_AVX512F_Divide: -#endif - { // Handle `x / 1 == x`. // This is safe for all floats since we do not fault for sNaN ValueNum oneVN; @@ -8201,9 +8152,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_MultiplyByScalar: - case NI_AdvSimd_Arm64_MultiplyByScalar: + case GT_MUL: { if (!varTypeIsFloating(baseType)) { @@ -8213,64 +8162,21 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, if (cnsVN == zeroVN) { - return VNZeroForType(type); - } - } - - assert((TypeOfVN(arg0VN) == type) && (TypeOfVN(arg1VN) == TYP_SIMD8)); - - // Handle x * 1 => x, but only if the scalar RHS is <1, ...>. - if (IsVNConstant(arg1VN)) - { - if (EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0) == VNOneForType(baseType)) - { - return arg0VN; + return zeroVN; } } - break; - } -#endif - -#ifdef TARGET_XARCH - case NI_AVX512F_Multiply: - { - if (!varTypeIsFloating(baseType)) - { - // We don't support this for integrals since it returns a different size than the input - break; - } - FALLTHROUGH; - } -#endif // TARGET_XARCH - -#ifdef TARGET_ARM64 - case NI_AdvSimd_Multiply: - case NI_AdvSimd_Arm64_Multiply: -#else - case NI_SSE_Multiply: - case NI_SSE2_Multiply: - case NI_SSE2_MultiplyLow: - case NI_SSE41_MultiplyLow: - case NI_AVX_Multiply: - case NI_AVX2_MultiplyLow: - case NI_AVX512F_MultiplyLow: - case NI_AVX512BW_MultiplyLow: - case NI_AVX512DQ_MultiplyLow: - case NI_AVX512DQ_VL_MultiplyLow: - case NI_AVX10v1_MultiplyLow: - case NI_AVX10v1_V512_MultiplyLow: -#endif - { - if (!varTypeIsFloating(baseType)) + else { - // Handle `x * 0 == 0` and `0 * x == 0` - // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf - ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN)); + // Handle `x * NaN == NaN` and `NaN * x == NaN` + // This is safe for all floats since we do not fault for sNaN - if (cnsVN == zeroVN) + if (VNIsVectorNaN(type, baseType, cnsVN)) { - return zeroVN; + return cnsVN; } + + // We cannot handle `x * 0 == 0` or ` 0 * x == 0` since `-0 * 0 == -0` + // We cannot handle `x * -0 == -0` or `-0 * x == -0` since `-0 * -0 == 0` } // Handle `x * 1 == x` and `1 * x == x` @@ -8293,17 +8199,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Or: -#else - case NI_SSE_Or: - case NI_SSE2_Or: - case NI_AVX_Or: - case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: -#endif + case GT_OR: { // Handle `x | 0 == x` and `0 | x == x` ValueNum zeroVN 
= VNZeroForType(type); @@ -8323,27 +8219,14 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftLeftLogical: - case NI_AdvSimd_ShiftRightArithmetic: - case NI_AdvSimd_ShiftRightLogical: -#else - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: - case NI_AVX2_ShiftLeftLogical: - case NI_AVX2_ShiftRightArithmetic: - case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX10v1_ShiftRightArithmetic: -#endif + case GT_ROL: + case GT_ROR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: { + // Handle `x rol 0 == x` and `0 rol x == 0` + // Handle `x ror 0 == x` and `0 ror x == 0` // Handle `x << 0 == x` and `0 << x == 0` // Handle `x >> 0 == x` and `0 >> x == 0` // Handle `x >>> 0 == x` and `0 >>> x == 0` @@ -8356,22 +8239,19 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Subtract: - case NI_AdvSimd_Arm64_Subtract: -#else - case NI_SSE_Subtract: - case NI_SSE2_Subtract: - case NI_AVX_Subtract: - case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: -#endif + case GT_SUB: { if (varTypeIsFloating(baseType)) { - // Not safe for floating-point when x == -0.0 - break; + // Handle `x - NaN == NaN` and `NaN - x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (VNIsVectorNaN(type, baseType, cnsVN)) + { + return cnsVN; + } + + // We cannot handle `x - -0 == x` since `-0 - -0 == 0` } // Handle `x - 0 == x` @@ -8384,19 +8264,9 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Xor: -#else - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: -#endif + case GT_XOR: { - // Handle `x | 0 == x` and `0 | x == x` + // Handle `x ^ 0 == x` and `0 ^ x == x` ValueNum zeroVN = VNZeroForType(type); if (cnsVN == zeroVN) @@ -8407,72 +8277,133 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, } default: + { break; + } } - } - else if (arg0VN == arg1VN) - { + switch (ni) { #ifdef TARGET_ARM64 - case NI_AdvSimd_And: -#else - case NI_SSE_And: - case NI_SSE2_And: - case NI_AVX_And: - case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: + { + assert((TypeOfVN(arg0VN) == type) && (TypeOfVN(arg1VN) == TYP_SIMD8)); + + if (!varTypeIsFloating(baseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + + if (cnsVN == arg0VN) + { + if (cnsVN == VNZeroForType(type)) + { + return cnsVN; + } + } + else + { + assert(cnsVN == arg1VN); + ValueNum scalarVN = EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); + + if (scalarVN == VNZeroForType(baseType)) + { + return VNZeroForType(type); + } + } + } + else + { + // Handle `x * NaN == NaN` and `NaN * x == NaN` + // This is safe for all floats since we do not fault for sNaN + + if (cnsVN == arg0VN) + { + if (VNIsVectorNaN(type, baseType, cnsVN)) + { + return cnsVN; + } + } + else + { + assert(cnsVN == arg1VN); + ValueNum scalarVN = 
EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); + + double val; + + if (baseType == TYP_FLOAT) + { + val = GetConstantSingle(scalarVN); + } + else + { + assert(baseType == TYP_DOUBLE); + val = GetConstantDouble(scalarVN); + } + + if (FloatingPointUtils::isNaN(val)) + { + return VNBroadcastForSimdType(type, baseType, scalarVN); + } + } + + // We cannot handle `x * 0 == 0` or ` 0 * x == 0` since `-0 * 0 == -0` + // We cannot handle `x * -0 == -0` or `-0 * x == -0` since `-0 * -0 == 0` + } + + // Handle x * 1 => x, but only if the scalar RHS is <1, ...>. + if (IsVNConstant(arg1VN)) + { + ValueNum scalarVN = EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); + + if (scalarVN == VNOneForType(baseType)) + { + return arg0VN; + } + } + break; + } #endif + + default: + { + break; + } + } + } + else if (arg0VN == arg1VN) + { + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + + if (isScalar) + { + // We don't support folding scalars today + oper = GT_NONE; + } + + switch (oper) + { + case GT_AND: { // Handle `x & x == x` return arg0VN; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_BitwiseClear: -#else - case NI_SSE_AndNot: - case NI_SSE2_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + case GT_AND_NOT: { // Handle `x & ~x == 0` return VNZeroForType(type); } -#endif -#ifdef TARGET_ARM64 - case NI_AdvSimd_Or: -#else - case NI_SSE_Or: - case NI_SSE2_Or: - case NI_AVX_Or: - case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: -#endif + case GT_OR: { // Handle `x | x == x` return arg0VN; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Subtract: - case NI_AdvSimd_Arm64_Subtract: -#else - case NI_SSE_Subtract: - case NI_SSE2_Subtract: - case NI_AVX_Subtract: - case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: -#endif + case GT_SUB: { if (varTypeIsFloating(baseType)) { @@ -8484,17 +8415,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return VNZeroForType(type); } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Xor: -#else - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: -#endif + case GT_XOR: { // Handle `x ^ x == 0` return VNZeroForType(type); @@ -8512,45 +8433,107 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return VNForFunc(type, func, arg0VN, arg1VN); } -ValueNum EvaluateSimdFloatWithElement(ValueNumStore* vns, var_types type, ValueNum arg0VN, int index, float value) +ValueNum EvaluateSimdWithElementFloating( + ValueNumStore* vns, var_types simdType, var_types baseType, ValueNum arg0VN, int32_t arg1, double arg2) { + assert(varTypeIsFloating(baseType)); assert(vns->IsVNConstant(arg0VN)); - assert(static_cast(index) < genTypeSize(type) / genTypeSize(TYP_FLOAT)); + assert(simdType == vns->TypeOfVN(arg0VN)); + assert(static_cast(arg1) < GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)); - switch (type) + switch (simdType) + { + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd8(arg0VN), arg1, arg2); + return vns->VNForSimd8Con(result); + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd12(arg0VN), arg1, arg2); + return vns->VNForSimd12Con(result); + } + + case TYP_SIMD16: + { + simd16_t result = {}; + 
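These EvaluateSimdWithElement dispatchers are thin wrappers over the simd.h EvaluateWithElement helpers added earlier in the patch, which copy the source vector and then overwrite a single lane. A quick sketch of the underlying helper for an integral base type (values are illustrative):

    simd16_t source = {};
    source.i32[0] = 10;
    source.i32[1] = 20;
    source.i32[2] = 30;
    source.i32[3] = 40;

    simd16_t modified = {};
    EvaluateWithElementIntegral(TYP_INT, &modified, source, /* index */ 2, /* value */ 99);
    // modified.i32 == { 10, 20, 99, 40 }; all other lanes are untouched copies.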
EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd16(arg0VN), arg1, arg2); + return vns->VNForSimd16Con(result); + } + +#if defined TARGET_XARCH + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd32(arg0VN), arg1, arg2); + return vns->VNForSimd32Con(result); + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd64(arg0VN), arg1, arg2); + return vns->VNForSimd64Con(result); + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } +} + +ValueNum EvaluateSimdWithElementIntegral( + ValueNumStore* vns, var_types simdType, var_types baseType, ValueNum arg0VN, int32_t arg1, int64_t arg2) +{ + assert(varTypeIsIntegral(baseType)); + assert(simdType == vns->TypeOfVN(arg0VN)); + assert(vns->IsVNConstant(arg0VN)); + assert(static_cast(arg1) < GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)); + + switch (simdType) { case TYP_SIMD8: { - simd8_t cnsVec = vns->GetConstantSimd8(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd8Con(cnsVec); + simd8_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd8(arg0VN), arg1, arg2); + return vns->VNForSimd8Con(result); } + case TYP_SIMD12: { - simd12_t cnsVec = vns->GetConstantSimd12(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd12Con(cnsVec); + simd12_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd12(arg0VN), arg1, arg2); + return vns->VNForSimd12Con(result); } + case TYP_SIMD16: { - simd16_t cnsVec = vns->GetConstantSimd16(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd16Con(cnsVec); + simd16_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd16(arg0VN), arg1, arg2); + return vns->VNForSimd16Con(result); } + #if defined TARGET_XARCH case TYP_SIMD32: { - simd32_t cnsVec = vns->GetConstantSimd32(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd32Con(cnsVec); + simd32_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd32(arg0VN), arg1, arg2); + return vns->VNForSimd32Con(result); } + case TYP_SIMD64: { - simd64_t cnsVec = vns->GetConstantSimd64(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd64Con(cnsVec); + simd64_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd64(arg0VN), arg1, arg2); + return vns->VNForSimd64Con(result); } #endif // TARGET_XARCH + default: { unreached(); @@ -8558,16 +8541,18 @@ ValueNum EvaluateSimdFloatWithElement(ValueNumStore* vns, var_types type, ValueN } } -ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - ValueNum arg2VN, - bool encodeResultType, - ValueNum resultTypeVN) +ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + ValueNum arg2VN, + bool encodeResultType, + ValueNum resultTypeVN) { + var_types type = tree->TypeGet(); + var_types baseType = tree->GetSimdBaseType(); + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + if (IsVNConstant(arg0VN) && IsVNConstant(arg1VN) && IsVNConstant(arg2VN)) { switch (ni) @@ -8580,20 +8565,43 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(var_types type, case NI_Vector512_WithElement: #endif { - int index = GetConstantInt32(arg1VN); - - assert(varTypeIsSIMD(type)); + int32_t index = GetConstantInt32(arg1VN); 
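The index guard used here (and the matching one added to the GetElement case earlier) relies on GenTreeVecCon::ElementCount to decide whether a constant index can be folded at all. A short illustration of the arithmetic for a 128-bit vector of int:

    // 16 bytes of TYP_SIMD16 divided into 4-byte TYP_INT lanes gives 4 elements.
    uint32_t elementCount = GenTreeVecCon::ElementCount(genTypeSize(TYP_SIMD16), TYP_INT);
    assert(elementCount == 4);

    int32_t index = 4;
    if (static_cast<uint32_t>(index) >= elementCount)
    {
        // Out of range: nothing to fold, so the node keeps its runtime range
        // check and still throws when executed.
    }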
- // No meaningful diffs for other base-types. - if ((baseType != TYP_FLOAT) || (TypeOfVN(arg0VN) != type) || - (static_cast(index) >= (genTypeSize(type) / genTypeSize(baseType)))) + if (static_cast(index) >= GenTreeVecCon::ElementCount(genTypeSize(type), baseType)) { + // Nothing to fold for out of range indexes break; } - float value = GetConstantSingle(arg2VN); + if (varTypeIsFloating(baseType)) + { + double value; + + if (baseType == TYP_FLOAT) + { + value = GetConstantSingle(arg2VN); + } + else + { + value = GetConstantDouble(arg2VN); + } + return EvaluateSimdWithElementFloating(this, type, baseType, arg0VN, index, value); + } + else + { + assert(varTypeIsIntegral(baseType)); + int64_t value; - return EvaluateSimdFloatWithElement(this, type, arg0VN, index, value); + if (varTypeIsLong(baseType)) + { + value = GetConstantInt64(arg2VN); + } + else + { + value = GetConstantInt32(arg2VN); + } + return EvaluateSimdWithElementIntegral(this, type, baseType, arg0VN, index, value); + } } default: @@ -12346,12 +12354,10 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) if (opCount == 1) { - ValueNum normalLVN = vnStore->EvalHWIntrinsicFunUnary(tree->TypeGet(), tree->GetSimdBaseType(), - intrinsicId, func, op1vnp.GetLiberal(), - encodeResultType, resultTypeVNPair.GetLiberal()); + ValueNum normalLVN = vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetLiberal(), encodeResultType, + resultTypeVNPair.GetLiberal()); ValueNum normalCVN = - vnStore->EvalHWIntrinsicFunUnary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetConservative(), encodeResultType, + vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetConservative(), encodeResultType, resultTypeVNPair.GetConservative()); normalPair = ValueNumPair(normalLVN, normalCVN); @@ -12366,13 +12372,11 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) if (opCount == 2) { ValueNum normalLVN = - vnStore->EvalHWIntrinsicFunBinary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetLiberal(), op2vnp.GetLiberal(), encodeResultType, - resultTypeVNPair.GetLiberal()); - ValueNum normalCVN = - vnStore->EvalHWIntrinsicFunBinary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetConservative(), op2vnp.GetConservative(), - encodeResultType, resultTypeVNPair.GetConservative()); + vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetLiberal(), op2vnp.GetLiberal(), + encodeResultType, resultTypeVNPair.GetLiberal()); + ValueNum normalCVN = vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetConservative(), + op2vnp.GetConservative(), encodeResultType, + resultTypeVNPair.GetConservative()); normalPair = ValueNumPair(normalLVN, normalCVN); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); @@ -12386,15 +12390,13 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) getOperandVNs(tree->Op(3), &op3vnp, &op3Xvnp); ValueNum normalLVN = - vnStore->EvalHWIntrinsicFunTernary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetLiberal(), op2vnp.GetLiberal(), + vnStore->EvalHWIntrinsicFunTernary(tree, func, op1vnp.GetLiberal(), op2vnp.GetLiberal(), op3vnp.GetLiberal(), encodeResultType, resultTypeVNPair.GetLiberal()); ValueNum normalCVN = - vnStore->EvalHWIntrinsicFunTernary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetConservative(), op2vnp.GetConservative(), - op3vnp.GetConservative(), encodeResultType, - resultTypeVNPair.GetConservative()); + vnStore->EvalHWIntrinsicFunTernary(tree, func, 
op1vnp.GetConservative(), + op2vnp.GetConservative(), op3vnp.GetConservative(), + encodeResultType, resultTypeVNPair.GetConservative()); normalPair = ValueNumPair(normalLVN, normalCVN); diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 6a5032cd79ed74..4df32b8096acd6 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -447,13 +447,13 @@ class ValueNumStore ValueNum VNForByrefCon(target_size_t byrefVal); #if defined(FEATURE_SIMD) - ValueNum VNForSimd8Con(simd8_t cnsVal); - ValueNum VNForSimd12Con(simd12_t cnsVal); - ValueNum VNForSimd16Con(simd16_t cnsVal); + ValueNum VNForSimd8Con(const simd8_t& cnsVal); + ValueNum VNForSimd12Con(const simd12_t& cnsVal); + ValueNum VNForSimd16Con(const simd16_t& cnsVal); #if defined(TARGET_XARCH) - ValueNum VNForSimd32Con(simd32_t cnsVal); - ValueNum VNForSimd64Con(simd64_t cnsVal); - ValueNum VNForSimdMaskCon(simdmask_t cnsVal); + ValueNum VNForSimd32Con(const simd32_t& cnsVal); + ValueNum VNForSimd64Con(const simd64_t& cnsVal); + ValueNum VNForSimdMaskCon(const simdmask_t& cnsVal); #endif // TARGET_XARCH #endif // FEATURE_SIMD ValueNum VNForGenericCon(var_types typ, uint8_t* cnsVal); @@ -553,11 +553,20 @@ class ValueNumStore ValueNum VNAllBitsForType(var_types typ); #ifdef FEATURE_SIMD + // Returns the value number broadcast of the given "simdType" and "simdBaseType". + ValueNum VNBroadcastForSimdType(var_types simdType, var_types simdBaseType, ValueNum valVN); + // Returns the value number for one of the given "simdType" and "simdBaseType". ValueNum VNOneForSimdType(var_types simdType, var_types simdBaseType); // A helper function for constructing VNF_SimdType VNs. ValueNum VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType); + + // Returns if a value number represents NaN in all elements + bool VNIsVectorNaN(var_types simdType, var_types simdBaseType, ValueNum valVN); + + // Returns if a value number represents negative zero in all elements + bool VNIsVectorNegativeZero(var_types simdType, var_types simdBaseType, ValueNum valVN); #endif // FEATURE_SIMD // Create or return the existimg value number representing a singleton exception set @@ -1211,32 +1220,25 @@ class ValueNumStore EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative())); } - ValueNum EvalHWIntrinsicFunUnary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - bool encodeResultType, - ValueNum resultTypeVN); - - ValueNum EvalHWIntrinsicFunBinary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - bool encodeResultType, - ValueNum resultTypeVN); - - ValueNum EvalHWIntrinsicFunTernary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - ValueNum arg2VN, - bool encodeResultType, - ValueNum resultTypeVN); +#if defined(FEATURE_HW_INTRINSICS) + ValueNum EvalHWIntrinsicFunUnary( + GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, bool encodeResultType, ValueNum resultTypeVN); + + ValueNum EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + bool encodeResultType, + ValueNum resultTypeVN); + + ValueNum EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + ValueNum arg2VN, + bool encodeResultType, + ValueNum resultTypeVN); +#endif // FEATURE_HW_INTRINSICS // Returns "true" iff "vn" represents a function application. 
bool IsVNFunc(ValueNum vn); @@ -1611,12 +1613,12 @@ class ValueNumStore #if defined(FEATURE_SIMD) struct Simd8PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(simd8_t x, simd8_t y) + static bool Equals(const simd8_t& x, const simd8_t& y) { return x == y; } - static unsigned GetHashCode(const simd8_t val) + static unsigned GetHashCode(const simd8_t& val) { unsigned hash = 0; @@ -1640,12 +1642,12 @@ class ValueNumStore struct Simd12PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(simd12_t x, simd12_t y) + static bool Equals(const simd12_t& x, const simd12_t& y) { return x == y; } - static unsigned GetHashCode(const simd12_t val) + static unsigned GetHashCode(const simd12_t& val) { unsigned hash = 0; @@ -1670,12 +1672,12 @@ class ValueNumStore struct Simd16PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(simd16_t x, simd16_t y) + static bool Equals(const simd16_t& x, const simd16_t& y) { return x == y; } - static unsigned GetHashCode(const simd16_t val) + static unsigned GetHashCode(const simd16_t& val) { unsigned hash = 0; @@ -1702,12 +1704,12 @@ class ValueNumStore #if defined(TARGET_XARCH) struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(simd32_t x, simd32_t y) + static bool Equals(const simd32_t& x, const simd32_t& y) { return x == y; } - static unsigned GetHashCode(const simd32_t val) + static unsigned GetHashCode(const simd32_t& val) { unsigned hash = 0; @@ -1737,12 +1739,12 @@ class ValueNumStore struct Simd64PrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(simd64_t x, simd64_t y) + static bool Equals(const simd64_t& x, const simd64_t& y) { return x == y; } - static unsigned GetHashCode(const simd64_t val) + static unsigned GetHashCode(const simd64_t& val) { unsigned hash = 0; @@ -1780,12 +1782,12 @@ class ValueNumStore struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(simdmask_t x, simdmask_t y) + static bool Equals(const simdmask_t& x, const simdmask_t& y) { return x == y; } - static unsigned GetHashCode(const simdmask_t val) + static unsigned GetHashCode(const simdmask_t& val) { unsigned hash = 0;
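The signature changes in this header (const simdN_t& in place of by-value simdN_t) follow the same copy-reduction applied to VNForSimd8Con and friends in valuenum.cpp above: the wider constants are up to 64 bytes, so passing them by value copies the whole payload on every map probe and hash. A minimal sketch of the pattern, modeled on the key funcs shown above (the helper name is illustrative, not from the patch):

    // Hashing a simd64_t by const reference: same shape as
    // Simd64PrimitiveKeyFuncs::GetHashCode, without materializing a 64-byte copy.
    unsigned HashSimd64(const simd64_t& val)
    {
        unsigned hash = 0;

        for (unsigned i = 0; i < (sizeof(simd64_t) / sizeof(uint32_t)); i++)
        {
            hash ^= val.u32[i];
        }

        return hash;
    }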