From 23283ffabac13d979d0e1b2e71c51c467544dd21 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 2 May 2022 10:11:49 -0700 Subject: [PATCH] Adding the 2-parameter xplat shuffle helpers and accelerating them (#68559) * Adding managed definitions for cross-platform shuffle helpers * Adding basic tests covering the Vector64/128/256 Shuffle APIs * Adding JIT support to recognize Vector64/128/256.Shuffle as intrinsic but not handle it as an intrinsic * Adding a helper for determining if a node represents a vector constant * Adding x86/x64 acceleration for the 2-parameter xplat shuffle helpers * Adding Arm64 acceleration for the 2-parameter xplat shuffle helpers * Ensure a switch covers the "default" case * Applying formatting patch * Ensure the call to Op uses 1-based indexing * Ensure TYP_LONG and TYP_ULONG fixup simdBaseJitType and simdBaseType * Have gtNewSimdShuffle use fgMakeMultiUse * Don't pass an unecessary compiler instance to `gtNewSimdShuffleNode` * Don't expose the unused gtNewSimdShuffleNode API * Allow fgMakeMultiUse to take a structType and pass it down to fgInsertCommaFormTemp * Pass down the clsHnd to fgMakeMultiUse from gtNewSimdShuffleNode * Adding some additional tests covering the vector shuffle APIs * Ensure the Vector256 test is accessing the right index * Ensure we look up the correct clsHnd * Applying formatting patch --- src/coreclr/jit/compiler.h | 30 +- src/coreclr/jit/gentree.cpp | 332 ++++++++ src/coreclr/jit/gentree.h | 153 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 34 + src/coreclr/jit/hwintrinsiclistarm64.h | 2 + src/coreclr/jit/hwintrinsiclistxarch.h | 2 + src/coreclr/jit/hwintrinsicxarch.cpp | 87 ++ src/coreclr/jit/morph.cpp | 10 +- .../System/Runtime/Intrinsics/Vector128.cs | 244 ++++++ .../System/Runtime/Intrinsics/Vector256.cs | 244 ++++++ .../src/System/Runtime/Intrinsics/Vector64.cs | 171 ++++ .../ref/System.Runtime.Intrinsics.cs | 38 + .../tests/Vectors/Vector128Tests.cs | 600 ++++++++++++++ 
.../tests/Vectors/Vector256Tests.cs | 762 +++++++++++++++++- .../tests/Vectors/Vector64Tests.cs | 424 +++++++++- 15 files changed, 3125 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 26801adca120df..1f54a7af39cb11 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2522,6 +2522,13 @@ class Compiler unsigned simdSize, bool isSimdAsHWIntrinsic); + GenTree* gtNewSimdShuffleNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic); + GenTree* gtNewSimdSqrtNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic); @@ -5495,7 +5502,7 @@ class Compiler // Create a new temporary variable to hold the result of *ppTree, // and transform the graph accordingly. GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr); - GenTree* fgMakeMultiUse(GenTree** ppTree); + GenTree* fgMakeMultiUse(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr); private: // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node. @@ -8261,6 +8268,27 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return NO_CLASS_HANDLE; } +#if defined(FEATURE_HW_INTRINSICS) + CORINFO_CLASS_HANDLE gtGetStructHandleForSimdOrHW(var_types simdType, + CorInfoType simdBaseJitType, + bool isSimdAsHWIntrinsic = false) + { + CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; + + if (isSimdAsHWIntrinsic) + { + clsHnd = gtGetStructHandleForSIMD(simdType, simdBaseJitType); + } + else + { + clsHnd = gtGetStructHandleForHWSIMD(simdType, simdBaseJitType); + } + + assert(clsHnd != NO_CLASS_HANDLE); + return clsHnd; + } +#endif // FEATURE_HW_INTRINSICS + // Returns true if this is a SIMD type that should be considered an opaque // vector type (i.e. do not analyze or promote its fields). 
// Note that all but the fixed vector types are opaque, even though they may diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d4d76c70734ae7..8d443ad2a53209 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21335,6 +21335,338 @@ GenTree* Compiler::gtNewSimdNarrowNode(var_types type, #endif // !TARGET_XARCH && !TARGET_ARM64 } +GenTree* Compiler::gtNewSimdShuffleNode(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + assert(op2 != nullptr); + assert(op2->TypeIs(type)); + assert(op2->IsVectorConst()); + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert(varTypeIsArithmetic(simdBaseType)); + + if (op2->IsVectorAllBitsSet()) + { + // AllBitsSet represents indices that are always "out of range" which means zero should be + // selected for every element. We can special-case this down to just returning a zero node + return gtNewSimdZeroNode(type, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false); + } + + if (op2->IsVectorZero()) + { + // TODO-XARCH-CQ: Zero represents indices that select the first element of op1 each time. We can simplify + // this down to basically a broadcast equivalent. 
+ } + + GenTree* retNode = nullptr; + GenTreeIntConCommon* cnsNode = nullptr; + + size_t elementSize = genTypeSize(simdBaseType); + size_t elementCount = simdSize / elementSize; + +#if defined(TARGET_XARCH) + uint8_t control = 0; + bool crossLane = false; + bool needsZero = varTypeIsSmallInt(simdBaseType); + uint64_t value = 0; + uint8_t vecCns[32] = {}; + uint8_t mskCns[32] = {}; + + for (size_t index = 0; index < elementCount; index++) + { + value = op2->GetIntegralVectorConstElement(index); + + if (value < elementCount) + { + if (simdSize == 32) + { + // Most of the 256-bit shuffle/permute instructions operate as if + // the inputs were 2x 128-bit values. If the selected indices cross + // the respective 128-bit "lane" we may need to specialize the codegen + + if (index < (elementCount / 2)) + { + crossLane |= (value >= (elementCount / 2)); + } + else + { + crossLane |= (value < (elementCount / 2)); + } + } + + // The control word is only consumed for 4-byte and 8-byte elements. Skip it + // for byte/sbyte and short/ushort, where it is unused and the shift count + // could reach the 64-bit width of value, which is undefined behavior. + + control |= varTypeIsSmallInt(simdBaseType) ? 0 : (uint8_t)(value << (index * (elementCount / 2))); + + // When Ssse3 is supported, we may need vecCns to accurately select the relevant + // bytes if some index is outside the valid range. Since x86/x64 is little-endian + // we can simplify this down to a for loop that scales the value and selects count + // sequential bytes. + + for (uint32_t i = 0; i < elementSize; i++) + { + vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); + + // When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet + // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). + + mskCns[(index * elementSize) + i] = 0xFF; + } + } + else + { + needsZero = true; + + // When Ssse3 is supported, we may need vecCns to accurately select the relevant + // bytes if some index is outside the valid range. 
We can do this by just zeroing + // out each byte in the element. This only requires the most significant bit to be + // set, but we use 0xFF instead since that will be the equivalent of AllBitsSet + + for (uint32_t i = 0; i < elementSize; i++) + { + vecCns[(index * elementSize) + i] = 0xFF; + + // When Ssse3 is not supported, we need to adjust the constant to be Zero + // so that we can emit a ConditionalSelect(op2, retNode, zeroNode). + + mskCns[(index * elementSize) + i] = 0x00; + } + } + } + + if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + if (varTypeIsSmallInt(simdBaseType)) + { + if (crossLane) + { + // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort + unreached(); + } + + // If we aren't crossing lanes, then we can decompose the byte/sbyte + // and short/ushort operations into 2x 128-bit operations + + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); + + // We want to build what is essentially the following managed code: + // var op1Lower = op1.GetLower(); + // op1Lower = Ssse3.Shuffle(op1Lower, Vector128.Create(...)); + // + // var op1Upper = op1.GetUpper(); + // op1Upper = Ssse3.Shuffle(op1Upper, Vector128.Create(...)); + // + // return Vector256.Create(op1Lower, op1Upper); + + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + GenTree* op1Dup = fgMakeMultiUse(&op1, clsHnd); + GenTree* op1Lower = gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + + IntrinsicNodeBuilder nodeBuilder1(getAllocator(CMK_ASTNode), 16); + + for (uint32_t i = 0; i < 16; i++) + { + nodeBuilder1.AddOperand(i, gtNewIconNode(vecCns[i])); + } + + op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder1), NI_Vector128_Create, simdBaseJitType, 16, + isSimdAsHWIntrinsic); + + op1Lower = gtNewSimdHWIntrinsicNode(type, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16, + isSimdAsHWIntrinsic); + + GenTree* op1Upper = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128, + simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + + IntrinsicNodeBuilder nodeBuilder2(getAllocator(CMK_ASTNode), 16); + + for (uint32_t i = 0; i < 16; i++) + { + nodeBuilder2.AddOperand(i, gtNewIconNode(vecCns[16 + i])); + } + + op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder2), NI_Vector128_Create, simdBaseJitType, 16, + isSimdAsHWIntrinsic); + + op1Upper = gtNewSimdHWIntrinsicNode(type, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16, + isSimdAsHWIntrinsic); + + return gtNewSimdHWIntrinsicNode(type, op1Lower, op1Upper, gtNewIconNode(1), NI_AVX_InsertVector128, + simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + } + + if (elementSize == 4) + { + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), elementCount); + + for (uint32_t i = 0; i < elementCount; i++) + { + uint8_t value = (uint8_t)(vecCns[i * elementSize] / elementSize); + nodeBuilder.AddOperand(i, gtNewIconNode(value)); + } + + CorInfoType indicesJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UINT : CORINFO_TYPE_INT; + + op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector256_Create, indicesJitType, simdSize, + isSimdAsHWIntrinsic); + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + else + { + assert(elementSize == 8); + + cnsNode = gtNewIconNode(control); + retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_AVX2_Permute4x64, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + } + else + { + if (needsZero && compOpportunisticallyDependsOn(InstructionSet_SSSE3)) + { + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize); + + for (uint32_t i = 0; i < simdSize; i++) + { + nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i])); + } + + op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + + if (varTypeIsLong(simdBaseType)) + { + // TYP_LONG and TYP_ULONG don't have their own shuffle/permute instructions and so we'll + // just utilize the path for TYP_DOUBLE for simplicity. We could alternatively break this + // down into a TYP_INT or TYP_UINT based shuffle, but that's additional complexity for no + // real benefit since shuffle gets its own port rather than using the fp specific ports. 
+ + simdBaseJitType = CORINFO_TYPE_DOUBLE; + simdBaseType = TYP_DOUBLE; + } + + cnsNode = gtNewIconNode(control); + + if (varTypeIsIntegral(simdBaseType)) + { + retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_SSE2_Shuffle, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_AVX_Permute, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + else + { + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic); + + GenTree* op1Dup = fgMakeMultiUse(&op1, clsHnd); + retNode = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, cnsNode, NI_SSE_Shuffle, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + } + + assert(retNode != nullptr); + + if (needsZero) + { + assert(!compIsaSupportedDebugOnly(InstructionSet_SSSE3)); + + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize); + + for (uint32_t i = 0; i < simdSize; i++) + { + nodeBuilder.AddOperand(i, gtNewIconNode(mskCns[i])); + } + + op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + + GenTree* zero = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + retNode = gtNewSimdCndSelNode(type, op2, retNode, zero, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + } + + return retNode; +#elif defined(TARGET_ARM64) + uint64_t value = 0; + uint8_t vecCns[16] = {}; + + for (size_t index = 0; index < elementCount; index++) + { + value = op2->GetIntegralVectorConstElement(index); + + if (value < elementCount) + { + for (uint32_t i = 0; i < elementSize; i++) + { + vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i); + } + } + else + { + for (uint32_t i = 0; i < elementSize; i++) + { + vecCns[(index * elementSize) + i] = 0xFF; + } + } + } + + NamedIntrinsic createIntrinsic = NI_Vector64_Create; + NamedIntrinsic 
lookupIntrinsic = NI_AdvSimd_VectorTableLookup; + + if (simdSize == 16) + { + createIntrinsic = NI_Vector128_Create; + lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup; + + op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize); + + for (uint32_t i = 0; i < simdSize; i++) + { + nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i])); + } + + op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), createIntrinsic, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + + return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + GenTree* Compiler::gtNewSimdSqrtNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic) { diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 7f11f4f26680ac..eb74dcfc11d68d 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1710,6 +1710,10 @@ struct GenTree inline bool IsSIMDZero() const; inline bool IsFloatPositiveZero() const; inline bool IsVectorZero() const; + inline bool IsVectorAllBitsSet() const; + inline bool IsVectorConst(); + + inline uint64_t GetIntegralVectorConstElement(size_t index); inline bool IsBoxedValue(); @@ -8143,6 +8147,155 @@ inline bool GenTree::IsVectorZero() const return false; } +//------------------------------------------------------------------- +// IsVectorAllBitsSet: returns true if this node is a HWIntrinsic that is Vector*_get_AllBitsSet. +// +// Returns: +// True if this represents a HWIntrinsic node that is Vector*_get_AllBitsSet. 
+// +inline bool GenTree::IsVectorAllBitsSet() const +{ +#ifdef FEATURE_HW_INTRINSICS + if (gtOper == GT_HWINTRINSIC) + { + const GenTreeHWIntrinsic* node = AsHWIntrinsic(); + const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + +#if defined(TARGET_XARCH) + return (intrinsicId == NI_Vector128_get_AllBitsSet) || (intrinsicId == NI_Vector256_get_AllBitsSet); +#elif defined(TARGET_ARM64) + return (intrinsicId == NI_Vector64_get_AllBitsSet) || (intrinsicId == NI_Vector128_get_AllBitsSet); +#endif // !TARGET_XARCH && !TARGET_ARM64 + } +#endif // FEATURE_HW_INTRINSICS + + return false; +} + +//------------------------------------------------------------------- +// IsVectorConst: returns true if this node is a HWIntrinsic that represents a constant. +// +// Returns: +// True if this represents a HWIntrinsic node that represents a constant. +// +inline bool GenTree::IsVectorConst() +{ +#ifdef FEATURE_HW_INTRINSICS + if (gtOper == GT_HWINTRINSIC) + { + const GenTreeHWIntrinsic* node = AsHWIntrinsic(); + const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + +#if defined(TARGET_XARCH) + if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create)) + { + for (GenTree* arg : Operands()) + { + if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl()) + { + return false; + } + } + + return true; + } +#elif defined(TARGET_ARM64) + if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create)) + { + for (GenTree* arg : Operands()) + { + if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl()) + { + return false; + } + } + + return true; + } +#endif // !TARGET_XARCH && !TARGET_ARM64 + + return IsVectorZero() || IsVectorAllBitsSet(); + } +#endif // FEATURE_HW_INTRINSICS + + return false; +} + +//------------------------------------------------------------------- +// GetIntegralVectorConstElement: Gets the value of a given element in an integral vector constant +// +// Returns: +// The value of a given element in an integral vector 
constant +// +inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index) +{ +#ifdef FEATURE_HW_INTRINSICS + if (gtOper == GT_HWINTRINSIC) + { + const GenTreeHWIntrinsic* node = AsHWIntrinsic(); + const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + size_t operandsCount = node->GetOperandCount(); // 1 => Create(value) broadcast + + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + +#if defined(TARGET_XARCH) + if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create)) + { + return (uint64_t)node->Op((operandsCount == 1) ? 1 : (index + 1))->AsIntConCommon()->IntegralValue(); + } +#elif defined(TARGET_ARM64) + if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create)) + { + return (uint64_t)node->Op((operandsCount == 1) ? 1 : (index + 1))->AsIntConCommon()->IntegralValue(); + } +#endif // !TARGET_XARCH && !TARGET_ARM64 + + if (IsVectorZero()) + { + return 0; + } + + if (IsVectorAllBitsSet()) + { + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + return 0xFF; + } + + case TYP_SHORT: + case TYP_USHORT: + { + return 0xFFFF; + } + + case TYP_INT: + case TYP_UINT: + { + return 0xFFFFFFFF; + } + + case TYP_LONG: + case TYP_ULONG: + { + return 0xFFFFFFFFFFFFFFFF; + } + + default: + { + unreached(); + } + } + } + } +#endif // FEATURE_HW_INTRINSICS + + return 0; +} + inline bool GenTree::IsBoxedValue() { assert(gtOper != GT_BOX || AsBox()->BoxOp() != nullptr); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index f2105f9f9871a8..0d379f631328e3 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1389,6 +1389,40 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_Shuffle: + case NI_Vector128_Shuffle: + { + assert((sig->numArgs == 2) || (sig->numArgs == 3)); + assert((simdSize == 8) || (simdSize == 16)); + + GenTree* indices = impStackTop(0).val; + + if (!indices->IsVectorConst()) + { + // 
TODO-ARM64-CQ: Handling non-constant indices is a bit more complex + break; + } + + size_t elementSize = genTypeSize(simdBaseType); + size_t elementCount = simdSize / elementSize; + + if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize) + { + // TODO-ARM64-CQ: Handling reinterpreted vector constants is a bit more complex + break; + } + + if (sig->numArgs == 2) + { + op2 = impSIMDPopStack(retType); + op1 = impSIMDPopStack(retType); + + retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ false); + } + break; + } + case NI_Vector64_Sqrt: case NI_Vector128_Sqrt: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index b5960c905ea681..281fef88b34f72 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -89,6 +89,7 @@ HARDWARE_INTRINSIC(Vector64, op_UnaryPlus, HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, ShiftRightArithmetic, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, ShiftRightLogical, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, Store, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -189,6 +190,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) 
HARDWARE_INTRINSIC(Vector128, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 7c80de358cf4ef..22c85d758e3b67 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -106,6 +106,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) @@ -203,6 +204,7 @@ HARDWARE_INTRINSIC(Vector256, op_UnaryPlus, HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, ShiftRightArithmetic, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, ShiftRightLogical, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, Store, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index e3d49758defa3b..c425ffdd9e1504 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1878,6 +1878,93 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_Shuffle: + case NI_Vector256_Shuffle: + { + assert((sig->numArgs == 2) || (sig->numArgs == 3)); + + GenTree* indices = impStackTop(0).val; + + if (!indices->IsVectorConst()) + { + // TODO-XARCH-CQ: Handling non-constant indices is a bit more complex + break; + } + + size_t elementSize = genTypeSize(simdBaseType); + size_t elementCount = simdSize / elementSize; + + if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize) + { + // TODO-XARCH-CQ: Handling reinterpreted vector constants is a bit more complex + break; + } + + if (simdSize == 32) + { + if (!compExactlyDependsOn(InstructionSet_AVX2)) + { + // While we could accelerate some functions on hardware with only AVX support + // it's likely not worth it overall given that IsHardwareAccelerated reports false + break; + } + else if (varTypeIsSmallInt(simdBaseType)) + { + bool crossLane = false; + + for (size_t index = 0; index < elementCount; index++) + { + uint64_t value = indices->GetIntegralVectorConstElement(index); + + if (value >= elementCount) + { + continue; + } + + if (index < (elementCount / 2)) + { + if (value >= (elementCount / 2)) + { + crossLane = true; + break; + } + } + else if (value < (elementCount / 2)) + { + crossLane = true; + 
break; + } + } + + if (crossLane) + { + // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort + break; + } + } + } + else + { + assert(simdSize == 16); + + if (varTypeIsSmallInt(simdBaseType) && !compExactlyDependsOn(InstructionSet_SSSE3)) + { + // TYP_BYTE, TYP_UBYTE, TYP_SHORT, and TYP_USHORT need SSSE3 to be able to shuffle any operation + break; + } + } + + if (sig->numArgs == 2) + { + op2 = impSIMDPopStack(retType); + op1 = impSIMDPopStack(retType); + + retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, + /* isSimdAsHWIntrinsic */ false); + } + break; + } + case NI_Vector128_Sqrt: case NI_Vector256_Sqrt: { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 65b50de28f1108..c9fa5364c74586 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1810,8 +1810,10 @@ void CallArgs::SetNeedsTemp(CallArg* arg) // otherwise insert a comma form temp // // Arguments: -// ppTree - a pointer to the child node we will be replacing with the comma expression that -// evaluates ppTree to a temp and returns the result +// ppTree - a pointer to the child node we will be replacing with the comma expression that +// evaluates ppTree to a temp and returns the result +// +// structType - value type handle if the temp created is of TYP_STRUCT. // // Return Value: // A fresh GT_LCL_VAR node referencing the temp which has not been used @@ -1822,7 +1824,7 @@ void CallArgs::SetNeedsTemp(CallArg* arg) // // Can be safely called in morph preorder, before GTF_GLOB_REF is reliable. 
// -GenTree* Compiler::fgMakeMultiUse(GenTree** pOp) +GenTree* Compiler::fgMakeMultiUse(GenTree** pOp, CORINFO_CLASS_HANDLE structType /*= nullptr*/) { GenTree* const tree = *pOp; @@ -1840,7 +1842,7 @@ GenTree* Compiler::fgMakeMultiUse(GenTree** pOp) } } - return fgInsertCommaFormTemp(pOp); + return fgInsertCommaFormTemp(pOp, structType); } //------------------------------------------------------------------------------ diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index b2e4f025350354..9995bc1d447ea8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -3412,6 +3412,250 @@ public static Vector128 ShiftRightLogical(Vector128 vector, int sh return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + byte selectedIndex = indices.GetElementUnsafe(index); + byte selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + byte selectedIndex = (byte)indices.GetElementUnsafe(index); + sbyte selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + ushort selectedIndex = (ushort)indices.GetElementUnsafe(index); + short selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + ushort selectedIndex = indices.GetElementUnsafe(index); + ushort selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + uint selectedIndex = (uint)indices.GetElementUnsafe(index); + int selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + uint selectedIndex = indices.GetElementUnsafe(index); + uint selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + uint selectedIndex = (uint)indices.GetElementUnsafe(index); + float selectedValue = 0; + + if (selectedIndex < Vector128.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + ulong selectedIndex = (ulong)indices.GetElementUnsafe(index); + long selectedValue = 0; + + if (selectedIndex < (uint)Vector128.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + [CLSCompliant(false)] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + ulong selectedIndex = indices.GetElementUnsafe(index); + ulong selectedValue = 0; + + if (selectedIndex < (uint)Vector128.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + public static Vector128 Shuffle(Vector128 vector, Vector128 indices) + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + ulong selectedIndex = (ulong)indices.GetElementUnsafe(index); + double selectedValue = 0; + + if (selectedIndex < (uint)Vector128.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + /// Computes the square root of a vector on a per-element basis. /// The vector whose square root is to be computed. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index be8282602365df..27796494f0adef 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -3551,6 +3551,250 @@ public static Vector256 ShiftRightLogical(Vector256 vector, int sh return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + byte selectedIndex = indices.GetElementUnsafe(index); + byte selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. 
+ /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + [CLSCompliant(false)] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + byte selectedIndex = (byte)indices.GetElementUnsafe(index); + sbyte selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + ushort selectedIndex = (ushort)indices.GetElementUnsafe(index); + short selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + ushort selectedIndex = indices.GetElementUnsafe(index); + ushort selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + uint selectedIndex = (uint)indices.GetElementUnsafe(index); + int selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + uint selectedIndex = indices.GetElementUnsafe(index); + uint selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + uint selectedIndex = (uint)indices.GetElementUnsafe(index); + float selectedValue = 0; + + if (selectedIndex < Vector256.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + ulong selectedIndex = (ulong)indices.GetElementUnsafe(index); + long selectedValue = 0; + + if (selectedIndex < (uint)Vector256.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + [CLSCompliant(false)] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + ulong selectedIndex = indices.GetElementUnsafe(index); + ulong selectedValue = 0; + + if (selectedIndex < (uint)Vector256.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + public static Vector256 Shuffle(Vector256 vector, Vector256 indices) + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + ulong selectedIndex = (ulong)indices.GetElementUnsafe(index); + double selectedValue = 0; + + if (selectedIndex < (uint)Vector256.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + /// Computes the square root of a vector on a per-element basis. /// The vector whose square root is to be computed. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index e3bd933159fdb7..b84bae7ab015f1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -2616,6 +2616,177 @@ public static Vector64 ShiftRightLogical(Vector64 vector, int shif return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + byte selectedIndex = indices.GetElementUnsafe(index); + byte selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. 
+ /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + [CLSCompliant(false)] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + byte selectedIndex = (byte)indices.GetElementUnsafe(index); + sbyte selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + ushort selectedIndex = (ushort)indices.GetElementUnsafe(index); + short selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + ushort selectedIndex = indices.GetElementUnsafe(index); + ushort selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe(selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + uint selectedIndex = (uint)indices.GetElementUnsafe(index); + int selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ [Intrinsic] + [CLSCompliant(false)] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + uint selectedIndex = indices.GetElementUnsafe(index); + uint selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + [Intrinsic] + public static Vector64 Shuffle(Vector64 vector, Vector64 indices) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + uint selectedIndex = (uint)indices.GetElementUnsafe(index); + float selectedValue = 0; + + if (selectedIndex < Vector64.Count) + { + selectedValue = vector.GetElementUnsafe((int)selectedIndex); + } + result.SetElementUnsafe(index, selectedValue); + } + + return result; + } + /// Computes the square root of a vector on a per-element basis. /// The vector whose square root is to be computed. /// The type of the elements in the vector. 
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index ca118e1935384b..60b34a918dba91 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -228,6 +228,20 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector128 vector, public static System.Runtime.Intrinsics.Vector128 ShiftRightLogical(System.Runtime.Intrinsics.Vector128 vector, int shiftCount) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector128 ShiftRightLogical(System.Runtime.Intrinsics.Vector128 vector, int shiftCount) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, 
System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } public static System.Runtime.Intrinsics.Vector128 Sqrt(System.Runtime.Intrinsics.Vector128 vector) where T : struct { throw null; } [System.CLSCompliantAttribute(false)] public static unsafe void Store(this System.Runtime.Intrinsics.Vector128 source, T* destination) where T : unmanaged { throw null; } @@ -502,6 +516,20 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector256 vector, public static System.Runtime.Intrinsics.Vector256 ShiftRightLogical(System.Runtime.Intrinsics.Vector256 vector, int shiftCount) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector256 ShiftRightLogical(System.Runtime.Intrinsics.Vector256 vector, int shiftCount) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, 
System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } public static System.Runtime.Intrinsics.Vector256 Sqrt(System.Runtime.Intrinsics.Vector256 vector) where T : struct { throw null; } [System.CLSCompliantAttribute(false)] public static unsafe void Store(this System.Runtime.Intrinsics.Vector256 source, T* destination) where T : unmanaged { throw null; } @@ -748,6 +776,16 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector64 vector, public static System.Runtime.Intrinsics.Vector64 ShiftRightLogical(System.Runtime.Intrinsics.Vector64 vector, int shiftCount) { throw null; } [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector64 ShiftRightLogical(System.Runtime.Intrinsics.Vector64 vector, int shiftCount) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { 
throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } public static System.Runtime.Intrinsics.Vector64 Sqrt(System.Runtime.Intrinsics.Vector64 vector) where T : struct { throw null; } [System.CLSCompliantAttribute(false)] public static unsafe void Store(this System.Runtime.Intrinsics.Vector64 source, T* destination) where T : unmanaged { throw null; } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 26942c55037d4b..4779e2005b2092 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -2141,6 +2141,606 @@ public void Vector128UInt64ShiftRightLogicalTest() } } + [Fact] + public void Vector128ByteShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((short)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int 
index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((uint)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleOneInputTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.Create((ulong)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + 
public void Vector128DoubleShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((double)1, 2), Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8), Vector128.Create((short)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((int)1, 2, 3, 4), Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((long)1, 2), Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector128.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((float)1, 2, 3, 4), Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < 
Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8), Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((uint)1, 2, 3, 4), Vector128.Create((uint)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.Shuffle(Vector128.Create((ulong)1, 2), Vector128.Create((ulong)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 indices = Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 indices = Vector128.Create((long)1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), 
result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 indices = Vector128.Create((short)7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 indices = Vector128.Create((int)3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 indices = Vector128.Create((long)1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 indices = Vector128.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 indices = Vector128.Create((int)3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index 
= 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 indices = Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 indices = Vector128.Create((uint)3, 2, 1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 indices = Vector128.Create((ulong)1, 0); + Vector128 result = Vector128.Shuffle(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + 
Assert.Equal((double)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.Shuffle(vector, 
Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.AllBitsSet); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)0, result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 result = 
Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 result = Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 result = 
Vector128.Shuffle(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)1, result.GetElement(index)); + } + } + [Fact] public unsafe void Vector128ByteStoreTest() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index 0c4aa0a149565e..5662ef9b56c7fb 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -2609,6 +2609,766 @@ public void Vector256UInt64ShiftRightLogicalTest() } } + [Fact] + public void Vector256ByteShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((double)1, 2, 3, 4); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((short)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), 
result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((long)1, 2, 3, 4); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((ushort)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + 
+ for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((uint)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleOneInputTest() + { + Vector256 vector = Vector256.Create((ulong)1, 2, 3, 4); + Vector256 result = Vector256.Shuffle(vector, Vector256.Create((ulong)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((double)1, 2, 3, 4), Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((short)15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((long)1, 2, 3, 4), Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16), Vector256.Create((ushort)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((uint)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((ulong)1, 2, 3, 4), Vector256.Create((ulong)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((double)1, 2, 3, 4), Vector256.Create((long)1, 0, 3, 2)); + + for (int index = 0; index < Vector128.Count; index++) + { + 
Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((short)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)3, 2, 1, 0, 7, 6, 5, 4)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((long)1, 2, 3, 4), Vector256.Create((long)1, 0, 3, 2)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void 
Vector256SByteShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)3, 2, 1, 0, 7, 6, 5, 4)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + 
Vector256 result = Vector256.Shuffle(Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((uint)3, 2, 1, 0, 7, 6, 5, 4)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.Shuffle(Vector256.Create((ulong)1, 2, 3, 4), Vector256.Create((ulong)1, 0, 3, 2)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 indices = Vector256.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((double)1, 2, 3, 4); + Vector256 indices = Vector256.Create((long)3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + } 
+ + [Fact] + public void Vector256Int16ShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 indices = Vector256.Create((short)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 indices = Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((long)1, 2, 3, 4); + Vector256 indices = Vector256.Create((long)3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 indices = Vector256.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleOneInputWithLocalIndicesTest() + { 
+ Vector256 vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 indices = Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 indices = Vector256.Create((ushort)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 indices = Vector256.Create((uint)7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((ulong)1, 2, 3, 4); + Vector256 indices = Vector256.Create((ulong)3, 2, 1, 0); + Vector256 result = Vector256.Shuffle(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleOneInputWithAllBitsSetIndicesTest() + { + Vector256 vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 result = Vector256.Shuffle(vector, Vector256.AllBitsSet); + + for (int index = 0; 
index < Vector256<byte>.Count; index++)
+            {
+                Assert.Equal((byte)0, result.GetElement(index)); // all-bits-set indices: every lane is expected to be 0
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256DoubleShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<double> vector = Vector256.Create((double)1, 2, 3, 4);
+            Vector256<double> result = Vector256.Shuffle(vector, Vector256<long>.AllBitsSet); // double lanes are indexed by a long vector
+
+            for (int index = 0; index < Vector256<double>.Count; index++)
+            {
+                Assert.Equal((double)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256Int16ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<short> vector = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            Vector256<short> result = Vector256.Shuffle(vector, Vector256<short>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<short>.Count; index++)
+            {
+                Assert.Equal((short)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256Int32ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<int> vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector256<int> result = Vector256.Shuffle(vector, Vector256<int>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<int>.Count; index++)
+            {
+                Assert.Equal((int)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256Int64ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<long> vector = Vector256.Create((long)1, 2, 3, 4);
+            Vector256<long> result = Vector256.Shuffle(vector, Vector256<long>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<long>.Count; index++)
+            {
+                Assert.Equal((long)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256SByteShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<sbyte> vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+            Vector256<sbyte> result = Vector256.Shuffle(vector, Vector256<sbyte>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)0, result.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public void
Vector256SingleShuffleOneInputWithAllBitsSetIndicesTest() // every index has all bits set; every lane is expected to be 0
+        {
+            Vector256<float> vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector256<float> result = Vector256.Shuffle(vector, Vector256<int>.AllBitsSet); // float lanes are indexed by an int vector
+
+            for (int index = 0; index < Vector256<float>.Count; index++)
+            {
+                Assert.Equal((float)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256UInt16ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<ushort> vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            Vector256<ushort> result = Vector256.Shuffle(vector, Vector256<ushort>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256UInt32ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<uint> vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector256<uint> result = Vector256.Shuffle(vector, Vector256<uint>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<uint>.Count; index++)
+            {
+                Assert.Equal((uint)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector256UInt64ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector256<ulong> vector = Vector256.Create((ulong)1, 2, 3, 4);
+            Vector256<ulong> result = Vector256.Shuffle(vector, Vector256<ulong>.AllBitsSet);
+
+            for (int index = 0; index < Vector256<ulong>.Count; index++)
+            {
+                Assert.Equal((ulong)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256ByteShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<byte> vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+            Vector256<byte> result = Vector256.Shuffle(vector, Vector256<byte>.Zero);
+
+            for (int index = 0; index < Vector256<byte>.Count; index++)
+            {
+                Assert.Equal((byte)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256DoubleShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<double> vector = Vector256.Create((double)1, 2, 3, 4);
+            Vector256<double> result =
Vector256.Shuffle(vector, Vector256<long>.Zero); // double lanes are indexed by a long vector
+
+            for (int index = 0; index < Vector256<double>.Count; index++)
+            {
+                Assert.Equal((double)1, result.GetElement(index)); // all-zero indices: every lane is expected to be element 0
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256Int16ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<short> vector = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            Vector256<short> result = Vector256.Shuffle(vector, Vector256<short>.Zero);
+
+            for (int index = 0; index < Vector256<short>.Count; index++)
+            {
+                Assert.Equal((short)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256Int32ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<int> vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector256<int> result = Vector256.Shuffle(vector, Vector256<int>.Zero);
+
+            for (int index = 0; index < Vector256<int>.Count; index++)
+            {
+                Assert.Equal((int)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256Int64ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<long> vector = Vector256.Create((long)1, 2, 3, 4);
+            Vector256<long> result = Vector256.Shuffle(vector, Vector256<long>.Zero);
+
+            for (int index = 0; index < Vector256<long>.Count; index++)
+            {
+                Assert.Equal((long)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256SByteShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<sbyte> vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+            Vector256<sbyte> result = Vector256.Shuffle(vector, Vector256<sbyte>.Zero);
+
+            for (int index = 0; index < Vector256<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256SingleShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<float> vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector256<float> result = Vector256.Shuffle(vector, Vector256<int>.Zero); // float lanes are indexed by an int vector
+
+            for (int index = 0; index < Vector256<float>.Count; index++)
+            {
+                Assert.Equal((float)1, result.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public void
Vector256UInt16ShuffleOneInputWithZeroIndicesTest() // every index is 0; every lane is expected to equal the first element (1)
+        {
+            Vector256<ushort> vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+            Vector256<ushort> result = Vector256.Shuffle(vector, Vector256<ushort>.Zero);
+
+            for (int index = 0; index < Vector256<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256UInt32ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<uint> vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector256<uint> result = Vector256.Shuffle(vector, Vector256<uint>.Zero);
+
+            for (int index = 0; index < Vector256<uint>.Count; index++)
+            {
+                Assert.Equal((uint)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector256UInt64ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector256<ulong> vector = Vector256.Create((ulong)1, 2, 3, 4);
+            Vector256<ulong> result = Vector256.Shuffle(vector, Vector256<ulong>.Zero);
+
+            for (int index = 0; index < Vector256<ulong>.Count; index++)
+            {
+                Assert.Equal((ulong)1, result.GetElement(index));
+            }
+        }
+
 [Fact] public unsafe void Vector256ByteStoreTest() { @@ -4701,7 +5461,7 @@ public void Vector256Int32IndexerTest(params int[] values) public void Vector256Int64IndexerTest(params long[] values) { var vector = Vector256.Create(values); - + Assert.Equal(vector[0], values[0]); Assert.Equal(vector[1], values[1]); Assert.Equal(vector[2], values[2]); diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index b8a3f95bd4f7d7..d56323bd499873 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -211,7 +211,7 @@ public unsafe void Vector64UInt64ExtractMostSignificantBitsTest() { Vector64 vector = Vector64.Create( 0x0000000000000001UL - ); + ); uint result = Vector64.ExtractMostSignificantBits(vector); Assert.Equal(0b0u, result); @@ -489,7 +489,7 @@ public unsafe void
Vector64ByteLoadAlignedTest() finally { NativeMemory.AlignedFree(value); - } + } } [Fact] @@ -1942,6 +1942,426 @@ public void Vector64UInt64ShiftRightLogicalTest() } }
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64ByteShuffleOneInputTest()
+        {
+            Vector64<byte> vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<byte> result = Vector64.Shuffle(vector, Vector64.Create((byte)7, 6, 5, 4, 3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<byte>.Count; index++)
+            {
+                Assert.Equal((byte)(Vector64<byte>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64Int16ShuffleOneInputTest()
+        {
+            Vector64<short> vector = Vector64.Create((short)1, 2, 3, 4);
+            Vector64<short> result = Vector64.Shuffle(vector, Vector64.Create((short)3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<short>.Count; index++)
+            {
+                Assert.Equal((short)(Vector64<short>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64Int32ShuffleOneInputTest()
+        {
+            Vector64<int> vector = Vector64.Create((int)1, 2);
+            Vector64<int> result = Vector64.Shuffle(vector, Vector64.Create((int)1, 0));
+
+            for (int index = 0; index < Vector64<int>.Count; index++)
+            {
+                Assert.Equal((int)(Vector64<int>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64SByteShuffleOneInputTest()
+        {
+            Vector64<sbyte> vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<sbyte> result = Vector64.Shuffle(vector, Vector64.Create((sbyte)7, 6, 5, 4, 3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)(Vector64<sbyte>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64SingleShuffleOneInputTest()
+        {
+            Vector64<float> vector = Vector64.Create((float)1, 2);
+            Vector64<float> result = Vector64.Shuffle(vector, Vector64.Create((int)1, 0)); // float lanes are indexed by an int vector
+
+            for (int index = 0; index < Vector64<float>.Count; index++)
+            {
+                Assert.Equal((float)(Vector64<float>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64UInt16ShuffleOneInputTest()
+        {
+            Vector64<ushort> vector =
Vector64.Create((ushort)1, 2, 3, 4);
+            Vector64<ushort> result = Vector64.Shuffle(vector, Vector64.Create((ushort)3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)(Vector64<ushort>.Count - index), result.GetElement(index)); // reversed: lane i holds Count - i
+            }
+        }
+
+        [Fact] // Reversed indices passed inline: lane i must equal Count - i.
+        public void Vector64UInt32ShuffleOneInputTest()
+        {
+            Vector64<uint> vector = Vector64.Create((uint)1, 2);
+            Vector64<uint> result = Vector64.Shuffle(vector, Vector64.Create((uint)1, 0));
+
+            for (int index = 0; index < Vector64<uint>.Count; index++)
+            {
+                Assert.Equal((uint)(Vector64<uint>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Input and reversed indices are both created inline in the Shuffle call.
+        public void Vector64ByteShuffleOneInputWithDirectVectorTest()
+        {
+            Vector64<byte> result = Vector64.Shuffle(Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8), Vector64.Create((byte)7, 6, 5, 4, 3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<byte>.Count; index++)
+            {
+                Assert.Equal((byte)(Vector64<byte>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Input and reversed indices are both created inline in the Shuffle call.
+        public void Vector64Int16ShuffleOneInputWithDirectVectorTest()
+        {
+            Vector64<short> result = Vector64.Shuffle(Vector64.Create((short)1, 2, 3, 4), Vector64.Create((short)3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<short>.Count; index++)
+            {
+                Assert.Equal((short)(Vector64<short>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Input and reversed indices are both created inline in the Shuffle call.
+        public void Vector64Int32ShuffleOneInputWithDirectVectorTest()
+        {
+            Vector64<int> result = Vector64.Shuffle(Vector64.Create((int)1, 2), Vector64.Create((int)1, 0));
+
+            for (int index = 0; index < Vector64<int>.Count; index++)
+            {
+                Assert.Equal((int)(Vector64<int>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Input and reversed indices are both created inline in the Shuffle call.
+        public void Vector64SByteShuffleOneInputWithDirectVectorTest()
+        {
+            Vector64<sbyte> result = Vector64.Shuffle(Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8), Vector64.Create((sbyte)7, 6, 5, 4, 3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)(Vector64<sbyte>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public void
Vector64SingleShuffleOneInputWithDirectVectorTest() // input and reversed indices are both created inline in the Shuffle call
+        {
+            Vector64<float> result = Vector64.Shuffle(Vector64.Create((float)1, 2), Vector64.Create((int)1, 0)); // float lanes are indexed by an int vector
+
+            for (int index = 0; index < Vector64<float>.Count; index++)
+            {
+                Assert.Equal((float)(Vector64<float>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Input and reversed indices are both created inline in the Shuffle call.
+        public void Vector64UInt16ShuffleOneInputWithDirectVectorTest()
+        {
+            Vector64<ushort> result = Vector64.Shuffle(Vector64.Create((ushort)1, 2, 3, 4), Vector64.Create((ushort)3, 2, 1, 0));
+
+            for (int index = 0; index < Vector64<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)(Vector64<ushort>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Input and reversed indices are both created inline in the Shuffle call.
+        public void Vector64UInt32ShuffleOneInputWithDirectVectorTest()
+        {
+            Vector64<uint> result = Vector64.Shuffle(Vector64.Create((uint)1, 2), Vector64.Create((uint)1, 0));
+
+            for (int index = 0; index < Vector64<uint>.Count; index++)
+            {
+                Assert.Equal((uint)(Vector64<uint>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64ByteShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<byte> vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<byte> indices = Vector64.Create((byte)7, 6, 5, 4, 3, 2, 1, 0);
+            Vector64<byte> result = Vector64.Shuffle(vector, indices);
+
+            for (int index = 0; index < Vector64<byte>.Count; index++)
+            {
+                Assert.Equal((byte)(Vector64<byte>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64Int16ShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<short> vector = Vector64.Create((short)1, 2, 3, 4);
+            Vector64<short> indices = Vector64.Create((short)3, 2, 1, 0);
+            Vector64<short> result = Vector64.Shuffle(vector, indices);
+
+            for (int index = 0; index < Vector64<short>.Count; index++)
+            {
+                Assert.Equal((short)(Vector64<short>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64Int32ShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<int> vector = Vector64.Create((int)1, 2);
+            Vector64<int> indices = Vector64.Create((int)1, 0);
+            Vector64<int> result = Vector64.Shuffle(vector, indices);
+
+            for
(int index = 0; index < Vector64<int>.Count; index++)
+            {
+                Assert.Equal((int)(Vector64<int>.Count - index), result.GetElement(index)); // reversed: lane i holds Count - i
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64SByteShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<sbyte> vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<sbyte> indices = Vector64.Create((sbyte)7, 6, 5, 4, 3, 2, 1, 0);
+            Vector64<sbyte> result = Vector64.Shuffle(vector, indices);
+
+            for (int index = 0; index < Vector64<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)(Vector64<sbyte>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64SingleShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<float> vector = Vector64.Create((float)1, 2);
+            Vector64<int> indices = Vector64.Create((int)1, 0); // float lanes are indexed by an int vector
+            Vector64<float> result = Vector64.Shuffle(vector, indices);
+
+            for (int index = 0; index < Vector64<float>.Count; index++)
+            {
+                Assert.Equal((float)(Vector64<float>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64UInt16ShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<ushort> vector = Vector64.Create((ushort)1, 2, 3, 4);
+            Vector64<ushort> indices = Vector64.Create((ushort)3, 2, 1, 0);
+            Vector64<ushort> result = Vector64.Shuffle(vector, indices);
+
+            for (int index = 0; index < Vector64<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)(Vector64<ushort>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Reversed indices held in a local: lane i must equal Count - i.
+        public void Vector64UInt32ShuffleOneInputWithLocalIndicesTest()
+        {
+            Vector64<uint> vector = Vector64.Create((uint)1, 2);
+            Vector64<uint> indices = Vector64.Create((uint)1, 0);
+            Vector64<uint> result = Vector64.Shuffle(vector, indices);
+
+            for (int index = 0; index < Vector64<uint>.Count; index++)
+            {
+                Assert.Equal((uint)(Vector64<uint>.Count - index), result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64ByteShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<byte> vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<byte> result = Vector64.Shuffle(vector, Vector64<byte>.AllBitsSet);
+
+            for (int index = 0; index < Vector64<byte>.Count; index++)
+            {
+
Assert.Equal((byte)0, result.GetElement(index)); // all-bits-set indices: every lane is expected to be 0
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64Int16ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<short> vector = Vector64.Create((short)1, 2, 3, 4);
+            Vector64<short> result = Vector64.Shuffle(vector, Vector64<short>.AllBitsSet);
+
+            for (int index = 0; index < Vector64<short>.Count; index++)
+            {
+                Assert.Equal(0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64Int32ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<int> vector = Vector64.Create((int)1, 2);
+            Vector64<int> result = Vector64.Shuffle(vector, Vector64<int>.AllBitsSet);
+
+            for (int index = 0; index < Vector64<int>.Count; index++)
+            {
+                Assert.Equal((int)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64SByteShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<sbyte> vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<sbyte> result = Vector64.Shuffle(vector, Vector64<sbyte>.AllBitsSet);
+
+            for (int index = 0; index < Vector64<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64SingleShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<float> vector = Vector64.Create((float)1, 2);
+            Vector64<float> result = Vector64.Shuffle(vector, Vector64<int>.AllBitsSet); // float lanes are indexed by an int vector
+
+            for (int index = 0; index < Vector64<float>.Count; index++)
+            {
+                Assert.Equal((float)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64UInt16ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<ushort> vector = Vector64.Create((ushort)1, 2, 3, 4);
+            Vector64<ushort> result = Vector64.Shuffle(vector, Vector64<ushort>.AllBitsSet);
+
+            for (int index = 0; index < Vector64<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)0, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index has all bits set; the test expects every lane to be 0.
+        public void Vector64UInt32ShuffleOneInputWithAllBitsSetIndicesTest()
+        {
+            Vector64<uint> vector = Vector64.Create((uint)1, 2);
+            Vector64<uint> result = Vector64.Shuffle(vector, Vector64<uint>.AllBitsSet);
+
+            for (int index = 0; index < Vector64<uint>.Count; index++)
+            {
+                Assert.Equal((uint)0,
result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector64ByteShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector64<byte> vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<byte> result = Vector64.Shuffle(vector, Vector64<byte>.Zero);
+
+            for (int index = 0; index < Vector64<byte>.Count; index++)
+            {
+                Assert.Equal((byte)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector64Int16ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector64<short> vector = Vector64.Create((short)1, 2, 3, 4);
+            Vector64<short> result = Vector64.Shuffle(vector, Vector64<short>.Zero);
+
+            for (int index = 0; index < Vector64<short>.Count; index++)
+            {
+                Assert.Equal(1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector64Int32ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector64<int> vector = Vector64.Create((int)1, 2);
+            Vector64<int> result = Vector64.Shuffle(vector, Vector64<int>.Zero);
+
+            for (int index = 0; index < Vector64<int>.Count; index++)
+            {
+                Assert.Equal((int)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector64SByteShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector64<sbyte> vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8);
+            Vector64<sbyte> result = Vector64.Shuffle(vector, Vector64<sbyte>.Zero);
+
+            for (int index = 0; index < Vector64<sbyte>.Count; index++)
+            {
+                Assert.Equal((sbyte)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector64SingleShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector64<float> vector = Vector64.Create((float)1, 2);
+            Vector64<float> result = Vector64.Shuffle(vector, Vector64<int>.Zero); // float lanes are indexed by an int vector
+
+            for (int index = 0; index < Vector64<float>.Count; index++)
+            {
+                Assert.Equal((float)1, result.GetElement(index));
+            }
+        }
+
+        [Fact] // Every index is 0; the test expects every lane to equal the first element (1).
+        public void Vector64UInt16ShuffleOneInputWithZeroIndicesTest()
+        {
+            Vector64<ushort> vector = Vector64.Create((ushort)1, 2, 3, 4);
+            Vector64<ushort> result = Vector64.Shuffle(vector, Vector64<ushort>.Zero);
+
+            for (int index = 0; index < Vector64<ushort>.Count; index++)
+            {
+                Assert.Equal((ushort)1, result.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public void
Vector64UInt32ShuffleOneInputWithZeroIndicesTest() // every index is 0; every lane is expected to equal the first element (1)
+        {
+            Vector64<uint> vector = Vector64.Create((uint)1, 2);
+            Vector64<uint> result = Vector64.Shuffle(vector, Vector64<uint>.Zero);
+
+            for (int index = 0; index < Vector64<uint>.Count; index++)
+            {
+                Assert.Equal((uint)1, result.GetElement(index));
+            }
+        }
+
 [Fact] public unsafe void Vector64ByteStoreTest() {