diff --git a/tests/common.h b/tests/common.h index edd6feb7..8993d3ec 100644 --- a/tests/common.h +++ b/tests/common.h @@ -210,16 +210,279 @@ result_t validateFloatError(__m128d a, double d0, double d1, double err); #define VALIDATE_UINT16_M64(A, B) validateUInt16(A, B[0], B[1], B[2], B[3]) #define VALIDATE_INT32_M64(A, B) validateInt32(A, B[0], B[1]) #define VALIDATE_UINT32_M64(A, B) validateUInt32(A, B[0], B[1]) - +#define CHECK_RESULT(EXP) \ + if ((EXP) != TEST_SUCCESS) { \ + return TEST_FAIL; \ + } +#define IMM_4_ITER \ + TEST_IMPL(0) \ + TEST_IMPL(1) \ + TEST_IMPL(2) \ + TEST_IMPL(3) #define IMM_8_ITER \ - TEST(0) \ - TEST(1) \ - TEST(2) \ - TEST(3) \ - TEST(4) \ - TEST(5) \ - TEST(6) \ - TEST(7) + IMM_4_ITER \ + TEST_IMPL(4) \ + TEST_IMPL(5) \ + TEST_IMPL(6) \ + TEST_IMPL(7) +#define IMM_16_ITER \ + IMM_8_ITER \ + TEST_IMPL(8) \ + TEST_IMPL(9) \ + TEST_IMPL(10) \ + TEST_IMPL(11) \ + TEST_IMPL(12) \ + TEST_IMPL(13) \ + TEST_IMPL(14) \ + TEST_IMPL(15) +#define IMM_32_ITER \ + IMM_16_ITER \ + TEST_IMPL(16) \ + TEST_IMPL(17) \ + TEST_IMPL(18) \ + TEST_IMPL(19) \ + TEST_IMPL(20) \ + TEST_IMPL(21) \ + TEST_IMPL(22) \ + TEST_IMPL(23) \ + TEST_IMPL(24) \ + TEST_IMPL(25) \ + TEST_IMPL(26) \ + TEST_IMPL(27) \ + TEST_IMPL(28) \ + TEST_IMPL(29) \ + TEST_IMPL(30) \ + TEST_IMPL(31) +#define IMM_64_ITER \ + IMM_32_ITER \ + TEST_IMPL(32) \ + TEST_IMPL(33) \ + TEST_IMPL(34) \ + TEST_IMPL(35) \ + TEST_IMPL(36) \ + TEST_IMPL(37) \ + TEST_IMPL(38) \ + TEST_IMPL(39) \ + TEST_IMPL(40) \ + TEST_IMPL(41) \ + TEST_IMPL(42) \ + TEST_IMPL(43) \ + TEST_IMPL(44) \ + TEST_IMPL(45) \ + TEST_IMPL(46) \ + TEST_IMPL(47) \ + TEST_IMPL(48) \ + TEST_IMPL(49) \ + TEST_IMPL(50) \ + TEST_IMPL(51) \ + TEST_IMPL(52) \ + TEST_IMPL(53) \ + TEST_IMPL(54) \ + TEST_IMPL(55) \ + TEST_IMPL(56) \ + TEST_IMPL(57) \ + TEST_IMPL(58) \ + TEST_IMPL(59) \ + TEST_IMPL(60) \ + TEST_IMPL(61) \ + TEST_IMPL(62) \ + TEST_IMPL(63) +#define IMM_128_ITER \ + IMM_64_ITER \ + TEST_IMPL(64) \ + TEST_IMPL(65) \ + TEST_IMPL(66) \
+ TEST_IMPL(67) \ + TEST_IMPL(68) \ + TEST_IMPL(69) \ + TEST_IMPL(70) \ + TEST_IMPL(71) \ + TEST_IMPL(72) \ + TEST_IMPL(73) \ + TEST_IMPL(74) \ + TEST_IMPL(75) \ + TEST_IMPL(76) \ + TEST_IMPL(77) \ + TEST_IMPL(78) \ + TEST_IMPL(79) \ + TEST_IMPL(80) \ + TEST_IMPL(81) \ + TEST_IMPL(82) \ + TEST_IMPL(83) \ + TEST_IMPL(84) \ + TEST_IMPL(85) \ + TEST_IMPL(86) \ + TEST_IMPL(87) \ + TEST_IMPL(88) \ + TEST_IMPL(89) \ + TEST_IMPL(90) \ + TEST_IMPL(91) \ + TEST_IMPL(92) \ + TEST_IMPL(93) \ + TEST_IMPL(94) \ + TEST_IMPL(95) \ + TEST_IMPL(96) \ + TEST_IMPL(97) \ + TEST_IMPL(98) \ + TEST_IMPL(99) \ + TEST_IMPL(100) \ + TEST_IMPL(101) \ + TEST_IMPL(102) \ + TEST_IMPL(103) \ + TEST_IMPL(104) \ + TEST_IMPL(105) \ + TEST_IMPL(106) \ + TEST_IMPL(107) \ + TEST_IMPL(108) \ + TEST_IMPL(109) \ + TEST_IMPL(110) \ + TEST_IMPL(111) \ + TEST_IMPL(112) \ + TEST_IMPL(113) \ + TEST_IMPL(114) \ + TEST_IMPL(115) \ + TEST_IMPL(116) \ + TEST_IMPL(117) \ + TEST_IMPL(118) \ + TEST_IMPL(119) \ + TEST_IMPL(120) \ + TEST_IMPL(121) \ + TEST_IMPL(122) \ + TEST_IMPL(123) \ + TEST_IMPL(124) \ + TEST_IMPL(125) \ + TEST_IMPL(126) \ + TEST_IMPL(127) +#define IMM_256_ITER \ + IMM_128_ITER \ + TEST_IMPL(128) \ + TEST_IMPL(129) \ + TEST_IMPL(130) \ + TEST_IMPL(131) \ + TEST_IMPL(132) \ + TEST_IMPL(133) \ + TEST_IMPL(134) \ + TEST_IMPL(135) \ + TEST_IMPL(136) \ + TEST_IMPL(137) \ + TEST_IMPL(138) \ + TEST_IMPL(139) \ + TEST_IMPL(140) \ + TEST_IMPL(141) \ + TEST_IMPL(142) \ + TEST_IMPL(143) \ + TEST_IMPL(144) \ + TEST_IMPL(145) \ + TEST_IMPL(146) \ + TEST_IMPL(147) \ + TEST_IMPL(148) \ + TEST_IMPL(149) \ + TEST_IMPL(150) \ + TEST_IMPL(151) \ + TEST_IMPL(152) \ + TEST_IMPL(153) \ + TEST_IMPL(154) \ + TEST_IMPL(155) \ + TEST_IMPL(156) \ + TEST_IMPL(157) \ + TEST_IMPL(158) \ + TEST_IMPL(159) \ + TEST_IMPL(160) \ + TEST_IMPL(161) \ + TEST_IMPL(162) \ + TEST_IMPL(163) \ + TEST_IMPL(164) \ + TEST_IMPL(165) \ + TEST_IMPL(166) \ + TEST_IMPL(167) \ + TEST_IMPL(168) \ + TEST_IMPL(169) \ + TEST_IMPL(170) \ + TEST_IMPL(171)
\ + TEST_IMPL(172) \ + TEST_IMPL(173) \ + TEST_IMPL(174) \ + TEST_IMPL(175) \ + TEST_IMPL(176) \ + TEST_IMPL(177) \ + TEST_IMPL(178) \ + TEST_IMPL(179) \ + TEST_IMPL(180) \ + TEST_IMPL(181) \ + TEST_IMPL(182) \ + TEST_IMPL(183) \ + TEST_IMPL(184) \ + TEST_IMPL(185) \ + TEST_IMPL(186) \ + TEST_IMPL(187) \ + TEST_IMPL(188) \ + TEST_IMPL(189) \ + TEST_IMPL(190) \ + TEST_IMPL(191) \ + TEST_IMPL(192) \ + TEST_IMPL(193) \ + TEST_IMPL(194) \ + TEST_IMPL(195) \ + TEST_IMPL(196) \ + TEST_IMPL(197) \ + TEST_IMPL(198) \ + TEST_IMPL(199) \ + TEST_IMPL(200) \ + TEST_IMPL(201) \ + TEST_IMPL(202) \ + TEST_IMPL(203) \ + TEST_IMPL(204) \ + TEST_IMPL(205) \ + TEST_IMPL(206) \ + TEST_IMPL(207) \ + TEST_IMPL(208) \ + TEST_IMPL(209) \ + TEST_IMPL(210) \ + TEST_IMPL(211) \ + TEST_IMPL(212) \ + TEST_IMPL(213) \ + TEST_IMPL(214) \ + TEST_IMPL(215) \ + TEST_IMPL(216) \ + TEST_IMPL(217) \ + TEST_IMPL(218) \ + TEST_IMPL(219) \ + TEST_IMPL(220) \ + TEST_IMPL(221) \ + TEST_IMPL(222) \ + TEST_IMPL(223) \ + TEST_IMPL(224) \ + TEST_IMPL(225) \ + TEST_IMPL(226) \ + TEST_IMPL(227) \ + TEST_IMPL(228) \ + TEST_IMPL(229) \ + TEST_IMPL(230) \ + TEST_IMPL(231) \ + TEST_IMPL(232) \ + TEST_IMPL(233) \ + TEST_IMPL(234) \ + TEST_IMPL(235) \ + TEST_IMPL(236) \ + TEST_IMPL(237) \ + TEST_IMPL(238) \ + TEST_IMPL(239) \ + TEST_IMPL(240) \ + TEST_IMPL(241) \ + TEST_IMPL(242) \ + TEST_IMPL(243) \ + TEST_IMPL(244) \ + TEST_IMPL(245) \ + TEST_IMPL(246) \ + TEST_IMPL(247) \ + TEST_IMPL(248) \ + TEST_IMPL(249) \ + TEST_IMPL(250) \ + TEST_IMPL(251) \ + TEST_IMPL(252) \ + TEST_IMPL(253) \ + TEST_IMPL(254) \ + TEST_IMPL(255) } // namespace SSE2NEON #endif diff --git a/tests/impl.cpp b/tests/impl.cpp index cc8c7a6c..e95548b9 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -1986,18 +1986,24 @@ result_t test_mm_insert_pi16(const SSE2NEONTestImpl &impl, uint32_t iter) { const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; const int16_t insert = (int16_t) impl.mTestInts[iter]; - const int imm8 = 2; - - int16_t
d[4]; - for (int i = 0; i < 4; i++) { - d[i] = _a[i]; - } - d[imm8] = insert; + __m64 a; + __m64 b; + +#define TEST_IMPL(IDX) \ + int16_t d##IDX[4]; \ + for (int i = 0; i < 4; i++) { \ + d##IDX[i] = _a[i]; \ + } \ + d##IDX[IDX] = insert; \ + \ + a = load_m64(_a); \ + b = _mm_insert_pi16(a, insert, IDX); \ + CHECK_RESULT(VALIDATE_INT16_M64(b, d##IDX)) + + IMM_4_ITER +#undef TEST_IMPL - __m64 a = load_m64(_a); - __m64 b = _mm_insert_pi16(a, insert, imm8); - - return VALIDATE_INT16_M64(b, d); + return TEST_SUCCESS; } result_t test_mm_load_ps(const SSE2NEONTestImpl &impl, uint32_t iter) @@ -2756,18 +2762,25 @@ result_t test_mm_sfence(const SSE2NEONTestImpl &impl, uint32_t iter) result_t test_mm_shuffle_pi16(const SSE2NEONTestImpl &impl, uint32_t iter) { const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; - const int32_t imm = 73; - - __m64 a = load_m64(_a); - __m64 d = _mm_shuffle_pi16(a, imm); - - int16_t _d[4]; - _d[0] = _a[imm & 0x3]; - _d[1] = _a[(imm >> 2) & 0x3]; - _d[2] = _a[(imm >> 4) & 0x3]; - _d[3] = _a[(imm >> 6) & 0x3]; - - return VALIDATE_INT16_M64(d, _d); + __m64 a; + __m64 d; + +#define TEST_IMPL(IDX) \ + a = load_m64(_a); \ + d = _mm_shuffle_pi16(a, IDX); \ + \ + int16_t _d##IDX[4]; \ + _d##IDX[0] = _a[IDX & 0x3]; \ + _d##IDX[1] = _a[(IDX >> 2) & 0x3]; \ + _d##IDX[2] = _a[(IDX >> 4) & 0x3]; \ + _d##IDX[3] = _a[(IDX >> 6) & 0x3]; \ + if (VALIDATE_INT16_M64(d, _d##IDX) != TEST_SUCCESS) { \ + return TEST_FAIL; \ + } + + IMM_256_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } // Note, NEON does not have a general purpose shuffled command like SSE.
@@ -4833,7 +4846,7 @@ result_t test_mm_insert_epi16(const SSE2NEONTestImpl &impl, uint32_t iter) const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; const int16_t insert = (int16_t) *impl.mTestIntPointer2; -#define TEST(IDX) \ +#define TEST_IMPL(IDX) \ int16_t d##IDX[8]; \ for (int i = 0; i < 8; i++) { \ d##IDX[i] = _a[i]; \ @@ -4842,10 +4855,10 @@ result_t test_mm_insert_epi16(const SSE2NEONTestImpl &impl, uint32_t iter) \ __m128i a##IDX = load_m128i(_a); \ __m128i b##IDX = _mm_insert_epi16(a##IDX, insert, IDX); \ - assert(VALIDATE_INT16_M128(b##IDX, d##IDX) == TEST_SUCCESS); + CHECK_RESULT(VALIDATE_INT16_M128(b##IDX, d##IDX)) IMM_8_ITER -#undef TEST +#undef TEST_IMPL return TEST_SUCCESS; } @@ -5766,164 +5779,188 @@ result_t test_mm_setzero_si128(const SSE2NEONTestImpl &impl, uint32_t iter) result_t test_mm_shuffle_epi32(const SSE2NEONTestImpl &impl, uint32_t iter) { const int32_t *_a = impl.mTestIntPointer1; - const int imm = 105; - - int32_t d[4]; - d[0] = _a[((imm) &0x3)]; - d[1] = _a[((imm >> 2) & 0x3)]; - d[2] = _a[((imm >> 4) & 0x3)]; - d[3] = _a[((imm >> 6) & 0x3)]; - - __m128i a = load_m128i(_a); - __m128i c = _mm_shuffle_epi32(a, imm); - - return VALIDATE_INT32_M128(c, d); + __m128i a, c; + +#define TEST_IMPL(IDX) \ + int32_t d##IDX[4]; \ + d##IDX[0] = _a[((IDX) &0x3)]; \ + d##IDX[1] = _a[((IDX >> 2) & 0x3)]; \ + d##IDX[2] = _a[((IDX >> 4) & 0x3)]; \ + d##IDX[3] = _a[((IDX >> 6) & 0x3)]; \ + \ + a = load_m128i(_a); \ + c = _mm_shuffle_epi32(a, IDX); \ + CHECK_RESULT(VALIDATE_INT32_M128(c, d##IDX)) + + IMM_256_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_shuffle_pd(const SSE2NEONTestImpl &impl, uint32_t iter) { const double *_a = (const double *) impl.mTestFloatPointer1; const double *_b = (const double *) impl.mTestFloatPointer2; - - double d0 = _a[iter & 0x1]; - double d1 = _b[(iter & 0x2) >> 1]; - - __m128d a = load_m128d(_a); - __m128d b = load_m128d(_b); - __m128d c; - switch (iter & 0x3) { - case 0: - c =
_mm_shuffle_pd(a, b, 0); - break; - case 1: - c = _mm_shuffle_pd(a, b, 1); - break; - case 2: - c = _mm_shuffle_pd(a, b, 2); - break; - case 3: - c = _mm_shuffle_pd(a, b, 3); - break; - } - - return validateDouble(c, d0, d1); + __m128d a, b, c; + +#define TEST_IMPL(IDX) \ + a = load_m128d(_a); \ + b = load_m128d(_b); \ + c = _mm_shuffle_pd(a, b, IDX); \ + \ + double d0##IDX = _a[IDX & 0x1]; \ + double d1##IDX = _b[(IDX & 0x2) >> 1]; \ + CHECK_RESULT(validateDouble(c, d0##IDX, d1##IDX)) + + IMM_4_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_shufflehi_epi16(const SSE2NEONTestImpl &impl, uint32_t iter) { const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; - const int imm = 112; - - int16_t d[8]; - d[0] = _a[0]; - d[1] = _a[1]; - d[2] = _a[2]; - d[3] = _a[3]; - d[4] = ((const int64_t *) _a)[1] >> ((imm & 0x3) * 16); - d[5] = ((const int64_t *) _a)[1] >> (((imm >> 2) & 0x3) * 16); - d[6] = ((const int64_t *) _a)[1] >> (((imm >> 4) & 0x3) * 16); - d[7] = ((const int64_t *) _a)[1] >> (((imm >> 6) & 0x3) * 16); - - __m128i a = load_m128i(_a); - __m128i c = _mm_shufflehi_epi16(a, imm); - - return VALIDATE_INT16_M128(c, d); + __m128i a, c; + +#define TEST_IMPL(IDX) \ + int16_t d##IDX[8]; \ + d##IDX[0] = _a[0]; \ + d##IDX[1] = _a[1]; \ + d##IDX[2] = _a[2]; \ + d##IDX[3] = _a[3]; \ + d##IDX[4] = ((const int64_t *) _a)[1] >> ((IDX & 0x3) * 16); \ + d##IDX[5] = ((const int64_t *) _a)[1] >> (((IDX >> 2) & 0x3) * 16); \ + d##IDX[6] = ((const int64_t *) _a)[1] >> (((IDX >> 4) & 0x3) * 16); \ + d##IDX[7] = ((const int64_t *) _a)[1] >> (((IDX >> 6) & 0x3) * 16); \ + \ + a = load_m128i(_a); \ + c = _mm_shufflehi_epi16(a, IDX); \ + \ + CHECK_RESULT(VALIDATE_INT16_M128(c, d##IDX)) + + IMM_256_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_shufflelo_epi16(const SSE2NEONTestImpl &impl, uint32_t iter) { const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; - const int imm = 112; - - int16_t d[8]; - d[0] = ((const int64_t *) _a)[0] >>
((imm & 0x3) * 16); - d[1] = ((const int64_t *) _a)[0] >> (((imm >> 2) & 0x3) * 16); - d[2] = ((const int64_t *) _a)[0] >> (((imm >> 4) & 0x3) * 16); - d[3] = ((const int64_t *) _a)[0] >> (((imm >> 6) & 0x3) * 16); - d[4] = _a[4]; - d[5] = _a[5]; - d[6] = _a[6]; - d[7] = _a[7]; - - __m128i a = load_m128i(_a); - __m128i c = _mm_shufflelo_epi16(a, imm); - - return VALIDATE_INT16_M128(c, d); + __m128i a, c; + +#define TEST_IMPL(IDX) \ + int16_t d##IDX[8]; \ + d##IDX[0] = ((const int64_t *) _a)[0] >> ((IDX & 0x3) * 16); \ + d##IDX[1] = ((const int64_t *) _a)[0] >> (((IDX >> 2) & 0x3) * 16); \ + d##IDX[2] = ((const int64_t *) _a)[0] >> (((IDX >> 4) & 0x3) * 16); \ + d##IDX[3] = ((const int64_t *) _a)[0] >> (((IDX >> 6) & 0x3) * 16); \ + d##IDX[4] = _a[4]; \ + d##IDX[5] = _a[5]; \ + d##IDX[6] = _a[6]; \ + d##IDX[7] = _a[7]; \ + \ + a = load_m128i(_a); \ + c = _mm_shufflelo_epi16(a, IDX); \ + \ + CHECK_RESULT(VALIDATE_INT16_M128(c, d##IDX)) + + IMM_256_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_sll_epi16(const SSE2NEONTestImpl &impl, uint32_t iter) { const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; - const int64_t count = (int64_t) (iter % 18 - 1); // range: -1 ~ 16 - - uint16_t d[8]; - d[0] = (count & ~15) ? 0 : _a[0] << count; - d[1] = (count & ~15) ? 0 : _a[1] << count; - d[2] = (count & ~15) ? 0 : _a[2] << count; - d[3] = (count & ~15) ? 0 : _a[3] << count; - d[4] = (count & ~15) ? 0 : _a[4] << count; - d[5] = (count & ~15) ? 0 : _a[5] << count; - d[6] = (count & ~15) ? 0 : _a[6] << count; - d[7] = (count & ~15) ? 0 : _a[7] << count; - - __m128i a = load_m128i(_a); - __m128i b = _mm_set1_epi64x(count); - __m128i c = _mm_sll_epi16(a, b); + __m128i a, b, c; + +#define TEST_IMPL(IDX) \ + uint16_t d##IDX[8]; \ + d##IDX[0] = (IDX & ~15) ? 0 : _a[0] << IDX; \ + d##IDX[1] = (IDX & ~15) ? 0 : _a[1] << IDX; \ + d##IDX[2] = (IDX & ~15) ? 0 : _a[2] << IDX; \ + d##IDX[3] = (IDX & ~15) ? 0 : _a[3] << IDX; \ + d##IDX[4] = (IDX & ~15) ?
0 : _a[4] << IDX; \ + d##IDX[5] = (IDX & ~15) ? 0 : _a[5] << IDX; \ + d##IDX[6] = (IDX & ~15) ? 0 : _a[6] << IDX; \ + d##IDX[7] = (IDX & ~15) ? 0 : _a[7] << IDX; \ + \ + a = load_m128i(_a); \ + b = _mm_set1_epi64x(IDX); \ + c = _mm_sll_epi16(a, b); \ + CHECK_RESULT(VALIDATE_INT16_M128(c, d##IDX)) + + IMM_64_ITER +#undef TEST_IMPL - return VALIDATE_INT16_M128(c, d); + return TEST_SUCCESS; } result_t test_mm_sll_epi32(const SSE2NEONTestImpl &impl, uint32_t iter) { const int32_t *_a = (const int32_t *) impl.mTestIntPointer1; - const int64_t count = (int64_t) (iter % 34 - 1); // range: -1 ~ 32 - - uint32_t d[4]; - d[0] = (count & ~31) ? 0 : _a[0] << count; - d[1] = (count & ~31) ? 0 : _a[1] << count; - d[2] = (count & ~31) ? 0 : _a[2] << count; - d[3] = (count & ~31) ? 0 : _a[3] << count; - - __m128i a = load_m128i(_a); - __m128i b = _mm_set1_epi64x(count); - __m128i c = _mm_sll_epi32(a, b); - - return VALIDATE_INT32_M128(c, d); + __m128i a, b, c; + +#define TEST_IMPL(IDX) \ + uint32_t d##IDX[4]; \ + d##IDX[0] = (IDX & ~31) ? 0 : _a[0] << IDX; \ + d##IDX[1] = (IDX & ~31) ? 0 : _a[1] << IDX; \ + d##IDX[2] = (IDX & ~31) ? 0 : _a[2] << IDX; \ + d##IDX[3] = (IDX & ~31) ? 0 : _a[3] << IDX; \ + \ + a = load_m128i(_a); \ + b = _mm_set1_epi64x(IDX); \ + c = _mm_sll_epi32(a, b); \ + CHECK_RESULT(VALIDATE_INT32_M128(c, d##IDX)) + + IMM_64_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_sll_epi64(const SSE2NEONTestImpl &impl, uint32_t iter) { const int64_t *_a = (const int64_t *) impl.mTestIntPointer1; - const int64_t count = (int64_t) (iter % 66 - 1); // range: -1 ~ 64 - - uint64_t d0 = (count & ~63) ? 0 : _a[0] << count; - uint64_t d1 = (count & ~63) ? 0 : _a[1] << count; - - __m128i a = load_m128i(_a); - __m128i b = _mm_set1_epi64x(count); - __m128i c = _mm_sll_epi64(a, b); - - return validateInt64(c, d0, d1); + __m128i a, b, c; + +#define TEST_IMPL(IDX) \ + uint64_t d0##IDX = (IDX & ~63) ? 0 : _a[0] << IDX; \ + uint64_t d1##IDX = (IDX & ~63) ?
0 : _a[1] << IDX; \ + \ + a = load_m128i(_a); \ + b = _mm_set1_epi64x(IDX); \ + c = _mm_sll_epi64(a, b); \ + \ + CHECK_RESULT(validateInt64(c, d0##IDX, d1##IDX)) + + IMM_64_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_slli_epi16(const SSE2NEONTestImpl &impl, uint32_t iter) { const int16_t *_a = (const int16_t *) impl.mTestIntPointer1; - const int count = (int64_t) (iter % 18 - 1); // range: -1 ~ 16 - - int16_t d[8]; - d[0] = (count & ~15) ? 0 : _a[0] << count; - d[1] = (count & ~15) ? 0 : _a[1] << count; - d[2] = (count & ~15) ? 0 : _a[2] << count; - d[3] = (count & ~15) ? 0 : _a[3] << count; - d[4] = (count & ~15) ? 0 : _a[4] << count; - d[5] = (count & ~15) ? 0 : _a[5] << count; - d[6] = (count & ~15) ? 0 : _a[6] << count; - d[7] = (count & ~15) ? 0 : _a[7] << count; - - __m128i a = load_m128i(_a); - __m128i c = _mm_slli_epi16(a, count); - return VALIDATE_INT16_M128(c, d); + __m128i a, c; + +#define TEST_IMPL(IDX) \ + int16_t d##IDX[8]; \ + d##IDX[0] = (IDX & ~15) ? 0 : _a[0] << IDX; \ + d##IDX[1] = (IDX & ~15) ? 0 : _a[1] << IDX; \ + d##IDX[2] = (IDX & ~15) ? 0 : _a[2] << IDX; \ + d##IDX[3] = (IDX & ~15) ? 0 : _a[3] << IDX; \ + d##IDX[4] = (IDX & ~15) ? 0 : _a[4] << IDX; \ + d##IDX[5] = (IDX & ~15) ? 0 : _a[5] << IDX; \ + d##IDX[6] = (IDX & ~15) ? 0 : _a[6] << IDX; \ + d##IDX[7] = (IDX & ~15) ?
0 : _a[7] << IDX; \ + \ + a = load_m128i(_a); \ + c = _mm_slli_epi16(a, IDX); \ + CHECK_RESULT(VALIDATE_INT16_M128(c, d##IDX)) + + IMM_64_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_slli_epi32(const SSE2NEONTestImpl &impl, uint32_t iter) @@ -7844,103 +7881,56 @@ result_t test_mm_blend_pd(const SSE2NEONTestImpl &impl, uint32_t iter) { const double *_a = (const double *) impl.mTestFloatPointer1; const double *_b = (const double *) impl.mTestFloatPointer2; - // the last argument must be a 2-bit immediate - const int mask = 3; - - double _c[2]; - for (int j = 0; j < 2; j++) { - if ((mask >> j) & 0x1) { - _c[j] = _b[j]; - } else { - _c[j] = _a[j]; - } - } - - __m128d a = load_m128d(_a); - __m128d b = load_m128d(_b); - __m128d c = _mm_blend_pd(a, b, mask); - - return validateDouble(c, _c[0], _c[1]); + __m128d a, b, c; + +#define TEST_IMPL(IDX) \ + double _c##IDX[2]; \ + for (int j = 0; j < 2; j++) { \ + if ((IDX >> j) & 0x1) { \ + _c##IDX[j] = _b[j]; \ + } else { \ + _c##IDX[j] = _a[j]; \ + } \ + } \ + \ + a = load_m128d(_a); \ + b = load_m128d(_b); \ + c = _mm_blend_pd(a, b, IDX); \ + CHECK_RESULT(validateDouble(c, _c##IDX[0], _c##IDX[1])) + + IMM_4_ITER +#undef TEST_IMPL + return TEST_SUCCESS; } result_t test_mm_blend_ps(const SSE2NEONTestImpl &impl, uint32_t iter) { const float *_a = impl.mTestFloatPointer1; const float *_b = impl.mTestFloatPointer2; - - const char mask = (char) iter; - - float _c[4]; - for (int i = 0; i < 4; i++) { - if (mask & (1 << i)) { - _c[i] = _b[i]; - } else { - _c[i] = _a[i]; - } - } - __m128 a = load_m128(_a); __m128 b = load_m128(_b); - - // gcc and clang can't compile call to _mm_blend_ps with 3rd argument as - // integer type due 4 bit size limitation and test framework doesn't support - // compile time constant so for testing decided explicit define all 16 - // possible values __m128 c; - switch (mask & 0xF) { - case 0: - c = _mm_blend_ps(a, b, 0); - break; - case 1: - c = _mm_blend_ps(a, b, 1); - break; - case 2: -
c = _mm_blend_ps(a, b, 2); - break; - case 3: - c = _mm_blend_ps(a, b, 3); - break; - case 4: - c = _mm_blend_ps(a, b, 4); - break; - case 5: - c = _mm_blend_ps(a, b, 5); - break; - case 6: - c = _mm_blend_ps(a, b, 6); - break; - case 7: - c = _mm_blend_ps(a, b, 7); - break; - - case 8: - c = _mm_blend_ps(a, b, 8); - break; - case 9: - c = _mm_blend_ps(a, b, 9); - break; - case 10: - c = _mm_blend_ps(a, b, 10); - break; - case 11: - c = _mm_blend_ps(a, b, 11); - break; + // gcc and clang can't compile call to _mm_blend_ps with 3rd argument as + // integer type due to the 4-bit size limitation. +#define TEST_IMPL(IDX) \ + float _c##IDX[4]; \ + for (int i = 0; i < 4; i++) { \ + if (IDX & (1 << i)) { \ + _c##IDX[i] = _b[i]; \ + } else { \ + _c##IDX[i] = _a[i]; \ + } \ + } \ + \ + c = _mm_blend_ps(a, b, IDX); \ + CHECK_RESULT( \ + validateFloat(c, _c##IDX[0], _c##IDX[1], _c##IDX[2], _c##IDX[3])) + + IMM_16_ITER +#undef TEST_IMPL - case 12: - c = _mm_blend_ps(a, b, 12); - break; - case 13: - c = _mm_blend_ps(a, b, 13); - break; - case 14: - c = _mm_blend_ps(a, b, 14); - break; - case 15: - c = _mm_blend_ps(a, b, 15); - break; - } - return validateFloat(c, _c[0], _c[1], _c[2], _c[3]); + return TEST_SUCCESS; } result_t test_mm_blendv_epi8(const SSE2NEONTestImpl &impl, uint32_t iter)