From 73c96da31514e076c956c303f19ad00e2c191784 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 09:27:29 +0300 Subject: [PATCH 01/23] vectorize search --- benchmarks/CMakeLists.txt | 1 + benchmarks/src/search.cpp | 61 ++++++++++++ stl/inc/algorithm | 28 +++++- stl/inc/xutility | 64 +++++++++++++ stl/src/vector_algorithms.cpp | 172 ++++++++++++++++++++++++++++++++++ 5 files changed, 322 insertions(+), 4 deletions(-) create mode 100644 benchmarks/src/search.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 0aaca52f99..bb6ebbf403 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -119,6 +119,7 @@ add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) add_benchmark(replace src/replace.cpp) +add_benchmark(search src/search.cpp) add_benchmark(std_copy src/std_copy.cpp) add_benchmark(swap_ranges src/swap_ranges.cpp) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp new file mode 100644 index 0000000000..36ee781e14 --- /dev/null +++ b/benchmarks/src/search.cpp @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +const char src_haystack[] = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum " + "ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, " + "ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. " + "Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam " + "velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate " + "ex. 
Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam " + "eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero " + "accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci " + "elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec " + "volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam " + "iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit " + "rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin " + "vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec " + "nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec " + "metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis " + "nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor " + "venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, " + "aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis " + "nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, " + "laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. " + "Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum " + "feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. 
Morbi accumsan, ligula et interdum " + "lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. " + "Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient " + "montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum " + "justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum " + "vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium " + "euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus " + "sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis."; + +const char src_needle[] = "aliquet"; + + +template +void bm(benchmark::State& state) { + const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); + const std::vector needle(std::begin(src_needle), std::end(src_needle)); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = std::ranges::search(haystack, needle); + benchmark::DoNotOptimize(res); + } +} + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK_MAIN(); diff --git a/stl/inc/algorithm b/stl/inc/algorithm index df8d7abfb9..499560bec0 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2142,13 +2142,33 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays const _FwdItPat _Last2, _Pr _Pred) { // find first [_First2, _Last2) satisfying _Pred _STD _Adl_verify_range(_First1, _Last1); _STD _Adl_verify_range(_First2, _Last2); - auto _UFirst1 = _STD _Get_unwrapped(_First1); - const auto _ULast1 = _STD _Get_unwrapped(_Last1); - const auto _UFirst2 = _STD _Get_unwrapped(_First2); - const auto _ULast2 = _STD _Get_unwrapped(_Last2); + auto _UFirst1 = 
_STD _Get_unwrapped(_First1); + const auto _ULast1 = _STD _Get_unwrapped(_Last1); + auto _UFirst2 = _STD _Get_unwrapped(_First2); + const auto _ULast2 = _STD _Get_unwrapped(_Last2); if constexpr (_Is_ranges_random_iter_v<_FwdItHaystack> && _Is_ranges_random_iter_v<_FwdItPat>) { const _Iter_diff_t<_FwdItPat> _Count2 = _ULast2 - _UFirst2; if (_ULast1 - _UFirst1 >= _Count2) { +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe) { + if (!_STD _Is_constant_evaluated()) { + const auto _Ptr1 = _STD _To_address(_UFirst1); + const auto _Ptr_last1 = _STD _To_address(_ULast1); + + const auto _Ptr_res1 = + _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_UFirst2), _STD _To_address(_ULast2)); + + if constexpr (is_pointer_v) { + _UFirst1 = _Ptr_res1; + } else { + _UFirst1 += _Ptr_last1 - _Ptr_res1; + } + + _STD _Seek_wrapped(_Last1, _UFirst1); + return _Last1; + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS const auto _Last_possible = _ULast1 - static_cast<_Iter_diff_t<_FwdItHaystack>>(_Count2); for (;; ++_UFirst1) { if (_STD _Equal_rev_pred_unchecked(_UFirst1, _UFirst2, _ULast2, _STD _Pass_fn(_Pred))) { diff --git a/stl/inc/xutility b/stl/inc/xutility index 0fc9b352a0..f9b7554caa 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -90,6 +90,15 @@ const void* __stdcall __std_find_trivial_2(const void* _First, const void* _Last const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; +const void* __stdcall __std_search_1( + const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; +const void* __stdcall __std_search_2( + const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; +const void* __stdcall __std_search_4( + const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; +const void* __stdcall 
__std_search_8( + const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; + const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; @@ -195,6 +204,21 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe } } +template +_Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept { + if constexpr (sizeof(_Ty1) == 1) { + return const_cast<_Ty1*>(static_cast(::__std_search_1(_First1, _Last1, _First2, _Last2))); + } else if constexpr (sizeof(_Ty1) == 2) { + return const_cast<_Ty1*>(static_cast(::__std_search_2(_First1, _Last1, _First2, _Last2))); + } else if constexpr (sizeof(_Ty1) == 4) { + return const_cast<_Ty1*>(static_cast(::__std_search_4(_First1, _Last1, _First2, _Last2))); + } else if constexpr (sizeof(_Ty1) == 8) { + return const_cast<_Ty1*>(static_cast(::__std_search_8(_First1, _Last1, _First2, _Last2))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size + } +} + template _Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { constexpr bool _Signed = is_signed_v<_Ty>; @@ -6749,6 +6773,46 @@ namespace ranges { _STL_INTERNAL_CHECK(_RANGES distance(_First1, _Last1) == _Count1); _STL_INTERNAL_CHECK(_RANGES distance(_First2, _Last2) == _Count2); +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> + && is_same_v<_Pj2, identity>) { + if (!_STD is_constant_evaluated()) { + const auto _Ptr1 = _STD to_address(_First1); + const auto _Ptr2 = _STD to_address(_First2); + remove_const_t _Ptr_last1; + remove_const_t _Ptr_last2; + + if constexpr (is_same_v<_It1, _Se1>) { + _Ptr_last1 = _STD 
to_address(_Last1); + } else { + _Ptr_last1 = _Ptr1 + _Count1; + } + + if constexpr (is_same_v<_It2, _Se2>) { + _Ptr_last2 = _STD to_address(_Last2); + } else { + _Ptr_last2 = _Ptr2 + _Count2; + } + + const auto _Ptr_res1 = _Search_vectorized(_Ptr1, _Ptr_last1, _Ptr2, _Ptr_last2); + + if constexpr (is_pointer_v<_It1>) { + if (_Ptr_res1 != _Ptr_last1) { + return {_Ptr_res1, _Ptr_res1 + _Count2}; + } else { + return {_Ptr_last1, _Ptr_last1}; + } + } else { + _First1 = _RANGES next(_STD move(_First1), _Ptr_res1 - _Ptr1); + if (_First1 != _Last1) { + return {_First1, _RANGES next(_First1, _Count2)}; + } else { + return {_First1, _First1}; + } + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS for (; _Count1 >= _Count2; ++_First1, (void) --_Count1) { auto _Match_and_mid1 = _RANGES _Equal_rev_pred(_First1, _First2, _Last2, _Pred, _Proj1, _Proj2); if (_Match_and_mid1.first) { diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 69cb787e35..b659db5863 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2647,6 +2647,158 @@ namespace { return _Result; } + + template + bool _Equal_avx2(const void* _First1, const void* _First2, size_t _Size) noexcept { + // no need for DevCom-10331414 workaround; this funtion is called only on AVX2 path + + // preconditions: non-zero length needle, first is already equal + _Advance_bytes(_First1, sizeof(_Ty)); + _Advance_bytes(_First2, sizeof(_Ty)); + _Size -= sizeof(_Ty); + + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Size & ~size_t{0x1F}); + + while (_First1 != _Stop1) { + const __m256i _Data1 = _mm256_loadu_si256(static_cast(_First1)); + const __m256i _Data2 = _mm256_loadu_si256(static_cast(_First2)); + const __m256i _Eq = _mm256_xor_si256(_Data1, _Data2); + if (!_mm256_testz_si256(_Eq, _Eq)) { + return false; + } + + _Advance_bytes(_First1, 32); + _Advance_bytes(_First2, 32); + } + + if (const size_t _Avx_tail_size = _Size & 0x1C; _Avx_tail_size != 0) { + const __m256i 
_Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2); + const __m256i _Data1 = _mm256_maskload_epi32(static_cast(_First1), _Tail_mask); + const __m256i _Data2 = _mm256_maskload_epi32(static_cast(_First2), _Tail_mask); + const __m256i _Eq = _mm256_xor_si256(_Data1, _Data2); + if (!_mm256_testz_si256(_Eq, _Eq)) { + return false; + } + + _Advance_bytes(_First1, _Avx_tail_size); + _Advance_bytes(_First2, _Avx_tail_size); + } + + if constexpr (sizeof(_Ty) <= 1) { + const void* _Stop1_final_tail = _First1; + _Advance_bytes(_Stop1_final_tail, _Size & 0x3); + + while (_First1 != _Stop1_final_tail) { + if (*static_cast(_First1) != *static_cast(_First2)) { + return false; + } + } + } + + return true; + } + + template + const void* __stdcall __std_search_impl( + const void* _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { + const size_t _Size_bytes_2 = _Byte_length(_First2, _Last2); + + if (_Size_bytes_2 == 0) { + return _First1; + } + + if (_Size_bytes_2 == sizeof(_Ty)) { + return __std_find_trivial_impl<_Traits, _Ty>(_First1, _Last1, *static_cast(_First2)); + } + + const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1); + if (_Size_bytes_1 < _Size_bytes_2) { + return _Last1; + } + + const size_t _Max_pos = _Size_bytes_1 - _Size_bytes_2 + sizeof(_Ty); + + if (_Use_avx2()) { + _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 + + const __m256i _Comparand = _Traits::_Set_avx(*static_cast(_First2)); + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Max_pos & ~size_t{0x1F}); + + while (_First1 != _Stop1) { + const __m256i _Data = _mm256_loadu_si256(static_cast(_First1)); + long _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + + while (_Bingo != 0) { + const unsigned long _Offset = _tzcnt_u32(_Bingo); + + const void* _Match1 = _First1; + _Advance_bytes(_Match1, _Offset); + + if (_Equal_avx2<_Ty>(_Match1, _First2, _Size_bytes_2)) { + return _Match1; + } + + _bittestandreset(&_Bingo, 
_Offset); + } + + _Advance_bytes(_First1, 32); + }; + + if (const size_t _Avx_tail_size = _Max_pos & 0x1C; _Avx_tail_size != 0) { + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2); + const __m256i _Data = _mm256_maskload_epi32(static_cast(_First1), _Tail_mask); + long _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask)); + + while (_Bingo != 0) { + const unsigned long _Offset = _tzcnt_u32(_Bingo); + + const void* _Match1 = _First1; + _Advance_bytes(_Match1, _Offset); + + if (_Equal_avx2<_Ty>(_Match1, _First2, _Size_bytes_2)) { + return _Match1; + } + + _bittestandreset(&_Bingo, _Offset); + } + + _Advance_bytes(_First1, _Avx_tail_size); + } + + if constexpr (sizeof(_Ty) <= 2) { + const void* _Stop1_final_tail = _First1; + _Advance_bytes(_Stop1_final_tail, _Max_pos & 0x3); + + while (_First1 != _Stop1_final_tail) { + if (*static_cast(_First1) == *static_cast(_First2)) { + if (_Equal_avx2<_Ty>(_First1, _First2, _Size_bytes_2)) { + return _First1; + } + } + + _Advance_bytes(_First1, sizeof(_Ty)); + } + } + + return _Last1; + } else { + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Max_pos); + + while (_First1 != _Stop1) { + if (memcmp(_First1, _First2, _Size_bytes_2) == 0) { + return _First1; + } + + _Advance_bytes(_First1, sizeof(_Ty)); + } + + return _Last1; + } + } + } // unnamed namespace extern "C" { @@ -2752,6 +2904,26 @@ const void* __stdcall __std_find_first_of_trivial_8( return __std_find_first_of::_Impl_4_8<__std_find_first_of::_Traits_8>(_First1, _Last1, _First2, _Last2); } +const void* __stdcall __std_search_1( + const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { + return __std_search_impl<_Find_traits_1, uint8_t>(_First1, _Last1, _First2, _Last2); +} + +const void* __stdcall __std_search_2( + const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { + return 
__std_search_impl<_Find_traits_2, uint16_t>(_First1, _Last1, _First2, _Last2); +} + +const void* __stdcall __std_search_4( + const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { + return __std_search_impl<_Find_traits_4, uint32_t>(_First1, _Last1, _First2, _Last2); +} + +const void* __stdcall __std_search_8( + const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { + return __std_search_impl<_Find_traits_8, uint64_t>(_First1, _Last1, _First2, _Last2); +} + __declspec(noalias) size_t __stdcall __std_mismatch_1(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { return __std_mismatch_impl<_Find_traits_1, uint8_t>(_First1, _First2, _Count); From 0c17a53745a5e962947bedd12e291a2e8e0285b3 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 14:03:28 +0300 Subject: [PATCH 02/23] very tail fix --- stl/src/vector_algorithms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index b659db5863..d55395b0f0 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2685,7 +2685,7 @@ namespace { _Advance_bytes(_First2, _Avx_tail_size); } - if constexpr (sizeof(_Ty) <= 1) { + if constexpr (sizeof(_Ty) <= 2) { const void* _Stop1_final_tail = _First1; _Advance_bytes(_Stop1_final_tail, _Size & 0x3); @@ -2798,7 +2798,6 @@ namespace { return _Last1; } } - } // unnamed namespace extern "C" { From 11c05eea6679c571ad45b38cccba7516f5297bfe Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 14:27:48 +0300 Subject: [PATCH 03/23] =?UTF-8?q?I=20=F0=9F=A7=A1=20ADL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stl/inc/algorithm | 4 ++-- stl/inc/xutility | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/inc/algorithm 
b/stl/inc/algorithm index 499560bec0..b690c29c15 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2155,8 +2155,8 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays const auto _Ptr1 = _STD _To_address(_UFirst1); const auto _Ptr_last1 = _STD _To_address(_ULast1); - const auto _Ptr_res1 = - _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_UFirst2), _STD _To_address(_ULast2)); + const auto _Ptr_res1 = _STD _Search_vectorized( + _Ptr1, _Ptr_last1, _STD _To_address(_UFirst2), _STD _To_address(_ULast2)); if constexpr (is_pointer_v) { _UFirst1 = _Ptr_res1; diff --git a/stl/inc/xutility b/stl/inc/xutility index f9b7554caa..a4ff769238 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6794,7 +6794,7 @@ namespace ranges { _Ptr_last2 = _Ptr2 + _Count2; } - const auto _Ptr_res1 = _Search_vectorized(_Ptr1, _Ptr_last1, _Ptr2, _Ptr_last2); + const auto _Ptr_res1 = _STD _Search_vectorized(_Ptr1, _Ptr_last1, _Ptr2, _Ptr_last2); if constexpr (is_pointer_v<_It1>) { if (_Ptr_res1 != _Ptr_last1) { From d4fcc96cd6919fa26b3875a2c190983c9181a48f Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 14:29:18 +0300 Subject: [PATCH 04/23] unify ipsum --- benchmarks/src/replace.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/replace.cpp b/benchmarks/src/replace.cpp index fe5bc68641..5740edaaab 100644 --- a/benchmarks/src/replace.cpp +++ b/benchmarks/src/replace.cpp @@ -10,7 +10,7 @@ const char src[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum " "ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, " "ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. " - "Curabitur a magna in ligula tristique ornare. 
Quisque commodo, massa viverra laoreet luctus, sem nisi aliquet " + "Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam " "velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate " "ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam " "eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero " @@ -34,7 +34,7 @@ const char src[] = "montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum " "justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum " "vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium " - "euismod eros, ut posuere ligula ullamcorper id. Nullam aliquam malesuada est at dignissim. Pellentesque finibus " + "euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus " "sagittis libero nec bibendum. 
Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis."; template From da5cf2ec2e2a5ea82d9ac2417e8d7367d95fa7d5 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 14:30:26 +0300 Subject: [PATCH 05/23] -newline --- benchmarks/src/search.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 36ee781e14..bba3d433c4 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -39,7 +39,6 @@ const char src_haystack[] = const char src_needle[] = "aliquet"; - template void bm(benchmark::State& state) { const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); From da157b19f9e6dea9bdb516eb30f72257c31c1134 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 15:09:55 +0300 Subject: [PATCH 06/23] `strstr` for competition --- benchmarks/src/search.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index bba3d433c4..1b2bb43f4a 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -39,6 +39,18 @@ const char src_haystack[] = const char src_needle[] = "aliquet"; +void bm_strstr(benchmark::State& state) { + const std::string haystack(std::begin(src_haystack), std::end(src_haystack)); + const std::string needle(std::begin(src_needle), std::end(src_needle)); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = strstr(haystack.c_str(), needle.c_str()); + benchmark::DoNotOptimize(res); + } +} + template void bm(benchmark::State& state) { const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); @@ -52,6 +64,7 @@ void bm(benchmark::State& state) { } } +BENCHMARK(bm_strstr); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); From 772c51374f83deed8175ca89034704cdd11cb391 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 15:37:14 +0300 Subject: [PATCH 07/23] missing 
progress --- stl/src/vector_algorithms.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index d55395b0f0..24b6f4e823 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2693,6 +2693,8 @@ namespace { if (*static_cast(_First1) != *static_cast(_First2)) { return false; } + _Advance_bytes(_First1, sizeof(_Ty)); + _Advance_bytes(_First2, sizeof(_Ty)); } } From 2c6c32935b6a364072a9c377742d47d547b951f1 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 15:47:31 +0300 Subject: [PATCH 08/23] coverage --- .../VSO_0000000_vector_algorithms/test.cpp | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 07b9081706..9f60709b84 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -170,6 +170,26 @@ auto last_known_good_find_first_of(FwdItH h_first, FwdItH h_last, FwdItN n_first return h_first; } +template +auto last_known_good_search(RanItH h_first, RanItH h_last, RanItN n_first, RanItN n_last) { + const auto n_len = n_last - n_first; + const auto h_len = h_last - h_first; + + if (n_len > h_len) { + return h_last; + } + + const auto h_last_start = h_last - n_len; + + for (; h_first <= h_last_start; ++h_first) { + if (equal(h_first, h_first + n_len, n_first, n_first + n_len)) { + return h_first; + } + } + + return h_last; +} + template void test_case_find(const vector& input, T v) { auto expected = last_known_good_find(input.begin(), input.end(), v); @@ -312,6 +332,50 @@ void test_find_first_of_containers() { #endif // _HAS_CXX20 } +template +void test_case_search(const vector& input_haystack, const vector& input_needle) { + auto expected = + last_known_good_search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); + 
auto actual = search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); + assert(expected == actual); +#if _HAS_CXX20 + auto ranges_actual = ranges::search(input_haystack, input_needle); + assert(expected == begin(ranges_actual)); + if (expected != input_haystack.end()) { + assert(expected + static_cast(input_needle.size()) == end(ranges_actual)); + } else { + assert(expected == end(ranges_actual)); + } +#endif // _HAS_CXX20 +} + +template +void test_search(mt19937_64& gen) { + constexpr size_t needleDataCount = 50; + using TD = conditional_t; + uniform_int_distribution dis('0', '9'); + vector input_haystack; + vector input_needle; + input_haystack.reserve(dataCount); + input_needle.reserve(needleDataCount); + + for (;;) { + input_needle.clear(); + + test_case_search(input_haystack, input_needle); + for (size_t attempts = 0; attempts < needleDataCount; ++attempts) { + input_needle.push_back(static_cast(dis(gen))); + test_case_search(input_haystack, input_needle); + } + + if (input_haystack.size() == dataCount) { + break; + } + + input_haystack.push_back(static_cast(dis(gen))); + } +} + template void test_min_max_element(mt19937_64& gen) { using Limits = numeric_limits; @@ -821,6 +885,16 @@ void test_vector_algorithms(mt19937_64& gen) { test_find_first_of_containers, vector>(); test_find_first_of_containers, vector>(); + test_search(gen); + test_search(gen); + test_search(gen); + test_search(gen); + test_search(gen); + test_search(gen); + test_search(gen); + test_search(gen); + test_search(gen); + test_min_max_element(gen); test_min_max_element(gen); test_min_max_element(gen); From 81a60001b8717c829b40cfd626ed4f06cd0f085b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 15:59:18 +0300 Subject: [PATCH 09/23] these tests are too long --- .../VSO_0000000_vector_algorithms/test.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git 
a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 9f60709b84..305c93d842 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -295,12 +295,13 @@ void test_case_find_first_of(const vector& input_haystack, const vector& i template void test_find_first_of(mt19937_64& gen) { - constexpr size_t needleDataCount = 50; - using TD = conditional_t; + constexpr size_t haystackDataCount = 200; + constexpr size_t needleDataCount = 35; + using TD = conditional_t; uniform_int_distribution dis('a', 'z'); vector input_haystack; vector input_needle; - input_haystack.reserve(dataCount); + input_haystack.reserve(haystackDataCount); input_needle.reserve(needleDataCount); for (;;) { @@ -312,7 +313,7 @@ void test_find_first_of(mt19937_64& gen) { test_case_find_first_of(input_haystack, input_needle); } - if (input_haystack.size() == dataCount) { + if (input_haystack.size() == haystackDataCount) { break; } @@ -351,12 +352,13 @@ void test_case_search(const vector& input_haystack, const vector& input_ne template void test_search(mt19937_64& gen) { - constexpr size_t needleDataCount = 50; - using TD = conditional_t; + constexpr size_t haystackDataCount = 200; + constexpr size_t needleDataCount = 35; + using TD = conditional_t; uniform_int_distribution dis('0', '9'); vector input_haystack; vector input_needle; - input_haystack.reserve(dataCount); + input_haystack.reserve(haystackDataCount); input_needle.reserve(needleDataCount); for (;;) { @@ -368,7 +370,7 @@ void test_search(mt19937_64& gen) { test_case_search(input_haystack, input_needle); } - if (input_haystack.size() == dataCount) { + if (input_haystack.size() == haystackDataCount) { break; } From 0b59b2ec4df66ac100b890f6c976f2764d377208 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 17:51:15 +0300 Subject: [PATCH 10/23] missing include --- benchmarks/src/search.cpp | 3 ++- 
1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 1b2bb43f4a..298e91b416 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include const char src_haystack[] = @@ -46,7 +47,7 @@ void bm_strstr(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(haystack); benchmark::DoNotOptimize(needle); - auto res = strstr(haystack.c_str(), needle.c_str()); + auto res = std::strstr(haystack.c_str(), needle.c_str()); benchmark::DoNotOptimize(res); } } From f2806c562fbf97cb6b3f83147c0a61422efc0aba Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 19:42:42 +0300 Subject: [PATCH 11/23] default_searcher --- benchmarks/src/search.cpp | 35 +++++++++++++++---- stl/inc/algorithm | 2 +- stl/inc/functional | 24 +++++++++++++ stl/inc/xutility | 2 +- .../VSO_0000000_vector_algorithms/test.cpp | 5 +++ 5 files changed, 59 insertions(+), 9 deletions(-) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 298e91b416..6ead82b700 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include const char src_haystack[] = @@ -40,7 +41,7 @@ const char src_haystack[] = const char src_needle[] = "aliquet"; -void bm_strstr(benchmark::State& state) { +void c_strstr(benchmark::State& state) { const std::string haystack(std::begin(src_haystack), std::end(src_haystack)); const std::string needle(std::begin(src_needle), std::end(src_needle)); @@ -53,7 +54,7 @@ void bm_strstr(benchmark::State& state) { } template -void bm(benchmark::State& state) { +void ranges_search(benchmark::State& state) { const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); const std::vector needle(std::begin(src_needle), std::end(src_needle)); @@ -65,10 +66,30 @@ void bm(benchmark::State& state) { } } -BENCHMARK(bm_strstr); -BENCHMARK(bm); 
-BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +template +void search_default_searcher(benchmark::State& state) { + const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); + const std::vector needle(std::begin(src_needle), std::end(src_needle)); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = std::search(haystack.begin(), haystack.end(), std::default_searcher{needle.begin(), needle.end()}); + benchmark::DoNotOptimize(res); + } +} + +BENCHMARK(c_strstr); + +BENCHMARK(ranges_search); +BENCHMARK(ranges_search); +BENCHMARK(ranges_search); +BENCHMARK(ranges_search); + +BENCHMARK(search_default_searcher); +BENCHMARK(search_default_searcher); +BENCHMARK(search_default_searcher); +BENCHMARK(search_default_searcher); + BENCHMARK_MAIN(); diff --git a/stl/inc/algorithm b/stl/inc/algorithm index b690c29c15..2361259c94 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2161,7 +2161,7 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays if constexpr (is_pointer_v) { _UFirst1 = _Ptr_res1; } else { - _UFirst1 += _Ptr_last1 - _Ptr_res1; + _UFirst1 += _Ptr_res1 - _Ptr1; } _STD _Seek_wrapped(_Last1, _UFirst1); diff --git a/stl/inc/functional b/stl/inc/functional index 68d01df97b..1394b53636 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -2456,6 +2456,30 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked( _Iter_diff_t<_FwdItHaystack> _Count1 = _Last1 - _First1; _Iter_diff_t<_FwdItPat> _Count2 = _Last2 - _First2; +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe<_FwdItHaystack, _FwdItPat, _Pred_eq>) { + if (!_STD _Is_constant_evaluated()) { + const auto _Ptr1 = _STD _To_address(_First1); + const auto _Ptr_last1 = _STD _To_address(_Last1); + + const auto _Ptr_res1 = + _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_First2), _STD _To_address(_Last2)); + + if constexpr 
(is_pointer_v<_FwdItHaystack>) { + _First1 = _Ptr_res1; + } else { + _First1 += _Ptr_res1 - _Ptr1; + } + + if (_First1 != _Last1) { + return {_First1, _First1 + _Count2}; + } else { + return {_Last1, _Last1}; + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + for (; _Count2 <= _Count1; ++_First1, (void) --_Count1) { // room for match, try it _FwdItHaystack _Mid1 = _First1; for (_FwdItPat _Mid2 = _First2;; ++_Mid1, (void) ++_Mid2) { diff --git a/stl/inc/xutility b/stl/inc/xutility index a4ff769238..17a3d4d22f 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5384,7 +5384,7 @@ constexpr bool _Equal_memcmp_is_safe_helper = template constexpr bool _Equal_memcmp_is_safe = - _Equal_memcmp_is_safe_helper, remove_const_t<_Iter2>, _Pr>; + _Equal_memcmp_is_safe_helper, remove_const_t<_Iter2>, remove_const_t<_Pr>>; template _NODISCARD int _Memcmp_ranges(_CtgIt1 _First1, _CtgIt1 _Last1, _CtgIt2 _First2) { diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 305c93d842..ea8af03154 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -339,6 +339,11 @@ void test_case_search(const vector& input_haystack, const vector& input_ne last_known_good_search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); auto actual = search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); assert(expected == actual); +#if _HAS_CXX17 + auto searcher_actual = search( + input_haystack.begin(), input_haystack.end(), default_searcher{input_needle.begin(), input_needle.end()}); + assert(expected == searcher_actual); +#endif // _HAS_CXX17 #if _HAS_CXX20 auto ranges_actual = ranges::search(input_haystack, input_needle); assert(expected == begin(ranges_actual)); From 15e54a9291b971cd6b7fbd751e93cf888f1ea452 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 
21:52:44 +0300 Subject: [PATCH 12/23] ADL again --- stl/inc/functional | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/functional b/stl/inc/functional index 1394b53636..f6c932fbf5 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -2463,7 +2463,7 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked( const auto _Ptr_last1 = _STD _To_address(_Last1); const auto _Ptr_res1 = - _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_First2), _STD _To_address(_Last2)); + _STD _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_First2), _STD _To_address(_Last2)); if constexpr (is_pointer_v<_FwdItHaystack>) { _First1 = _Ptr_res1; From 26646fe551d9fd6eef003eaac31734e6b828a5cc Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 5 May 2024 22:35:56 +0300 Subject: [PATCH 13/23] avoid `memcmp` in fallback --- stl/src/vector_algorithms.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 24b6f4e823..96fffbf81d 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2786,17 +2786,30 @@ namespace { return _Last1; } else { - const void* _Stop1 = _First1; + auto _Ptr1 = static_cast(_First1); + const auto _Ptr2 = static_cast(_First2); + const size_t _Count2 = _Size_bytes_2 / sizeof(_Ty); + const void* _Stop1 = _Ptr1; _Advance_bytes(_Stop1, _Max_pos); - while (_First1 != _Stop1) { - if (memcmp(_First1, _First2, _Size_bytes_2) == 0) { - return _First1; + for (; _Ptr1 != _Stop1; ++_Ptr1) { + if (*_Ptr1 != *_Ptr2) { + continue; } - _Advance_bytes(_First1, sizeof(_Ty)); - } + bool _Equal = true; + for (size_t i = 1; i != _Count2; ++i) { + if (_Ptr1[i] != _Ptr2[i]) { + _Equal = false; + break; + } + } + + if (_Equal) { + return _Ptr1; + } + } return _Last1; } } From 0c473a4be5a9cb04220d79dde8001c3336cbd8eb Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Fri, 7 Jun 2024 18:43:46 +0300 
Subject: [PATCH 14/23] partial review comment --- benchmarks/src/search.cpp | 1 + stl/src/vector_algorithms.cpp | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 6ead82b700..e1d127afb1 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include const char src_haystack[] = diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 96fffbf81d..92f8f163a5 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2648,9 +2648,10 @@ namespace { return _Result; } +#ifndef _M_ARM64EC template bool _Equal_avx2(const void* _First1, const void* _First2, size_t _Size) noexcept { - // no need for DevCom-10331414 workaround; this funtion is called only on AVX2 path + // no need for DevCom-10331414 workaround; this function is called only from AVX2 path // preconditions: non-zero length needle, first is already equal _Advance_bytes(_First1, sizeof(_Ty)); @@ -2700,6 +2701,7 @@ namespace { return true; } +#endif // !defined(_M_ARM64EC) template const void* __stdcall __std_search_impl( @@ -2721,6 +2723,7 @@ namespace { const size_t _Max_pos = _Size_bytes_1 - _Size_bytes_2 + sizeof(_Ty); +#ifndef _M_ARM64EC if (_Use_avx2()) { _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 @@ -2746,7 +2749,7 @@ namespace { } _Advance_bytes(_First1, 32); - }; + } if (const size_t _Avx_tail_size = _Max_pos & 0x1C; _Avx_tail_size != 0) { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2); @@ -2785,7 +2788,9 @@ namespace { } return _Last1; - } else { + } else +#endif // !defined(_M_ARM64EC) + { auto _Ptr1 = static_cast(_First1); const auto _Ptr2 = static_cast(_First2); const size_t _Count2 = _Size_bytes_2 / sizeof(_Ty); @@ -2799,8 +2804,8 @@ namespace { bool _Equal = true; - for (size_t i = 1; i != _Count2; ++i) { - if (_Ptr1[i] != _Ptr2[i]) { + for (size_t _Idx = 1; 
_Idx != _Count2; ++_Idx) { + if (_Ptr1[_Idx] != _Ptr2[_Idx]) { _Equal = false; break; } From 629afd49c281b0cd01a125105ca2493af0f65e8e Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 11:28:49 -0700 Subject: [PATCH 15/23] Internal static assert `sizeof(_Ty1) == sizeof(_Ty2)`. --- stl/inc/algorithm | 1 + stl/inc/xutility | 1 + 2 files changed, 2 insertions(+) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index c63afbdf65..828fb78162 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -200,6 +200,7 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val template _Ty1* _Find_first_of_vectorized( _Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept { + _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); if constexpr (sizeof(_Ty1) == 1) { return const_cast<_Ty1*>( static_cast(::__std_find_first_of_trivial_1(_First1, _Last1, _First2, _Last2))); diff --git a/stl/inc/xutility b/stl/inc/xutility index 951c2466b0..34a6f97060 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -206,6 +206,7 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe template _Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept { + _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); if constexpr (sizeof(_Ty1) == 1) { return const_cast<_Ty1*>(static_cast(::__std_search_1(_First1, _Last1, _First2, _Last2))); } else if constexpr (sizeof(_Ty1) == 2) { From a24e6eb58d67d0ce5420ec81f0a380ed91808b77 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 11:51:58 -0700 Subject: [PATCH 16/23] Use `+=` and `+` instead of `_RANGES next`. 
--- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 34a6f97060..300b00da83 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6799,9 +6799,9 @@ namespace ranges { return {_Ptr_last1, _Ptr_last1}; } } else { - _First1 = _RANGES next(_STD move(_First1), _Ptr_res1 - _Ptr1); + _First1 += _Ptr_res1 - _Ptr1; if (_First1 != _Last1) { - return {_First1, _RANGES next(_First1, _Count2)}; + return {_First1, _First1 + _Count2}; } else { return {_First1, _First1}; } From 9d07a400016bf5425bca61a6ad4e6a60c30f3821 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 11:55:23 -0700 Subject: [PATCH 17/23] Style: Return `_Ptr_res1` instead of `_Ptr_last1` when they're equal. --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 300b00da83..3284ff2f69 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6796,7 +6796,7 @@ namespace ranges { if (_Ptr_res1 != _Ptr_last1) { return {_Ptr_res1, _Ptr_res1 + _Count2}; } else { - return {_Ptr_last1, _Ptr_last1}; + return {_Ptr_res1, _Ptr_res1}; } } else { _First1 += _Ptr_res1 - _Ptr1; From d57f9b6d3225ef76eaa181b00ab29175f1dde68d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 12:06:48 -0700 Subject: [PATCH 18/23] Style: In `<algorithm>` and `<functional>`, `_Ptr_last1` doesn't need to be named. 
--- stl/inc/algorithm | 5 ++--- stl/inc/functional | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 828fb78162..2d9757bb2d 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2153,11 +2153,10 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe) { if (!_STD _Is_constant_evaluated()) { - const auto _Ptr1 = _STD _To_address(_UFirst1); - const auto _Ptr_last1 = _STD _To_address(_ULast1); + const auto _Ptr1 = _STD _To_address(_UFirst1); const auto _Ptr_res1 = _STD _Search_vectorized( - _Ptr1, _Ptr_last1, _STD _To_address(_UFirst2), _STD _To_address(_ULast2)); + _Ptr1, _STD _To_address(_ULast1), _STD _To_address(_UFirst2), _STD _To_address(_ULast2)); if constexpr (is_pointer_v) { _UFirst1 = _Ptr_res1; diff --git a/stl/inc/functional b/stl/inc/functional index 28255a34ec..b8e290a192 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -2462,11 +2462,10 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked( #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe<_FwdItHaystack, _FwdItPat, _Pred_eq>) { if (!_STD _Is_constant_evaluated()) { - const auto _Ptr1 = _STD _To_address(_First1); - const auto _Ptr_last1 = _STD _To_address(_Last1); + const auto _Ptr1 = _STD _To_address(_First1); - const auto _Ptr_res1 = - _STD _Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_First2), _STD _To_address(_Last2)); + const auto _Ptr_res1 = _STD _Search_vectorized( + _Ptr1, _STD _To_address(_Last1), _STD _To_address(_First2), _STD _To_address(_Last2)); if constexpr (is_pointer_v<_FwdItHaystack>) { _First1 = _Ptr_res1; From e51b98d6f8aa7c4e8fe4c79af7297d4da7f096f6 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 12:40:41 -0700 Subject: [PATCH 19/23] Restore top-level constness for `_UFirst2`. 
--- stl/inc/algorithm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 2d9757bb2d..0fc847b1b4 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2143,10 +2143,10 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays const _FwdItPat _Last2, _Pr _Pred) { // find first [_First2, _Last2) satisfying _Pred _STD _Adl_verify_range(_First1, _Last1); _STD _Adl_verify_range(_First2, _Last2); - auto _UFirst1 = _STD _Get_unwrapped(_First1); - const auto _ULast1 = _STD _Get_unwrapped(_Last1); - auto _UFirst2 = _STD _Get_unwrapped(_First2); - const auto _ULast2 = _STD _Get_unwrapped(_Last2); + auto _UFirst1 = _STD _Get_unwrapped(_First1); + const auto _ULast1 = _STD _Get_unwrapped(_Last1); + const auto _UFirst2 = _STD _Get_unwrapped(_First2); + const auto _ULast2 = _STD _Get_unwrapped(_Last2); if constexpr (_Is_ranges_random_iter_v<_FwdItHaystack> && _Is_ranges_random_iter_v<_FwdItPat>) { const _Iter_diff_t<_FwdItPat> _Count2 = _ULast2 - _UFirst2; if (_ULast1 - _UFirst1 >= _Count2) { From d4462a5fd2ae3f6a47df42f6c8145d2355def461 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 14:30:45 -0700 Subject: [PATCH 20/23] Benchmark classic search(). 
--- benchmarks/src/search.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index e1d127afb1..a7423e6c63 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -54,6 +54,19 @@ void c_strstr(benchmark::State& state) { } } +template +void classic_search(benchmark::State& state) { + const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); + const std::vector needle(std::begin(src_needle), std::end(src_needle)); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end()); + benchmark::DoNotOptimize(res); + } +} + template void ranges_search(benchmark::State& state) { const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); @@ -82,6 +95,11 @@ void search_default_searcher(benchmark::State& state) { BENCHMARK(c_strstr); +BENCHMARK(classic_search); +BENCHMARK(classic_search); +BENCHMARK(classic_search); +BENCHMARK(classic_search); + BENCHMARK(ranges_search); BENCHMARK(ranges_search); BENCHMARK(ranges_search); From 95ba820c5801c08e21690248bfe4dd5ac6481fdf Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 14:52:17 -0700 Subject: [PATCH 21/23] Simplify `last_known_good_search()`. Who's a good search? You are! Yes you! 
--- .../std/tests/VSO_0000000_vector_algorithms/test.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 90faecc68c..5f81721806 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -173,16 +173,9 @@ auto last_known_good_find_first_of(FwdItH h_first, FwdItH h_last, FwdItN n_first template auto last_known_good_search(RanItH h_first, RanItH h_last, RanItN n_first, RanItN n_last) { const auto n_len = n_last - n_first; - const auto h_len = h_last - h_first; - if (n_len > h_len) { - return h_last; - } - - const auto h_last_start = h_last - n_len; - - for (; h_first <= h_last_start; ++h_first) { - if (equal(h_first, h_first + n_len, n_first, n_first + n_len)) { + for (; h_last - h_first >= n_len; ++h_first) { + if (equal(h_first, h_first + n_len, n_first, n_last)) { return h_first; } } From 72a0d293310dc758b1082aacf005ced6fdf4b933 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 15:50:41 -0700 Subject: [PATCH 22/23] Revert vectorized implementation. 
--- stl/inc/algorithm | 19 ---- stl/inc/functional | 23 ---- stl/inc/xutility | 65 ------------ stl/src/vector_algorithms.cpp | 191 ---------------------------------- 4 files changed, 298 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 0fc847b1b4..ad597e64b5 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2150,25 +2150,6 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays if constexpr (_Is_ranges_random_iter_v<_FwdItHaystack> && _Is_ranges_random_iter_v<_FwdItPat>) { const _Iter_diff_t<_FwdItPat> _Count2 = _ULast2 - _UFirst2; if (_ULast1 - _UFirst1 >= _Count2) { -#if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe) { - if (!_STD _Is_constant_evaluated()) { - const auto _Ptr1 = _STD _To_address(_UFirst1); - - const auto _Ptr_res1 = _STD _Search_vectorized( - _Ptr1, _STD _To_address(_ULast1), _STD _To_address(_UFirst2), _STD _To_address(_ULast2)); - - if constexpr (is_pointer_v) { - _UFirst1 = _Ptr_res1; - } else { - _UFirst1 += _Ptr_res1 - _Ptr1; - } - - _STD _Seek_wrapped(_Last1, _UFirst1); - return _Last1; - } - } -#endif // _USE_STD_VECTOR_ALGORITHMS const auto _Last_possible = _ULast1 - static_cast<_Iter_diff_t<_FwdItHaystack>>(_Count2); for (;; ++_UFirst1) { if (_STD _Equal_rev_pred_unchecked(_UFirst1, _UFirst2, _ULast2, _STD _Pass_fn(_Pred))) { diff --git a/stl/inc/functional b/stl/inc/functional index b8e290a192..5150939b5d 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -2459,29 +2459,6 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked( _Iter_diff_t<_FwdItHaystack> _Count1 = _Last1 - _First1; _Iter_diff_t<_FwdItPat> _Count2 = _Last2 - _First2; -#if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe<_FwdItHaystack, _FwdItPat, _Pred_eq>) { - if (!_STD _Is_constant_evaluated()) { - const auto _Ptr1 = _STD _To_address(_First1); - - const auto _Ptr_res1 = _STD _Search_vectorized( - _Ptr1, _STD _To_address(_Last1), _STD 
_To_address(_First2), _STD _To_address(_Last2)); - - if constexpr (is_pointer_v<_FwdItHaystack>) { - _First1 = _Ptr_res1; - } else { - _First1 += _Ptr_res1 - _Ptr1; - } - - if (_First1 != _Last1) { - return {_First1, _First1 + _Count2}; - } else { - return {_Last1, _Last1}; - } - } - } -#endif // _USE_STD_VECTOR_ALGORITHMS - for (; _Count2 <= _Count1; ++_First1, (void) --_Count1) { // room for match, try it _FwdItHaystack _Mid1 = _First1; for (_FwdItPat _Mid2 = _First2;; ++_Mid1, (void) ++_Mid2) { diff --git a/stl/inc/xutility b/stl/inc/xutility index 3284ff2f69..2116cda1f0 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -90,15 +90,6 @@ const void* __stdcall __std_find_trivial_2(const void* _First, const void* _Last const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; -const void* __stdcall __std_search_1( - const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; -const void* __stdcall __std_search_2( - const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; -const void* __stdcall __std_search_4( - const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; -const void* __stdcall __std_search_8( - const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept; - const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; @@ -204,22 +195,6 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe } } -template -_Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const 
_First2, _Ty2* const _Last2) noexcept { - _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); - if constexpr (sizeof(_Ty1) == 1) { - return const_cast<_Ty1*>(static_cast(::__std_search_1(_First1, _Last1, _First2, _Last2))); - } else if constexpr (sizeof(_Ty1) == 2) { - return const_cast<_Ty1*>(static_cast(::__std_search_2(_First1, _Last1, _First2, _Last2))); - } else if constexpr (sizeof(_Ty1) == 4) { - return const_cast<_Ty1*>(static_cast(::__std_search_4(_First1, _Last1, _First2, _Last2))); - } else if constexpr (sizeof(_Ty1) == 8) { - return const_cast<_Ty1*>(static_cast(::__std_search_8(_First1, _Last1, _First2, _Last2))); - } else { - _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size - } -} - template _Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { constexpr bool _Signed = is_signed_v<_Ty>; @@ -6769,46 +6744,6 @@ namespace ranges { _STL_INTERNAL_CHECK(_RANGES distance(_First1, _Last1) == _Count1); _STL_INTERNAL_CHECK(_RANGES distance(_First2, _Last2) == _Count2); -#if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> - && is_same_v<_Pj2, identity>) { - if (!_STD is_constant_evaluated()) { - const auto _Ptr1 = _STD to_address(_First1); - const auto _Ptr2 = _STD to_address(_First2); - remove_const_t _Ptr_last1; - remove_const_t _Ptr_last2; - - if constexpr (is_same_v<_It1, _Se1>) { - _Ptr_last1 = _STD to_address(_Last1); - } else { - _Ptr_last1 = _Ptr1 + _Count1; - } - - if constexpr (is_same_v<_It2, _Se2>) { - _Ptr_last2 = _STD to_address(_Last2); - } else { - _Ptr_last2 = _Ptr2 + _Count2; - } - - const auto _Ptr_res1 = _STD _Search_vectorized(_Ptr1, _Ptr_last1, _Ptr2, _Ptr_last2); - - if constexpr (is_pointer_v<_It1>) { - if (_Ptr_res1 != _Ptr_last1) { - return {_Ptr_res1, _Ptr_res1 + _Count2}; - } else { - return {_Ptr_res1, _Ptr_res1}; - } - } else { - _First1 += _Ptr_res1 - _Ptr1; - if (_First1 != _Last1) { - return {_First1, _First1 + _Count2}; - } else { 
- return {_First1, _First1}; - } - } - } - } -#endif // _USE_STD_VECTOR_ALGORITHMS for (; _Count1 >= _Count2; ++_First1, (void) --_Count1) { auto _Match_and_mid1 = _RANGES _Equal_rev_pred(_First1, _First2, _Last2, _Pred, _Proj1, _Proj2); if (_Match_and_mid1.first) { diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index ae00e5b791..3d17a0afc5 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2636,177 +2636,6 @@ namespace { return _Result; } - -#ifndef _M_ARM64EC - template - bool _Equal_avx2(const void* _First1, const void* _First2, size_t _Size) noexcept { - // no need for DevCom-10331414 workaround; this function is called only from AVX2 path - - // preconditions: non-zero length needle, first is already equal - _Advance_bytes(_First1, sizeof(_Ty)); - _Advance_bytes(_First2, sizeof(_Ty)); - _Size -= sizeof(_Ty); - - const void* _Stop1 = _First1; - _Advance_bytes(_Stop1, _Size & ~size_t{0x1F}); - - while (_First1 != _Stop1) { - const __m256i _Data1 = _mm256_loadu_si256(static_cast(_First1)); - const __m256i _Data2 = _mm256_loadu_si256(static_cast(_First2)); - const __m256i _Eq = _mm256_xor_si256(_Data1, _Data2); - if (!_mm256_testz_si256(_Eq, _Eq)) { - return false; - } - - _Advance_bytes(_First1, 32); - _Advance_bytes(_First2, 32); - } - - if (const size_t _Avx_tail_size = _Size & 0x1C; _Avx_tail_size != 0) { - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2); - const __m256i _Data1 = _mm256_maskload_epi32(static_cast(_First1), _Tail_mask); - const __m256i _Data2 = _mm256_maskload_epi32(static_cast(_First2), _Tail_mask); - const __m256i _Eq = _mm256_xor_si256(_Data1, _Data2); - if (!_mm256_testz_si256(_Eq, _Eq)) { - return false; - } - - _Advance_bytes(_First1, _Avx_tail_size); - _Advance_bytes(_First2, _Avx_tail_size); - } - - if constexpr (sizeof(_Ty) <= 2) { - const void* _Stop1_final_tail = _First1; - _Advance_bytes(_Stop1_final_tail, _Size & 0x3); - - while (_First1 != 
_Stop1_final_tail) { - if (*static_cast(_First1) != *static_cast(_First2)) { - return false; - } - _Advance_bytes(_First1, sizeof(_Ty)); - _Advance_bytes(_First2, sizeof(_Ty)); - } - } - - return true; - } -#endif // !defined(_M_ARM64EC) - - template - const void* __stdcall __std_search_impl( - const void* _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { - const size_t _Size_bytes_2 = _Byte_length(_First2, _Last2); - - if (_Size_bytes_2 == 0) { - return _First1; - } - - if (_Size_bytes_2 == sizeof(_Ty)) { - return __std_find_trivial_impl<_Traits, _Ty>(_First1, _Last1, *static_cast(_First2)); - } - - const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1); - if (_Size_bytes_1 < _Size_bytes_2) { - return _Last1; - } - - const size_t _Max_pos = _Size_bytes_1 - _Size_bytes_2 + sizeof(_Ty); - -#ifndef _M_ARM64EC - if (_Use_avx2()) { - _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 - - const __m256i _Comparand = _Traits::_Set_avx(*static_cast(_First2)); - const void* _Stop1 = _First1; - _Advance_bytes(_Stop1, _Max_pos & ~size_t{0x1F}); - - while (_First1 != _Stop1) { - const __m256i _Data = _mm256_loadu_si256(static_cast(_First1)); - long _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); - - while (_Bingo != 0) { - const unsigned long _Offset = _tzcnt_u32(_Bingo); - - const void* _Match1 = _First1; - _Advance_bytes(_Match1, _Offset); - - if (_Equal_avx2<_Ty>(_Match1, _First2, _Size_bytes_2)) { - return _Match1; - } - - _bittestandreset(&_Bingo, _Offset); - } - - _Advance_bytes(_First1, 32); - } - - if (const size_t _Avx_tail_size = _Max_pos & 0x1C; _Avx_tail_size != 0) { - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2); - const __m256i _Data = _mm256_maskload_epi32(static_cast(_First1), _Tail_mask); - long _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask)); - - while (_Bingo != 0) { - const unsigned long _Offset = 
_tzcnt_u32(_Bingo); - - const void* _Match1 = _First1; - _Advance_bytes(_Match1, _Offset); - - if (_Equal_avx2<_Ty>(_Match1, _First2, _Size_bytes_2)) { - return _Match1; - } - - _bittestandreset(&_Bingo, _Offset); - } - - _Advance_bytes(_First1, _Avx_tail_size); - } - - if constexpr (sizeof(_Ty) <= 2) { - const void* _Stop1_final_tail = _First1; - _Advance_bytes(_Stop1_final_tail, _Max_pos & 0x3); - - while (_First1 != _Stop1_final_tail) { - if (*static_cast(_First1) == *static_cast(_First2)) { - if (_Equal_avx2<_Ty>(_First1, _First2, _Size_bytes_2)) { - return _First1; - } - } - - _Advance_bytes(_First1, sizeof(_Ty)); - } - } - - return _Last1; - } else -#endif // !defined(_M_ARM64EC) - { - auto _Ptr1 = static_cast(_First1); - const auto _Ptr2 = static_cast(_First2); - const size_t _Count2 = _Size_bytes_2 / sizeof(_Ty); - const void* _Stop1 = _Ptr1; - _Advance_bytes(_Stop1, _Max_pos); - - for (; _Ptr1 != _Stop1; ++_Ptr1) { - if (*_Ptr1 != *_Ptr2) { - continue; - } - - bool _Equal = true; - - for (size_t _Idx = 1; _Idx != _Count2; ++_Idx) { - if (_Ptr1[_Idx] != _Ptr2[_Idx]) { - _Equal = false; - break; - } - } - - if (_Equal) { - return _Ptr1; - } - } - return _Last1; - } - } } // unnamed namespace extern "C" { @@ -2912,26 +2741,6 @@ const void* __stdcall __std_find_first_of_trivial_8( return __std_find_first_of::_Impl_4_8<__std_find_first_of::_Traits_8>(_First1, _Last1, _First2, _Last2); } -const void* __stdcall __std_search_1( - const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { - return __std_search_impl<_Find_traits_1, uint8_t>(_First1, _Last1, _First2, _Last2); -} - -const void* __stdcall __std_search_2( - const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { - return __std_search_impl<_Find_traits_2, uint16_t>(_First1, _Last1, _First2, _Last2); -} - -const void* __stdcall __std_search_4( - const void* const _First1, const void* 
const _Last1, const void* const _First2, const void* const _Last2) noexcept { - return __std_search_impl<_Find_traits_4, uint32_t>(_First1, _Last1, _First2, _Last2); -} - -const void* __stdcall __std_search_8( - const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept { - return __std_search_impl<_Find_traits_8, uint64_t>(_First1, _Last1, _First2, _Last2); -} - __declspec(noalias) size_t __stdcall __std_mismatch_1(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { return __std_mismatch_impl<_Find_traits_1, uint8_t>(_First1, _First2, _Count); From 38b32d68252da8ff7ec2c16ca787eb7a0356ac6e Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 10 Jun 2024 16:11:20 -0700 Subject: [PATCH 23/23] Drop `memcmp` paths from `_Equal_rev_pred_unchecked` and `_Equal_rev_pred`. `_Equal_rev_pred_unchecked` is called by classic/parallel `search`/`find_end`. `_Equal_rev_pred` is called by ranges `search`/`find_end`. This doesn't affect `equal` etc. --- stl/inc/algorithm | 9 --------- stl/inc/xutility | 23 ----------------------- 2 files changed, 32 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index ad597e64b5..af24443189 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -2120,15 +2120,6 @@ namespace ranges { template _NODISCARD _CONSTEXPR20 bool _Equal_rev_pred_unchecked(_InIt1 _First1, _InIt2 _First2, const _InIt2 _Last2, _Pr _Pred) { // compare [_First1, ...) 
to [_First2, _Last2) - if constexpr (_Equal_memcmp_is_safe<_InIt1, _InIt2, _Pr>) { -#if _HAS_CXX20 - if (!_STD is_constant_evaluated()) -#endif // _HAS_CXX20 - { - return _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0; - } - } - for (; _First2 != _Last2; ++_First1, (void) ++_First2) { if (!_Pred(*_First1, *_First2)) { return false; diff --git a/stl/inc/xutility b/stl/inc/xutility index 2116cda1f0..ee83e903b9 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6648,35 +6648,12 @@ namespace ranges { _EXPORT_STD inline constexpr _Adjacent_find_fn adjacent_find; - template - concept _Equal_rev_pred_can_memcmp = is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity> - && sized_sentinel_for<_Se2, _It2> && _Equal_memcmp_is_safe<_It1, _It2, _Pr>; - template _Se2, class _Pr, class _Pj1, class _Pj2> requires indirectly_comparable<_It1, _It2, _Pr, _Pj1, _Pj2> _NODISCARD constexpr pair _Equal_rev_pred( _It1 _First1, _It2 _First2, const _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) { // Returns {true, _First1 + (_Last2 - _First2)} if [_First1, ...) equals [_First2, _Last2), and {false, {}} // otherwise. - constexpr bool _Optimize = _Equal_rev_pred_can_memcmp<_It1, _It2, _Se2, _Pr, _Pj1, _Pj2>; - if constexpr (_Optimize) { - if (!_STD is_constant_evaluated()) { - bool _Ans; - if constexpr (same_as<_It2, _Se2>) { - _Ans = _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0; - } else { - _Ans = _STD _Memcmp_count(_First1, _First2, static_cast(_Last2 - _First2)) == 0; - } - - if (_Ans) { - _First1 += (_Last2 - _First2); - return {true, _STD move(_First1)}; - } else { - return {false, _It1 {}}; - } - } - } - for (; _First2 != _Last2; ++_First1, (void) ++_First2) { if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) { return {false, _It1 {}};