Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve search/find_end perf by dropping memcmp #4654

Merged
merged 24 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
73c96da
vectorize search
AlexGuteniev May 5, 2024
0c17a53
very tail fix
AlexGuteniev May 5, 2024
11c05ee
I 🧡 ADL
AlexGuteniev May 5, 2024
d4fcc96
unify ipsum
AlexGuteniev May 5, 2024
da5cf2e
-newline
AlexGuteniev May 5, 2024
da157b1
`strstr` for competition
AlexGuteniev May 5, 2024
772c513
missing progress
AlexGuteniev May 5, 2024
2c6c329
coverage
AlexGuteniev May 5, 2024
81a6000
these tests are too long
AlexGuteniev May 5, 2024
0b59b2e
missing include
AlexGuteniev May 5, 2024
f2806c5
default_searcher
AlexGuteniev May 5, 2024
15e54a9
ADL again
AlexGuteniev May 5, 2024
26646fe
avoid `memcmp` in fallback
AlexGuteniev May 5, 2024
0c473a4
partial review comment
AlexGuteniev Jun 7, 2024
3452fcc
Merge branch 'main' into search
StephanTLavavej Jun 10, 2024
629afd4
Internal static assert `sizeof(_Ty1) == sizeof(_Ty2)`.
StephanTLavavej Jun 10, 2024
a24e6eb
Use `+=` and `+` instead of `_RANGES next`.
StephanTLavavej Jun 10, 2024
9d07a40
Style: Return `_Ptr_res1` instead of `_Ptr_last1` when they're equal.
StephanTLavavej Jun 10, 2024
d57f9b6
Style: In `<algorithm>` and `<functional>`, `_Ptr_last1` doesn't need…
StephanTLavavej Jun 10, 2024
e51b98d
Restore top-level constness for `_UFirst2`.
StephanTLavavej Jun 10, 2024
d4462a5
Benchmark classic search().
StephanTLavavej Jun 10, 2024
95ba820
Simplify `last_known_good_search()`.
StephanTLavavej Jun 10, 2024
72a0d29
Revert vectorized implementation.
StephanTLavavej Jun 10, 2024
38b32d6
Drop `memcmp` paths from `_Equal_rev_pred_unchecked` and `_Equal_rev_…
StephanTLavavej Jun 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(replace src/replace.cpp)
add_benchmark(search src/search.cpp)
add_benchmark(std_copy src/std_copy.cpp)
add_benchmark(swap_ranges src/swap_ranges.cpp)

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/src/replace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const char src[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquet "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
Expand All @@ -34,7 +34,7 @@ const char src[] =
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquam malesuada est at dignissim. Pellentesque finibus "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

template <class T>
Expand Down
114 changes: 114 additions & 0 deletions benchmarks/src/search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include <vector>

// Haystack text shared by every benchmark in this file: a multi-kilobyte block of "Lorem ipsum" filler.
// Because this is a char array initialized from a string literal, std::end(src_haystack) points one past the
// terminating '\0', i.e. the array's last element is the null terminator.
const char src_haystack[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum "
"ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, "
"ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. "
"Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam "
"velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate "
"ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam "
"eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero "
"accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci "
"elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec "
"volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam "
"iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit "
"rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin "
"vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec "
"nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec "
"metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis "
"nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor "
"venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, "
"aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis "
"nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, "
"laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. "
"Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum "
"feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum "
"lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. "
"Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient "
"montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum "
"justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum "
"vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium "
"euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus "
"sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis.";

// Needle text searched for by the benchmarks below. The word "aliquet" occurs only near the end of src_haystack.
// As a char array this has 8 elements: the 7 letters followed by the terminating '\0'.
const char src_needle[] = "aliquet";

// Baseline benchmark: times the C library's strstr() on the shared haystack/needle text, for comparison with the
// std::search() variants in this file.
//
// Both std::string objects are built from the full arrays, so each holds an embedded '\0' as its last character.
// That does not affect the measurement: strstr() treats the first '\0' it meets as the end of each C string.
void c_strstr(benchmark::State& state) {
    const std::string haystack(std::begin(src_haystack), std::end(src_haystack));
    const std::string needle(std::begin(src_needle), std::end(src_needle));

    for (auto _ : state) {
        // Make the optimizer treat both inputs as observed on every iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);

        const char* match = std::strstr(haystack.c_str(), needle.c_str());

        // Keep the result alive so the search itself is not discarded as dead code.
        benchmark::DoNotOptimize(match);
    }
}

// Benchmarks the classic iterator-pair overload of std::search() on vectors of T.
//
// T is the element type that each character of the shared haystack/needle text is converted to, so the same
// search can be timed for different element widths.
template <class T>
void classic_search(benchmark::State& state) {
    // std::end() of a char array points one past the terminating '\0'. Stop one element short so the vectors hold
    // only the text itself. With the terminator included the needle would be the 8-element sequence "aliquet\0",
    // which never occurs in the haystack, so this benchmark would time a guaranteed miss instead of the same
    // "aliquet" lookup that c_strstr performs.
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Make the optimizer treat both inputs as observed on every iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end());
        // Keep the result alive so the search itself is not discarded as dead code.
        benchmark::DoNotOptimize(res);
    }
}

// Benchmarks std::ranges::search() on vectors of T.
//
// T is the element type that each character of the shared haystack/needle text is converted to, so the same
// search can be timed for different element widths.
template <class T>
void ranges_search(benchmark::State& state) {
    // std::end() of a char array points one past the terminating '\0'. Stop one element short so the vectors hold
    // only the text itself. With the terminator included the needle would be the 8-element sequence "aliquet\0",
    // which never occurs in the haystack, so this benchmark would time a guaranteed miss instead of the same
    // "aliquet" lookup that c_strstr performs.
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Make the optimizer treat both inputs as observed on every iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        auto res = std::ranges::search(haystack, needle);
        // Keep the result alive so the search itself is not discarded as dead code.
        benchmark::DoNotOptimize(res);
    }
}

// Benchmarks the searcher overload of std::search(), using std::default_searcher, on vectors of T.
//
// T is the element type that each character of the shared haystack/needle text is converted to, so the same
// search can be timed for different element widths.
template <class T>
void search_default_searcher(benchmark::State& state) {
    // std::end() of a char array points one past the terminating '\0'. Stop one element short so the vectors hold
    // only the text itself. With the terminator included the needle would be the 8-element sequence "aliquet\0",
    // which never occurs in the haystack, so this benchmark would time a guaranteed miss instead of the same
    // "aliquet" lookup that c_strstr performs.
    const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack) - 1);
    const std::vector<T> needle(std::begin(src_needle), std::end(src_needle) - 1);

    for (auto _ : state) {
        // Make the optimizer treat both inputs as observed on every iteration.
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        // The searcher is constructed inside the loop, so its construction is part of the timed work.
        auto res = std::search(haystack.begin(), haystack.end(), std::default_searcher{needle.begin(), needle.end()});
        // Keep the result alive so the search itself is not discarded as dead code.
        benchmark::DoNotOptimize(res);
    }
}

// C library baseline.
BENCHMARK(c_strstr);

// Iterator-pair std::search() for 1-, 2-, 4-, and 8-byte unsigned element types.
BENCHMARK(classic_search<std::uint8_t>);
BENCHMARK(classic_search<std::uint16_t>);
BENCHMARK(classic_search<std::uint32_t>);
BENCHMARK(classic_search<std::uint64_t>);

// std::ranges::search() for the same element types.
BENCHMARK(ranges_search<std::uint8_t>);
BENCHMARK(ranges_search<std::uint16_t>);
BENCHMARK(ranges_search<std::uint32_t>);
BENCHMARK(ranges_search<std::uint64_t>);

// std::search() with std::default_searcher for the same element types.
BENCHMARK(search_default_searcher<std::uint8_t>);
BENCHMARK(search_default_searcher<std::uint16_t>);
BENCHMARK(search_default_searcher<std::uint32_t>);
BENCHMARK(search_default_searcher<std::uint64_t>);


// Expands to the program's main(), which runs the benchmarks registered above.
BENCHMARK_MAIN();
10 changes: 1 addition & 9 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val
template <class _Ty1, class _Ty2>
_Ty1* _Find_first_of_vectorized(
_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept {
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));
if constexpr (sizeof(_Ty1) == 1) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_1(_First1, _Last1, _First2, _Last2)));
Expand Down Expand Up @@ -2119,15 +2120,6 @@ namespace ranges {
template <class _InIt1, class _InIt2, class _Pr>
_NODISCARD _CONSTEXPR20 bool _Equal_rev_pred_unchecked(_InIt1 _First1, _InIt2 _First2, const _InIt2 _Last2, _Pr _Pred) {
// compare [_First1, ...) to [_First2, _Last2)
if constexpr (_Equal_memcmp_is_safe<_InIt1, _InIt2, _Pr>) {
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
return _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0;
}
}

for (; _First2 != _Last2; ++_First1, (void) ++_First2) {
if (!_Pred(*_First1, *_First2)) {
return false;
Expand Down
25 changes: 1 addition & 24 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -5356,7 +5356,7 @@ constexpr bool _Equal_memcmp_is_safe_helper =

template <class _Iter1, class _Iter2, class _Pr>
constexpr bool _Equal_memcmp_is_safe =
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, _Pr>;
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, remove_const_t<_Pr>>;

template <class _CtgIt1, class _CtgIt2>
_NODISCARD int _Memcmp_ranges(_CtgIt1 _First1, _CtgIt1 _Last1, _CtgIt2 _First2) {
Expand Down Expand Up @@ -6648,35 +6648,12 @@ namespace ranges {

_EXPORT_STD inline constexpr _Adjacent_find_fn adjacent_find;

template <class _It1, class _It2, class _Se2, class _Pr, class _Pj1, class _Pj2>
concept _Equal_rev_pred_can_memcmp = is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>
&& sized_sentinel_for<_Se2, _It2> && _Equal_memcmp_is_safe<_It1, _It2, _Pr>;

template <forward_iterator _It1, input_iterator _It2, sentinel_for<_It2> _Se2, class _Pr, class _Pj1, class _Pj2>
requires indirectly_comparable<_It1, _It2, _Pr, _Pj1, _Pj2>
_NODISCARD constexpr pair<bool, _It1> _Equal_rev_pred(
_It1 _First1, _It2 _First2, const _Se2 _Last2, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
// Returns {true, _First1 + (_Last2 - _First2)} if [_First1, ...) equals [_First2, _Last2), and {false, {}}
// otherwise.
constexpr bool _Optimize = _Equal_rev_pred_can_memcmp<_It1, _It2, _Se2, _Pr, _Pj1, _Pj2>;
if constexpr (_Optimize) {
if (!_STD is_constant_evaluated()) {
bool _Ans;
if constexpr (same_as<_It2, _Se2>) {
_Ans = _STD _Memcmp_ranges(_First2, _Last2, _First1) == 0;
} else {
_Ans = _STD _Memcmp_count(_First1, _First2, static_cast<size_t>(_Last2 - _First2)) == 0;
}

if (_Ans) {
_First1 += (_Last2 - _First2);
return {true, _STD move(_First1)};
} else {
return {false, _It1 {}};
}
}
}

for (; _First2 != _Last2; ++_First1, (void) ++_First2) {
if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) {
return {false, _It1 {}};
Expand Down
82 changes: 78 additions & 4 deletions tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,19 @@ auto last_known_good_find_first_of(FwdItH h_first, FwdItH h_last, FwdItN n_first
return h_first;
}

// Naive reference implementation of a subsequence search, used as the oracle that the library's search() results
// are compared against.
//
// Returns an iterator to the first position in [h_first, h_last) at which [n_first, n_last) occurs, or h_last when
// there is no such position. An empty needle matches at h_first.
template <class RanItH, class RanItN>
auto last_known_good_search(RanItH h_first, RanItH h_last, RanItN n_first, RanItN n_last) {
    const auto needle_size = n_last - n_first;

    // Only positions with at least needle_size elements remaining can hold a match.
    while (h_last - h_first >= needle_size) {
        const bool matches_here = equal(h_first, h_first + needle_size, n_first, n_last);
        if (matches_here) {
            return h_first;
        }

        ++h_first;
    }

    return h_last;
}

template <class T>
void test_case_find(const vector<T>& input, T v) {
auto expected = last_known_good_find(input.begin(), input.end(), v);
Expand Down Expand Up @@ -275,12 +288,13 @@ void test_case_find_first_of(const vector<T>& input_haystack, const vector<T>& i

template <class T>
void test_find_first_of(mt19937_64& gen) {
constexpr size_t needleDataCount = 50;
using TD = conditional_t<sizeof(T) == 1, int, T>;
constexpr size_t haystackDataCount = 200;
constexpr size_t needleDataCount = 35;
using TD = conditional_t<sizeof(T) == 1, int, T>;
uniform_int_distribution<TD> dis('a', 'z');
vector<T> input_haystack;
vector<T> input_needle;
input_haystack.reserve(dataCount);
input_haystack.reserve(haystackDataCount);
input_needle.reserve(needleDataCount);

for (;;) {
Expand All @@ -292,7 +306,7 @@ void test_find_first_of(mt19937_64& gen) {
test_case_find_first_of(input_haystack, input_needle);
}

if (input_haystack.size() == dataCount) {
if (input_haystack.size() == haystackDataCount) {
break;
}

Expand All @@ -312,6 +326,56 @@ void test_find_first_of_containers() {
#endif // _HAS_CXX20
}

// Checks one haystack/needle pair: every available flavor of search() must agree with the naive reference
// implementation, last_known_good_search().
template <class T>
void test_case_search(const vector<T>& input_haystack, const vector<T>& input_needle) {
    const auto hay_first    = input_haystack.begin();
    const auto hay_last     = input_haystack.end();
    const auto needle_first = input_needle.begin();
    const auto needle_last  = input_needle.end();

    const auto expected = last_known_good_search(hay_first, hay_last, needle_first, needle_last);

    // Classic iterator-pair overload.
    const auto actual = search(hay_first, hay_last, needle_first, needle_last);
    assert(expected == actual);
#if _HAS_CXX17
    // Searcher overload.
    const auto searcher_actual = search(hay_first, hay_last, default_searcher{needle_first, needle_last});
    assert(expected == searcher_actual);
#endif // _HAS_CXX17
#if _HAS_CXX20
    // Ranges overload: the returned subrange must start at the match and span the whole needle, or be empty at the
    // end of the haystack when there is no match.
    auto ranges_actual = ranges::search(input_haystack, input_needle);
    assert(expected == begin(ranges_actual));
    const auto expected_end =
        expected != hay_last ? expected + static_cast<ptrdiff_t>(input_needle.size()) : expected;
    assert(expected_end == end(ranges_actual));
#endif // _HAS_CXX20
}

// Randomized test driver for search(): grows a haystack of random digit characters one element at a time (from empty
// up to haystackDataCount elements) and, for each haystack, checks every needle length from 0 to needleDataCount
// using freshly generated random digits.
template <class T>
void test_search(mt19937_64& gen) {
    constexpr size_t haystackDataCount = 200;
    constexpr size_t needleDataCount   = 35;

    // For one-byte T the distribution is instantiated with int instead, and the values are cast back to T.
    using TD = conditional_t<sizeof(T) == 1, int, T>;
    uniform_int_distribution<TD> digit_dist('0', '9');
    const auto next_digit = [&] { return static_cast<T>(digit_dist(gen)); };

    vector<T> haystack;
    vector<T> needle;
    haystack.reserve(haystackDataCount);
    needle.reserve(needleDataCount);

    while (true) {
        // Start from the empty needle, then extend it one element at a time.
        needle.clear();
        test_case_search(haystack, needle);

        while (needle.size() != needleDataCount) {
            needle.push_back(next_digit());
            test_case_search(haystack, needle);
        }

        if (haystack.size() == haystackDataCount) {
            break;
        }

        haystack.push_back(next_digit());
    }
}

template <class T>
void test_min_max_element(mt19937_64& gen) {
using Limits = numeric_limits<T>;
Expand Down Expand Up @@ -817,6 +881,16 @@ void test_vector_algorithms(mt19937_64& gen) {
test_find_first_of_containers<const vector<wchar_t>, vector<wchar_t>>();
test_find_first_of_containers<vector<char>, vector<int>>();

test_search<char>(gen);
test_search<signed char>(gen);
test_search<unsigned char>(gen);
test_search<short>(gen);
test_search<unsigned short>(gen);
test_search<int>(gen);
test_search<unsigned int>(gen);
test_search<long long>(gen);
test_search<unsigned long long>(gen);

test_min_max_element<char>(gen);
test_min_max_element<signed char>(gen);
test_min_max_element<unsigned char>(gen);
Expand Down