diff --git a/google/cloud/storage/BUILD.bazel b/google/cloud/storage/BUILD.bazel index 34357888d734e..c9dd82957af81 100644 --- a/google/cloud/storage/BUILD.bazel +++ b/google/cloud/storage/BUILD.bazel @@ -148,3 +148,15 @@ load(":storage_client_grpc_unit_tests.bzl", "storage_client_grpc_unit_tests") "@com_google_googletest//:gtest_main", ], ) for test in storage_client_grpc_unit_tests] + +load(":google_cloud_cpp_storage_benchmarks.bzl", "google_cloud_cpp_storage_benchmarks") + +[cc_test( + name = benchmark.replace("/", "_").replace(".cc", ""), + srcs = [benchmark], + tags = ["benchmark"], + deps = [ + ":google_cloud_cpp_storage", + "@com_google_benchmark//:benchmark_main", + ], +) for benchmark in google_cloud_cpp_storage_benchmarks] diff --git a/google/cloud/storage/google_cloud_cpp_storage.bzl b/google/cloud/storage/google_cloud_cpp_storage.bzl index 53fae4cc60d0a..ac82667cfc4ce 100644 --- a/google/cloud/storage/google_cloud_cpp_storage.bzl +++ b/google/cloud/storage/google_cloud_cpp_storage.bzl @@ -166,6 +166,7 @@ google_cloud_cpp_storage_srcs = [ "internal/default_object_acl_requests.cc", "internal/empty_response.cc", "internal/error_credentials.cc", + "internal/generate_message_boundary.cc", "internal/hash_function.cc", "internal/hash_function_impl.cc", "internal/hash_validator.cc", diff --git a/google/cloud/storage/google_cloud_cpp_storage.cmake b/google/cloud/storage/google_cloud_cpp_storage.cmake index 8c812efcfc5c6..9a92f78f48c50 100644 --- a/google/cloud/storage/google_cloud_cpp_storage.cmake +++ b/google/cloud/storage/google_cloud_cpp_storage.cmake @@ -83,6 +83,7 @@ add_library( internal/empty_response.h internal/error_credentials.cc internal/error_credentials.h + internal/generate_message_boundary.cc internal/generate_message_boundary.h internal/generic_object_request.h internal/generic_request.h @@ -529,11 +530,31 @@ if (BUILD_TESTING) endif () add_test(NAME ${target} COMMAND ${target}) endforeach () - # Export the list of unit tests so the Bazel BUILD file can pick it up. export_list_to_bazel("storage_client_unit_tests.bzl" "storage_client_unit_tests" YEAR "2018") + find_package(benchmark CONFIG REQUIRED) + + set(google_cloud_cpp_storage_benchmarks + # cmake-format: sort + internal/generate_message_boundary_benchmark.cc) + + # Export the list of benchmarks to a .bzl file so we do not need to maintain + # the list in two places. + export_list_to_bazel("google_cloud_cpp_storage_benchmarks.bzl" + "google_cloud_cpp_storage_benchmarks" YEAR "2022") + + # Generate a target for each benchmark. + foreach (fname ${google_cloud_cpp_storage_benchmarks}) + google_cloud_cpp_add_executable(target "storage" "${fname}") + add_test(NAME ${target} COMMAND ${target}) + target_link_libraries( + ${target} PRIVATE absl::memory google-cloud-cpp::storage + benchmark::benchmark_main) + google_cloud_cpp_add_common_options(${target}) + endforeach () + add_subdirectory(tests) add_subdirectory(benchmarks) endif () diff --git a/google/cloud/storage/google_cloud_cpp_storage_benchmarks.bzl b/google/cloud/storage/google_cloud_cpp_storage_benchmarks.bzl new file mode 100644 index 0000000000000..01edf7cede091 --- /dev/null +++ b/google/cloud/storage/google_cloud_cpp_storage_benchmarks.bzl @@ -0,0 +1,21 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT EDIT -- GENERATED BY CMake -- Change the CMakeLists.txt file if needed + +"""Automatically generated unit tests list - DO NOT EDIT.""" + +google_cloud_cpp_storage_benchmarks = [ + "internal/generate_message_boundary_benchmark.cc", +] diff --git a/google/cloud/storage/internal/curl_client.cc b/google/cloud/storage/internal/curl_client.cc index 33cb288ec1687..855a61e41aed4 100644 --- a/google/cloud/storage/internal/curl_client.cc +++ b/google/cloud/storage/internal/curl_client.cc @@ -1310,16 +1310,11 @@ std::string CurlClient::PickBoundary(std::string const& text_to_avoid) { // the candidate. Eventually we will find something, though it might be // larger than `text_to_avoid`. And we only make (approximately) one pass // over `text_to_avoid`. - auto generate_candidate = [this](int n) { - static auto const* const kChars = new std::string( - "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + auto generate_candidate = [this]() { std::unique_lock lk(mu_); - return google::cloud::internal::Sample(generator_, n, *kChars); + return GenerateMessageBoundaryCandidate(generator_); }; - constexpr int kCandidateInitialSize = 16; - constexpr int kCandidateGrowthSize = 4; - return GenerateMessageBoundary(text_to_avoid, std::move(generate_candidate), - kCandidateInitialSize, kCandidateGrowthSize); + return GenerateMessageBoundary(text_to_avoid, generate_candidate); } StatusOr CurlClient::InsertObjectMediaSimple( diff --git a/google/cloud/storage/internal/generate_message_boundary.cc b/google/cloud/storage/internal/generate_message_boundary.cc new file mode 100644 index 0000000000000..b7547fb7b69eb --- /dev/null +++ b/google/cloud/storage/internal/generate_message_boundary.cc @@ -0,0 +1,46 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/generate_message_boundary.h" +#include + +namespace google { +namespace cloud { +namespace storage { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace internal { + +std::string GenerateMessageBoundary( + std::string const& message, + absl::FunctionRef candidate_generator) { + auto candidate = candidate_generator(); + while (message.find(candidate) != std::string::npos) { + candidate = candidate_generator(); + } + return candidate; +} + +std::string GenerateMessageBoundaryCandidate( + google::cloud::internal::DefaultPRNG& generator) { + auto candidate = std::string{ + "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"}; + std::shuffle(candidate.begin(), candidate.end(), generator); + return candidate; +} + +} // namespace internal +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/internal/generate_message_boundary.h b/google/cloud/storage/internal/generate_message_boundary.h index 828b993c9361b..5b4bf3f249876 100644 --- a/google/cloud/storage/internal/generate_message_boundary.h +++ b/google/cloud/storage/internal/generate_message_boundary.h @@ -17,6 +17,8 @@ #include "google/cloud/storage/version.h" #include "google/cloud/internal/invoke_result.h" +#include "google/cloud/internal/random.h" +#include "absl/functional/function_ref.h" #include namespace google { @@ -34,37 +36,45 @@ namespace internal { * characters to the message itself). * * The algorithm is to generate a short random string, and search for it in the - * message, if the message has that string, append some more random characters - * and keep searching. - * - * This function is a template because the string generator is typically a - * lambda that captures state variables (such as the random number generator), - * of the class that uses it. + * message, if the message has that string, generate a new string and retry. + * The strings are 64 (alphanumeric) characters long, the number of permutations + * is large enough that a suitable string will be found eventually. * * @param message a message body, typically the payload of a HTTP request that * will need to be encoded as a MIME multipart message. - * @param random_string_generator a callable to generate random strings. + * @param candidate_generator a callable to generate random strings. * Typically a lambda that captures the generator and any locks necessary * to generate the string. This is also used in testing to verify things * work when the initial string is found. - * @param initial_size the length for the initial string. - * @param growth_size how fast to grow the random string. * @return a string not found in @p message. */ +std::string GenerateMessageBoundary( + std::string const& message, + absl::FunctionRef candidate_generator); + +/// A helper to generate message boundary candidates. +std::string GenerateMessageBoundaryCandidate( + google::cloud::internal::DefaultPRNG& generator); + +/** + * @deprecated use another `GenerateMessageBoundary()` overload. + */ template ::value, int>::type = 0> -std::string GenerateMessageBoundary( - std::string const& message, RandomStringGenerator&& random_string_generator, - int initial_size, int growth_size) { - std::string candidate = random_string_generator(initial_size); - for (std::string::size_type i = message.find(candidate, 0); - i != std::string::npos; i = message.find(candidate, i)) { - candidate += random_string_generator(growth_size); - } - return candidate; +GOOGLE_CLOUD_CPP_DEPRECATED( + "deprecated, using the GenerateMessageBoundary(std::string const&, " + "absl::FunctionRef) overload") +std::string + GenerateMessageBoundary(std::string const& message, + RandomStringGenerator&& random_string_generator, + int /*initial_size*/, int /*growth_size*/) { + return GenerateMessageBoundary(message, [&random_string_generator]() { + return random_string_generator(64); + }); } + } // namespace internal GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END } // namespace storage diff --git a/google/cloud/storage/internal/generate_message_boundary_benchmark.cc b/google/cloud/storage/internal/generate_message_boundary_benchmark.cc new file mode 100644 index 0000000000000..143d7f7fba51c --- /dev/null +++ b/google/cloud/storage/internal/generate_message_boundary_benchmark.cc @@ -0,0 +1,156 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "google/cloud/storage/internal/generate_message_boundary.h" +#include "google/cloud/internal/random.h" +#include +#include +#include +#include + +namespace google { +namespace cloud { +namespace storage { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +namespace internal { +namespace { + +// CXX=clang++ CC=clang bazel run -c opt -- +// //google/cloud/storage:internal_generate_message_boundary_benchmark +// --benchmark_repetitions=100 +// +// clang-format off +// Run on (96 X 2000 MHz CPU s) +// CPU Caches: +// L1 Data 32 KiB (x48) +// L1 Instruction 32 KiB (x48) +// L2 Unified 1024 KiB (x48) +// L3 Unified 39424 KiB (x2) +// --------------------------------------------------------------------------------------------- +// Benchmark Time CPU Iterations +// --------------------------------------------------------------------------------------------- +// ... +// GenerateBoundaryFixture/GenerateBoundary_mean 20478679 ns 20475235 ns 100 +// GenerateBoundaryFixture/GenerateBoundary_median 20583724 ns 20580943 ns 100 +// GenerateBoundaryFixture/GenerateBoundary_stddev 335315 ns 335459 ns 100 +// GenerateBoundaryFixture/GenerateBoundary_cv 1.64 % 1.64 % 100 +// ... +// GenerateBoundaryFixture/GenerateBoundaryOld_mean 20809894 ns 20806317 ns 100 +// GenerateBoundaryFixture/GenerateBoundaryOld_median 20520133 ns 20517279 ns 100 +// GenerateBoundaryFixture/GenerateBoundaryOld_stddev 1284277 ns 1284334 ns 100 +// GenerateBoundaryFixture/GenerateBoundaryOld_cv 6.17 % 6.17 % 100 +// ... +// GenerateBoundaryFixture/WorstCase_mean 100747489 ns 100727911 ns 100 +// GenerateBoundaryFixture/WorstCase_median 101026913 ns 101006689 ns 100 +// GenerateBoundaryFixture/WorstCase_stddev 1285934 ns 1285884 ns 100 +// GenerateBoundaryFixture/WorstCase_cv 1.28 % 1.28 % 100 +// ... +// GenerateBoundaryFixture/BestCase_mean 9584895 ns 9583080 ns 100 +// GenerateBoundaryFixture/BestCase_median 9598452 ns 9597243 ns 100 +// GenerateBoundaryFixture/BestCase_stddev 90679 ns 90643 ns 100 +// GenerateBoundaryFixture/BestCase_cv 0.95 % 0.95 % 100 +// clang-format on + +auto constexpr kMessageSize = 128 * 1024 * 1024; + +class GenerateBoundaryFixture : public ::benchmark::Fixture { + public: + std::string MakeRandomString(int n) { + std::lock_guard lk(mu_); + return google::cloud::internal::Sample(generator_, n, + "abcdefghijklmnopqrstuvwxyz" + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + } + + std::string GenerateCandidate() { + std::lock_guard lk(mu_); + return GenerateMessageBoundaryCandidate(generator_); + } + + std::string const& message() const { return message_; } + + private: + std::string MakeMessage() { + std::string all; + int min = (std::numeric_limits::min)(); // NOLINT + int end = (std::numeric_limits::max)() + 1; + for (int c = min; c != end; ++c) all.push_back(static_cast(c)); + return google::cloud::internal::Sample(generator_, kMessageSize, all); + } + + std::mutex mu_; + google::cloud::internal::DefaultPRNG generator_ = + google::cloud::internal::DefaultPRNG(std::random_device{}()); + std::string message_ = MakeMessage(); +}; + +BENCHMARK_F(GenerateBoundaryFixture, GenerateBoundary) +(benchmark::State& state) { + auto make_string = [this]() { return GenerateCandidate(); }; + + for (auto _ : state) { + benchmark::DoNotOptimize(GenerateMessageBoundary(message(), make_string)); + } +} + +BENCHMARK_F(GenerateBoundaryFixture, GenerateBoundaryOld) +(benchmark::State& state) { + auto old = [this](std::string const& message) { + auto candidate = MakeRandomString(16); + for (std::string::size_type i = message.find(candidate, 0); + i != std::string::npos; i = message.find(candidate, i)) { + candidate += MakeRandomString(8); + } + return candidate; + }; + + for (auto _ : state) { + benchmark::DoNotOptimize(old(message())); + } +} + +BENCHMARK_F(GenerateBoundaryFixture, WorstCase) +(benchmark::State& state) { + auto test = [](std::string const& message) { + auto count = std::count(message.begin(), message.end(), 'Z'); + return std::string(count % 64, 'A'); + }; + + for (auto _ : state) { + benchmark::DoNotOptimize(test(message())); + } +} + +BENCHMARK_F(GenerateBoundaryFixture, BestCase) +(benchmark::State& state) { + auto test = [](std::string const& message) { + auto count = 0; + for (std::size_t i = 0; i < message.size(); i += 64) { + count += message[i] == 'Z' ? 1 : 0; + } + return std::string(count % 64, 'A'); + }; + + for (auto _ : state) { + benchmark::DoNotOptimize(test(message())); + } +} + +} // namespace +} // namespace internal +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage +} // namespace cloud +} // namespace google diff --git a/google/cloud/storage/internal/generate_message_boundary_test.cc b/google/cloud/storage/internal/generate_message_boundary_test.cc index 5fef5750bd935..11656fdc05833 100644 --- a/google/cloud/storage/internal/generate_message_boundary_test.cc +++ b/google/cloud/storage/internal/generate_message_boundary_test.cc @@ -15,6 +15,8 @@ #include "google/cloud/storage/internal/generate_message_boundary.h" #include "google/cloud/internal/random.h" #include +#include +#include namespace google { namespace cloud { @@ -24,61 +26,52 @@ namespace internal { namespace { using ::testing::HasSubstr; +using ::testing::IsEmpty; using ::testing::Not; +using ::testing::Return; -std::string const* const kChars = new std::string( - "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); +char constexpr kChars[] = + "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -TEST(GenerateMessageBoundaryTest, Simple) { - auto generator = google::cloud::internal::MakeDefaultPRNG(); +TEST(GenerateMessageBoundaryTest, Candidate) { + auto constexpr kMaxBoundarySize = 70; + auto generator = google::cloud::internal::DefaultPRNG(std::random_device{}()); + + for (int i = 0; i != 10; ++i) { + auto const candidate = GenerateMessageBoundaryCandidate(generator); + ASSERT_THAT(candidate, Not(IsEmpty())); + EXPECT_LE(candidate.size(), kMaxBoundarySize); + auto f = std::find_if(candidate.begin(), candidate.end(), + [](char c) { return std::isalnum(c) == 0; }); + EXPECT_TRUE(f == candidate.end()) + << "found non-alphanumeric character in " << candidate; + } +} - auto string_generator = [&generator](int n) { - return google::cloud::internal::Sample(generator, n, *kChars); - }; +TEST(GenerateMessageBoundaryTest, Simple) { + auto generator = google::cloud::internal::DefaultPRNG(std::random_device{}()); // The magic constants here are uninteresting. We just want a large message // and a relatively short string to start searching for a boundary. - auto message = string_generator(1024); - auto boundary = - GenerateMessageBoundary(message, std::move(string_generator), 16, 4); - EXPECT_THAT(message, Not(HasSubstr(boundary))); + auto message = google::cloud::internal::Sample(generator, 128, kChars); + for (int i = 0; i != 10; ++i) { + auto boundary = GenerateMessageBoundary(message, [&generator]() { + return GenerateMessageBoundaryCandidate(generator); + }); + EXPECT_THAT(message, Not(HasSubstr(boundary))); + } } -TEST(GenerateMessageBoundaryTest, RequiresGrowth) { - auto generator = google::cloud::internal::MakeDefaultPRNG(); - - // This test will ensure that both the message and the initial string contain - // at least this many common characters. - int constexpr kMatchedStringLength = 32; - int constexpr kMismatchedStringLength = 512; - - auto g1 = google::cloud::internal::MakeDefaultPRNG(); - std::string message = - google::cloud::internal::Sample(g1, kMismatchedStringLength, *kChars); - // Copy the PRNG to obtain the same sequence of random numbers that - // `generator` will create later. - g1 = generator; - message += google::cloud::internal::Sample(g1, kMatchedStringLength, *kChars); - g1 = google::cloud::internal::MakeDefaultPRNG(); - message += - google::cloud::internal::Sample(g1, kMismatchedStringLength, *kChars); - - auto string_generator = [&generator](int n) { - return google::cloud::internal::Sample(generator, n, *kChars); - }; - - // The initial_size and growth_size parameters are set to - // (kMatchedStringLength / 2) and (kMatchedStringLength / 4) respectively, - // that forces the algorithm to find the initial string, and to grow it - // several times before the kMatchedStringLength common characters are - // exhausted. - auto boundary = GenerateMessageBoundary(message, std::move(string_generator), - kMatchedStringLength / 2, - kMatchedStringLength / 4); - EXPECT_THAT(message, Not(HasSubstr(boundary))); +TEST(GenerateMessageBoundaryTest, RequiresMoreCandidates) { + ::testing::MockFunction candidate_generator; + EXPECT_CALL(candidate_generator, Call) + .WillOnce(Return("abc")) + .WillOnce(Return("abcd")) + .WillOnce(Return("good")); - // We expect that the string is longer than the common characters. - EXPECT_LT(kMatchedStringLength, boundary.size()); + auto const actual = GenerateMessageBoundary( + "abc123abcd", candidate_generator.AsStdFunction()); + EXPECT_EQ(actual, "good"); } } // namespace diff --git a/google/cloud/storage/internal/rest_client.cc b/google/cloud/storage/internal/rest_client.cc index 87cdc53e5729f..766713708bc6c 100644 --- a/google/cloud/storage/internal/rest_client.cc +++ b/google/cloud/storage/internal/rest_client.cc @@ -334,22 +334,11 @@ StatusOr RestClient::LockBucketRetentionPolicy( } std::string RestClient::PickBoundary(std::string const& text_to_avoid) { - // We need to find a string that is *not* found in `text_to_avoid`, we pick - // a string at random, and see if it is in `text_to_avoid`, if it is, we grow - // the string with random characters and start from where we last found a - // the candidate. Eventually we will find something, though it might be - // larger than `text_to_avoid`. And we only make (approximately) one pass - // over `text_to_avoid`. - auto generate_candidate = [this](int n) { - static auto const* const kChars = new std::string( - "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + auto generate_candidate = [this]() { std::unique_lock lk(mu_); - return google::cloud::internal::Sample(generator_, n, *kChars); + return GenerateMessageBoundaryCandidate(generator_); }; - constexpr int kCandidateInitialSize = 16; - constexpr int kCandidateGrowthSize = 4; - return GenerateMessageBoundary(text_to_avoid, std::move(generate_candidate), - kCandidateInitialSize, kCandidateGrowthSize); + return GenerateMessageBoundary(text_to_avoid, generate_candidate); } StatusOr RestClient::InsertObjectMediaMultipart(