diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index eb6412d9d51..4a42610371d 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -22,6 +22,7 @@ target_sources(ginkgo factorization/par_ilut.cpp log/convergence.cpp log/logger.cpp + log/performance_hint.cpp log/record.cpp log/stream.cpp matrix/coo.cpp diff --git a/core/log/performance_hint.cpp b/core/log/performance_hint.cpp new file mode 100644 index 00000000000..71df539c756 --- /dev/null +++ b/core/log/performance_hint.cpp @@ -0,0 +1,190 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +namespace gko { +namespace log { +namespace { + + +std::string location_name(const uintptr& location) +{ + std::ostringstream oss; + oss << std::hex << "0x" << location; + return oss.str(); +} + + +bool should_log_count(int count) +{ + return count == 10 || count == 100 || count == 1000 || count == 10000; +} + + +template +void compact_storage(std::unordered_map& map, size_type max_size, + size_type target_size) +{ + if (map.size() < max_size) { + return; + } + // sort entries by value in descending order + std::vector> entries; + for (auto pair : map) { + entries.emplace_back(pair); + } + GKO_ASSERT(map.size() == entries.size()); + map.clear(); + std::sort(entries.begin(), entries.end(), + [](auto a, auto b) { return a.second > b.second; }); + GKO_ASSERT(target_size < max_size); + GKO_ASSERT(target_size <= entries.size()); + entries.erase(entries.begin() + target_size, entries.end()); + map.insert(entries.begin(), entries.end()); +} + + +void print_allocation_message(std::ostream& stream, size_type size, int count) +{ + stream << "Observed " << count << " allocate-free pairs of size " << size + << " that may point to unnecessary allocations.\n"; +} + + +void print_copy_from_message(std::ostream& stream, gko::uintptr ptr, int count) +{ + stream << "Observed " << count << " cross-executor copies from " + << location_name(ptr) + << " that may point to unnecessary data transfers.\n"; +} + + +void print_copy_to_message(std::ostream& stream, gko::uintptr ptr, int count) +{ + stream << "Observed " << count << " cross-executor copies to " + << location_name(ptr) + << " that may point to unnecessary data transfers.\n"; +} + + +} // namespace + + +void PerformanceHint::on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const +{ + if (num_bytes > allocation_size_limit_) { + allocation_sizes_[location] = num_bytes; + // erase smallest allocations first + compact_storage(allocation_sizes_, histogram_max_size, + histogram_max_size * 3 / 4); + } +} + + +void PerformanceHint::on_free_completed(const Executor* exec, + const uintptr& location) const +{ + const auto it = allocation_sizes_.find(location); + if (it != allocation_sizes_.end()) { + const auto size = it->second; + allocation_sizes_.erase(it); + const auto count = ++allocation_histogram_[size]; + if (should_log_count(count)) { + print_allocation_message(log(), size, count); + } + // erase rarest allocation sizes first + compact_storage(allocation_histogram_, histogram_max_size, + histogram_max_size * 3 / 4); + } +} + + +void PerformanceHint::on_copy_completed(const Executor* from, + const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const +{ + if (num_bytes > copy_size_limit_ && from != to) { + const auto count1 = ++copy_src_histogram_[location_from]; + const auto count2 = ++copy_dst_histogram_[location_to]; + if (should_log_count(count1)) { + print_copy_from_message(log(), location_from, count1); + } + if (should_log_count(count2)) { + print_copy_to_message(log(), location_to, count2); + } + compact_storage(copy_src_histogram_, histogram_max_size, + histogram_max_size * 3 / 4); + compact_storage(copy_dst_histogram_, histogram_max_size, + histogram_max_size * 3 / 4); + } +} + + +void PerformanceHint::print_status() const +{ + for (auto entry : allocation_histogram_) { + if (entry.second >= 10) { + print_allocation_message(log(), entry.first, entry.second); + } + } + for (auto entry : copy_src_histogram_) { + if (entry.second >= 10) { + print_copy_from_message(log(), entry.first, entry.second); + } + } + for (auto entry : copy_dst_histogram_) { + if (entry.second >= 10) { + print_copy_to_message(log(), entry.first, entry.second); + } + } +} + + +std::ostream& PerformanceHint::log() const { return *os_ << prefix_; } + + +constexpr Logger::mask_type PerformanceHint::mask_; +constexpr const char* PerformanceHint::prefix_; + + +} // namespace log +} // namespace gko diff --git a/core/log/stream.cpp b/core/log/stream.cpp index 847c25befbf..5c5586484b6 100644 --- a/core/log/stream.cpp +++ b/core/log/stream.cpp @@ -126,8 +126,8 @@ template void Stream::on_allocation_started(const Executor* exec, const size_type& num_bytes) const { - os_ << prefix_ << "allocation started on " << demangle_name(exec) - << " with " << bytes_name(num_bytes) << std::endl; + *os_ << prefix_ << "allocation started on " << demangle_name(exec) + << " with " << bytes_name(num_bytes) << std::endl; } @@ -136,9 +136,9 @@ void Stream::on_allocation_completed(const Executor* exec, const size_type& num_bytes, const uintptr& location) const { - os_ << prefix_ << "allocation completed on " << demangle_name(exec) - << " at " << location_name(location) << " with " - << bytes_name(num_bytes) << std::endl; + *os_ << prefix_ << "allocation completed on " << demangle_name(exec) + << " at " << location_name(location) << " with " + << bytes_name(num_bytes) << std::endl; } @@ -146,8 +146,8 @@ template void Stream::on_free_started(const Executor* exec, const uintptr& location) const { - os_ << prefix_ << "free started on " << demangle_name(exec) << " at " - << location_name(location) << std::endl; + *os_ << prefix_ << "free started on " << demangle_name(exec) << " at " + << location_name(location) << std::endl; } @@ -155,8 +155,8 @@ template void Stream::on_free_completed(const Executor* exec, const uintptr& location) const { - os_ << prefix_ << "free completed on " << demangle_name(exec) << " at " - << location_name(location) << std::endl; + *os_ << prefix_ << "free completed on " << demangle_name(exec) << " at " + << location_name(location) << std::endl; } @@ -167,10 +167,10 @@ void Stream::on_copy_started(const Executor* from, const uintptr& location_to, const size_type& num_bytes) const { - os_ << prefix_ << "copy started from " << demangle_name(from) << " to " - << demangle_name(to) << " from " << location_name(location_from) - << " to " << location_name(location_to) << " with " - << bytes_name(num_bytes) << std::endl; + *os_ << prefix_ << "copy started from " << demangle_name(from) << " to " + << demangle_name(to) << " from " << location_name(location_from) + << " to " << location_name(location_to) << " with " + << bytes_name(num_bytes) << std::endl; } @@ -181,10 +181,10 @@ void Stream::on_copy_completed(const Executor* from, const uintptr& location_to, const size_type& num_bytes) const { - os_ << prefix_ << "copy completed from " << demangle_name(from) << " to " - << demangle_name(to) << " from " << location_name(location_from) - << " to " << location_name(location_to) << " with " - << bytes_name(num_bytes) << std::endl; + *os_ << prefix_ << "copy completed from " << demangle_name(from) << " to " + << demangle_name(to) << " from " << location_name(location_from) + << " to " << location_name(location_to) << " with " + << bytes_name(num_bytes) << std::endl; } @@ -192,8 +192,8 @@ template void Stream::on_operation_launched(const Executor* exec, const Operation* operation) const { - os_ << prefix_ << demangle_name(operation) << " started on " - << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(operation) << " started on " + << demangle_name(exec) << std::endl; } @@ -201,8 +201,8 @@ template void Stream::on_operation_completed(const Executor* exec, const Operation* operation) const { - os_ << prefix_ << demangle_name(operation) << " completed on " - << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(operation) << " completed on " + << demangle_name(exec) << std::endl; } @@ -210,8 +210,8 @@ template void Stream::on_polymorphic_object_create_started( const Executor* exec, const PolymorphicObject* po) const { - os_ << prefix_ << "PolymorphicObject create started from " - << demangle_name(po) << " on " << demangle_name(exec) << std::endl; + *os_ << prefix_ << "PolymorphicObject create started from " + << demangle_name(po) << " on " << demangle_name(exec) << std::endl; } @@ -220,8 +220,8 @@ void Stream::on_polymorphic_object_create_completed( const Executor* exec, const PolymorphicObject* input, const PolymorphicObject* output) const { - os_ << prefix_ << demangle_name(output) << " create completed from " - << demangle_name(input) << " on " << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(output) << " create completed from " + << demangle_name(input) << " on " << demangle_name(exec) << std::endl; } @@ -230,8 +230,8 @@ void Stream::on_polymorphic_object_copy_started( const Executor* exec, const PolymorphicObject* from, const PolymorphicObject* to) const { - os_ << prefix_ << demangle_name(from) << " copy started to " - << demangle_name(to) << " on " << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(from) << " copy started to " + << demangle_name(to) << " on " << demangle_name(exec) << std::endl; } @@ -240,8 +240,8 @@ void Stream::on_polymorphic_object_copy_completed( const Executor* exec, const PolymorphicObject* from, const PolymorphicObject* to) const { - os_ << prefix_ << demangle_name(from) << " copy completed to " - << demangle_name(to) << " on " << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(from) << " copy completed to " + << demangle_name(to) << " on " << demangle_name(exec) << std::endl; } @@ -250,8 +250,8 @@ void Stream::on_polymorphic_object_move_started( const Executor* exec, const PolymorphicObject* from, const PolymorphicObject* to) const { - os_ << prefix_ << demangle_name(from) << " move started to " - << demangle_name(to) << " on " << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(from) << " move started to " + << demangle_name(to) << " on " << demangle_name(exec) << std::endl; } @@ -260,8 +260,8 @@ void Stream::on_polymorphic_object_move_completed( const Executor* exec, const PolymorphicObject* from, const PolymorphicObject* to) const { - os_ << prefix_ << demangle_name(from) << " move completed to " - << demangle_name(to) << " on " << demangle_name(exec) << std::endl; + *os_ << prefix_ << demangle_name(from) << " move completed to " + << demangle_name(to) << " on " << demangle_name(exec) << std::endl; } @@ -269,8 +269,8 @@ template void Stream::on_polymorphic_object_deleted( const Executor* exec, const PolymorphicObject* po) const { - os_ << prefix_ << demangle_name(po) << " deleted on " << demangle_name(exec) - << std::endl; + *os_ << prefix_ << demangle_name(po) << " deleted on " + << demangle_name(exec) << std::endl; } @@ -278,15 +278,15 @@ template void Stream::on_linop_apply_started(const LinOp* A, const LinOp* b, const LinOp* x) const { - os_ << prefix_ << "apply started on A " << demangle_name(A) << " with b " - << demangle_name(b) << " and x " << demangle_name(x) << std::endl; + *os_ << prefix_ << "apply started on A " << demangle_name(A) << " with b " + << demangle_name(b) << " and x " << demangle_name(x) << std::endl; if (verbose_) { - os_ << demangle_name(A) << as>(A) - << std::endl; - os_ << demangle_name(b) << as>(b) - << std::endl; - os_ << demangle_name(x) << as>(x) - << std::endl; + *os_ << demangle_name(A) << as>(A) + << std::endl; + *os_ << demangle_name(b) << as>(b) + << std::endl; + *os_ << demangle_name(x) << as>(x) + << std::endl; } } @@ -295,15 +295,15 @@ template void Stream::on_linop_apply_completed(const LinOp* A, const LinOp* b, const LinOp* x) const { - os_ << prefix_ << "apply completed on A " << demangle_name(A) << " with b " - << demangle_name(b) << " and x " << demangle_name(x) << std::endl; + *os_ << prefix_ << "apply completed on A " << demangle_name(A) << " with b " + << demangle_name(b) << " and x " << demangle_name(x) << std::endl; if (verbose_) { - os_ << demangle_name(A) << as>(A) - << std::endl; - os_ << demangle_name(b) << as>(b) - << std::endl; - os_ << demangle_name(x) << as>(x) - << std::endl; + *os_ << demangle_name(A) << as>(A) + << std::endl; + *os_ << demangle_name(b) << as>(b) + << std::endl; + *os_ << demangle_name(x) << as>(x) + << std::endl; } } @@ -315,21 +315,21 @@ void Stream::on_linop_advanced_apply_started(const LinOp* A, const LinOp* beta, const LinOp* x) const { - os_ << prefix_ << "advanced apply started on A " << demangle_name(A) - << " with alpha " << demangle_name(alpha) << " b " << demangle_name(b) - << " beta " << demangle_name(beta) << " and x " << demangle_name(x) - << std::endl; + *os_ << prefix_ << "advanced apply started on A " << demangle_name(A) + << " with alpha " << demangle_name(alpha) << " b " << demangle_name(b) + << " beta " << demangle_name(beta) << " and x " << demangle_name(x) + << std::endl; if (verbose_) { - os_ << demangle_name(A) << as>(A) - << std::endl; - os_ << demangle_name(alpha) << as>(alpha) - << std::endl; - os_ << demangle_name(b) << as>(b) - << std::endl; - os_ << demangle_name(beta) << as>(beta) - << std::endl; - os_ << demangle_name(x) << as>(x) - << std::endl; + *os_ << demangle_name(A) << as>(A) + << std::endl; + *os_ << demangle_name(alpha) << as>(alpha) + << std::endl; + *os_ << demangle_name(b) << as>(b) + << std::endl; + *os_ << demangle_name(beta) << as>(beta) + << std::endl; + *os_ << demangle_name(x) << as>(x) + << std::endl; } } @@ -341,21 +341,21 @@ void Stream::on_linop_advanced_apply_completed(const LinOp* A, const LinOp* beta, const LinOp* x) const { - os_ << prefix_ << "advanced apply completed on A " << demangle_name(A) - << " with alpha " << demangle_name(alpha) << " b " << demangle_name(b) - << " beta " << demangle_name(beta) << " and x " << demangle_name(x) - << std::endl; + *os_ << prefix_ << "advanced apply completed on A " << demangle_name(A) + << " with alpha " << demangle_name(alpha) << " b " << demangle_name(b) + << " beta " << demangle_name(beta) << " and x " << demangle_name(x) + << std::endl; if (verbose_) { - os_ << demangle_name(A) << as>(A) - << std::endl; - os_ << demangle_name(alpha) << as>(alpha) - << std::endl; - os_ << demangle_name(b) << as>(b) - << std::endl; - os_ << demangle_name(beta) << as>(beta) - << std::endl; - os_ << demangle_name(x) << as>(x) - << std::endl; + *os_ << demangle_name(A) << as>(A) + << std::endl; + *os_ << demangle_name(alpha) << as>(alpha) + << std::endl; + *os_ << demangle_name(b) << as>(b) + << std::endl; + *os_ << demangle_name(beta) << as>(beta) + << std::endl; + *os_ << demangle_name(x) << as>(x) + << std::endl; } } @@ -364,8 +364,8 @@ template void Stream::on_linop_factory_generate_started( const LinOpFactory* factory, const LinOp* input) const { - os_ << prefix_ << "generate started for " << demangle_name(factory) - << " with input " << demangle_name(input) << std::endl; + *os_ << prefix_ << "generate started for " << demangle_name(factory) + << " with input " << demangle_name(input) << std::endl; } @@ -373,9 +373,9 @@ template void Stream::on_linop_factory_generate_completed( const LinOpFactory* factory, const LinOp* input, const LinOp* output) const { - os_ << prefix_ << "generate completed for " << demangle_name(factory) - << " with input " << demangle_name(input) << " produced " - << demangle_name(output) << std::endl; + *os_ << prefix_ << "generate completed for " << demangle_name(factory) + << " with input " << demangle_name(input) << " produced " + << demangle_name(output) << std::endl; } @@ -385,23 +385,23 @@ void Stream::on_criterion_check_started( const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, const uint8& stopping_id, const bool& set_finalized) const { - os_ << prefix_ << "check started for " << demangle_name(criterion) - << " at iteration " << num_iterations << " with ID " - << static_cast(stopping_id) << " and finalized set to " - << set_finalized << std::endl; + *os_ << prefix_ << "check started for " << demangle_name(criterion) + << " at iteration " << num_iterations << " with ID " + << static_cast(stopping_id) << " and finalized set to " + << set_finalized << std::endl; if (verbose_) { if (residual != nullptr) { - os_ << demangle_name(residual) - << as>(residual) << std::endl; + *os_ << demangle_name(residual) + << as>(residual) << std::endl; } if (residual_norm != nullptr) { - os_ << demangle_name(residual_norm) - << as>(residual_norm) - << std::endl; + *os_ << demangle_name(residual_norm) + << as>(residual_norm) + << std::endl; } if (solution != nullptr) { - os_ << demangle_name(solution) - << as>(solution) << std::endl; + *os_ << demangle_name(solution) + << as>(solution) << std::endl; } } } @@ -415,28 +415,28 @@ void Stream::on_criterion_check_completed( const array* status, const bool& oneChanged, const bool& converged) const { - os_ << prefix_ << "check completed for " << demangle_name(criterion) - << " at iteration " << num_iterations << " with ID " - << static_cast(stoppingId) << " and finalized set to " - << setFinalized << ". It changed one RHS " << oneChanged - << ", stopped the iteration process " << converged << std::endl; + *os_ << prefix_ << "check completed for " << demangle_name(criterion) + << " at iteration " << num_iterations << " with ID " + << static_cast(stoppingId) << " and finalized set to " + << setFinalized << ". It changed one RHS " << oneChanged + << ", stopped the iteration process " << converged << std::endl; if (verbose_) { array tmp(status->get_executor()->get_master(), *status); - os_ << tmp.get_const_data(); + *os_ << tmp.get_const_data(); if (residual != nullptr) { - os_ << demangle_name(residual) - << as>(residual) << std::endl; + *os_ << demangle_name(residual) + << as>(residual) << std::endl; } if (residual_norm != nullptr) { - os_ << demangle_name(residual_norm) - << as>(residual_norm) - << std::endl; + *os_ << demangle_name(residual_norm) + << as>(residual_norm) + << std::endl; } if (solution != nullptr) { - os_ << demangle_name(solution) - << as>(solution) << std::endl; + *os_ << demangle_name(solution) + << as>(solution) << std::endl; } } } @@ -460,28 +460,28 @@ void Stream::on_iteration_complete( const LinOp* solution, const LinOp* residual_norm, const LinOp* implicit_sq_residual_norm) const { - os_ << prefix_ << "iteration " << num_iterations - << " completed with solver " << demangle_name(solver) - << " with residual " << demangle_name(residual) << ", solution " - << demangle_name(solution) << ", residual_norm " - << demangle_name(residual_norm) << " and implicit_sq_residual_norm " - << demangle_name(implicit_sq_residual_norm) << std::endl; + *os_ << prefix_ << "iteration " << num_iterations + << " completed with solver " << demangle_name(solver) + << " with residual " << demangle_name(residual) << ", solution " + << demangle_name(solution) << ", residual_norm " + << demangle_name(residual_norm) << " and implicit_sq_residual_norm " + << demangle_name(implicit_sq_residual_norm) << std::endl; if (verbose_) { - os_ << demangle_name(residual) - << as>(residual) << std::endl; + *os_ << demangle_name(residual) + << as>(residual) << std::endl; if (solution != nullptr) { - os_ << demangle_name(solution) - << as>(solution) << std::endl; + *os_ << demangle_name(solution) + << as>(solution) << std::endl; } if (residual_norm != nullptr) { - os_ << demangle_name(residual_norm) - << as>(residual_norm) - << std::endl; + *os_ << demangle_name(residual_norm) + << as>(residual_norm) + << std::endl; } if (implicit_sq_residual_norm != nullptr) { - os_ << demangle_name(implicit_sq_residual_norm) - << as>(implicit_sq_residual_norm) - << std::endl; + *os_ << demangle_name(implicit_sq_residual_norm) + << as>(implicit_sq_residual_norm) + << std::endl; } } } diff --git a/core/test/log/CMakeLists.txt b/core/test/log/CMakeLists.txt index 668bb2b8501..7fa9d87b900 100644 --- a/core/test/log/CMakeLists.txt +++ b/core/test/log/CMakeLists.txt @@ -3,5 +3,6 @@ ginkgo_create_test(logger) if (GINKGO_HAVE_PAPI_SDE) ginkgo_create_test(papi PAPI::PAPI) endif() +ginkgo_create_test(performance_hint) ginkgo_create_test(record) ginkgo_create_test(stream) diff --git a/core/test/log/performance_hint.cpp b/core/test/log/performance_hint.cpp new file mode 100644 index 00000000000..22c0781981b --- /dev/null +++ b/core/test/log/performance_hint.cpp @@ -0,0 +1,241 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +TEST(PerformanceHint, CatchesRepeatedAllocationFree) +{ + std::ostringstream ss; + auto exec = gko::ReferenceExecutor::create(); + exec->add_logger(gko::log::PerformanceHint::create(exec, ss)); + + for (int i = 0; i < 101; i++) { + gko::Array{exec, 25}; + } + + ASSERT_EQ(ss.str(), + "[PERFORMANCE] >>> Observed 10 allocate-free pairs of size 25 " + "that may point to unnecessary allocations.\n[PERFORMANCE] >>> " + "Observed 100 allocate-free pairs of size 25 that may point to " + "unnecessary allocations.\n"); +} + + +TEST(PerformanceHint, IgnoresSmallAllocationFreePairs) +{ + std::ostringstream ss; + auto exec = gko::ReferenceExecutor::create(); + exec->add_logger(gko::log::PerformanceHint::create(exec, ss, 32)); + + for (int i = 0; i < 101; i++) { + gko::Array{exec, 25}; + } + + ASSERT_EQ(ss.str(), ""); +} + + +TEST(PerformanceHint, IgnoresSmallAllocations) +{ + std::ostringstream ss; + auto exec = gko::ReferenceExecutor::create(); + exec->add_logger(gko::log::PerformanceHint::create(exec, ss, 16, 16, 50)); + + { + std::vector> big(10, gko::Array{exec, 32}); + std::vector> small(100, gko::Array{exec, 1}); + std::vector> big2(10, gko::Array{exec, 32}); + } + + ASSERT_EQ(ss.str(), + "[PERFORMANCE] >>> Observed 10 allocate-free pairs of size 32 " + "that may point to unnecessary allocations.\n"); +} + + +TEST(PerformanceHint, ForgetsOldAllocations) +{ + std::ostringstream ss; + auto exec = gko::ReferenceExecutor::create(); + exec->add_logger(gko::log::PerformanceHint::create(exec, ss, 16, 16, 9)); + + std::vector> data; + data.reserve(10); + for (int i = 0; i < 10; i++) { + data.emplace_back(exec, 32); + } + // by now we have forgotten the first allocation. + while (!data.empty()) { + data.pop_back(); + } + + ASSERT_EQ(ss.str(), ""); + + for (int i = 0; i < 10; i++) { + data.emplace_back(exec, 32); + } + data.clear(); + + ASSERT_EQ(ss.str(), + "[PERFORMANCE] >>> Observed 10 allocate-free pairs of size 32 " + "that may point to unnecessary allocations.\n"); +} + + +template +std::string ptr_to_str(const T* location) +{ + std::ostringstream oss; + oss << std::hex << "0x" << gko::uintptr(location); + return oss.str(); +} + + +TEST(PerformanceHint, CatchesRepeatedCrossExecutorCopy) +{ + std::ostringstream ss; + std::ostringstream ss2; + auto exec = gko::ReferenceExecutor::create(); + auto exec2 = gko::ReferenceExecutor::create(); + exec->add_logger(gko::log::PerformanceHint::create(exec, ss)); + exec2->add_logger(gko::log::PerformanceHint::create(exec, ss2)); + gko::Array a_small{exec, 1}; + gko::Array b_small{exec2, 1}; + gko::Array a{exec, 16}; + gko::Array b{exec2, 16}; + b.fill(0); + const auto ptr1 = ptr_to_str(a.get_data()); + const auto ptr2 = ptr_to_str(b.get_data()); + + for (int i = 0; i < 101; i++) { + a_small = b_small; + a = b; + } + + ASSERT_EQ( + ss.str(), + "[PERFORMANCE] >>> Observed 10 cross-executor copies from " + ptr2 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 10 cross-executor copies to " + + ptr1 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 100 cross-executor copies from " + + ptr2 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 100 cross-executor copies to " + + ptr1 + " that may point to unnecessary data transfers.\n"); + ASSERT_EQ( + ss2.str(), + "[PERFORMANCE] >>> Observed 10 cross-executor copies from " + ptr2 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 10 cross-executor copies to " + + ptr1 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 100 cross-executor copies from " + + ptr2 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 100 cross-executor copies to " + + ptr1 + " that may point to unnecessary data transfers.\n"); +} + + +TEST(PerformanceHint, IgnoresSmallCrossExecutorCopy) +{ + std::ostringstream ss; + std::ostringstream ss2; + auto exec = gko::ReferenceExecutor::create(); + auto exec2 = gko::ReferenceExecutor::create(); + exec->add_logger(gko::log::PerformanceHint::create(exec, ss, 16, 1024)); + exec2->add_logger(gko::log::PerformanceHint::create(exec, ss2, 16, 1024)); + gko::Array a{exec, 16}; + gko::Array b{exec2, 16}; + b.fill(0); + + for (int i = 0; i < 101; i++) { + a = b; + } + + ASSERT_EQ(ss.str(), ""); + ASSERT_EQ(ss2.str(), ""); +} + + +TEST(PerformanceHint, PrintsStatus) +{ + std::ostringstream ss; + auto exec = gko::ReferenceExecutor::create(); + auto exec2 = gko::ReferenceExecutor::create(); + auto logger = gko::share(gko::log::PerformanceHint::create(exec, ss)); + exec->add_logger(logger); + gko::Array a{exec, 16}; + gko::Array b{exec2, 16}; + b.fill(0); + const auto ptr1 = ptr_to_str(a.get_data()); + const auto ptr2 = ptr_to_str(b.get_data()); + + for (int i = 0; i < 101; i++) { + gko::Array{exec, 16}; + a = b; + } + ss.str(""); + + logger->print_status(); + ASSERT_EQ( + ss.str(), + "[PERFORMANCE] >>> Observed 101 allocate-free pairs of size 64 that " + "may point to unnecessary allocations.\n[PERFORMANCE] >>> Observed 101 " + "cross-executor copies from " + + ptr2 + + " that may point to unnecessary data transfers.\n[PERFORMANCE] >>> " + "Observed 101 cross-executor copies to " + + ptr1 + " that may point to unnecessary data transfers.\n"); +} + + +} // namespace diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 73976c24089..399a8f703e7 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -673,9 +673,14 @@ class Executor : public log::EnableLogging { void copy_from(const Executor* src_exec, size_type num_elems, const T* src_ptr, T* dest_ptr) const { + const auto src_loc = reinterpret_cast(src_ptr); + const auto dest_loc = reinterpret_cast(dest_ptr); this->template log( - src_exec, this, reinterpret_cast(src_ptr), - reinterpret_cast(dest_ptr), num_elems * sizeof(T)); + src_exec, this, src_loc, dest_loc, num_elems * sizeof(T)); + if (this != src_exec) { + src_exec->template log( + src_exec, this, src_loc, dest_loc, num_elems * sizeof(T)); + } try { this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr, dest_ptr); @@ -696,8 +701,11 @@ class Executor : public log::EnableLogging { } } this->template log( - src_exec, this, reinterpret_cast(src_ptr), - reinterpret_cast(dest_ptr), num_elems * sizeof(T)); + src_exec, this, src_loc, dest_loc, num_elems * sizeof(T)); + if (this != src_exec) { + src_exec->template log( + src_exec, this, src_loc, dest_loc, num_elems * sizeof(T)); + } } /** diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index 35283c5b84a..c70c6265353 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -642,6 +642,10 @@ class EnableLogging : public PolymorphicBase { template void log(Params&&... params) const { + /*if (!std::is_base_of::value) { + static_cast(this)->get_executor()->log( + std::forward(params)...); + }*/ for (auto& logger : loggers_) { logger->template on(std::forward(params)...); } diff --git a/include/ginkgo/core/log/performance_hint.hpp b/include/ginkgo/core/log/performance_hint.hpp new file mode 100644 index 00000000000..0d0727f91e6 --- /dev/null +++ b/include/ginkgo/core/log/performance_hint.hpp @@ -0,0 +1,133 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_LOG_PERFORMANCE_HINT_HPP_ +#define GKO_PUBLIC_CORE_LOG_PERFORMANCE_HINT_HPP_ + + +#include +#include +#include + + +#include + + +namespace gko { +namespace log { + + +/** + * PerformanceHint is a Logger which analyzes the performance of the application + * and outputs hints for unnecessary copies and allocations. + * + * @ingroup log + */ +class PerformanceHint : public Logger { +public: + void on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const override; + + void on_free_completed(const Executor* exec, + const uintptr& location) const override; + + void on_copy_completed(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const override; + + /** + * Writes out the cross-executor writes and allocations that have been + * stored so far. + */ + void print_status() const; + + /** + * Creates a PerformanceHint logger. This dynamically allocates the memory, + * constructs the object and returns an std::unique_ptr to this object. + * + * @param exec the executor + * @param os the stream used for this logger + * @param allocation_size_limit ignore allocations below this limit (bytes) + * @param copy_size_limit ignore copies below this limit (bytes) + * @param histogram_max_size how many allocation sizes and/or pointers to + * keep track of at most? + * + * @return an std::unique_ptr to the the constructed object + */ + static std::unique_ptr create( + std::shared_ptr exec, std::ostream& os = std::cerr, + size_type allocation_size_limit = 16, size_type copy_size_limit = 16, + size_type histogram_max_size = 1024) + { + return std::unique_ptr( + new PerformanceHint(exec, os, allocation_size_limit, + copy_size_limit, histogram_max_size)); + } + +protected: + explicit PerformanceHint(std::shared_ptr exec, + std::ostream& os, size_type allocation_size_limit, + size_type copy_size_limit, + size_type histogram_max_size) + : Logger(exec, mask_), + os_(&os), + allocation_size_limit_{allocation_size_limit}, + copy_size_limit_{copy_size_limit}, + histogram_max_size{histogram_max_size} + {} + +private: + // set a breakpoint here if you want to see where the output comes from! + std::ostream& log() const; + + std::ostream* os_; + mutable std::unordered_map allocation_sizes_; + mutable std::unordered_map allocation_histogram_; + mutable std::unordered_map copy_src_histogram_; + mutable std::unordered_map copy_dst_histogram_; + size_type allocation_size_limit_ = 16; + size_type copy_size_limit_ = 16; + size_type histogram_max_size = 1024; + static constexpr Logger::mask_type mask_ = + Logger::allocation_completed_mask | Logger::free_completed_mask | + Logger::copy_completed_mask; + static constexpr const char* prefix_ = "[PERFORMANCE] >>> "; +}; + + +} // namespace log +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_LOG_PERFORMANCE_HINT_HPP_ diff --git a/include/ginkgo/core/log/stream.hpp b/include/ginkgo/core/log/stream.hpp index 8c3bdabfa89..369a9bbbd7e 100644 --- a/include/ginkgo/core/log/stream.hpp +++ b/include/ginkgo/core/log/stream.hpp @@ -188,7 +188,7 @@ class Stream : public Logger { static std::unique_ptr create( std::shared_ptr exec, const Logger::mask_type& enabled_events = Logger::all_events_mask, - std::ostream& os = std::cout, bool verbose = false) + std::ostream& os = std::cerr, bool verbose = false) { return std::unique_ptr( new Stream(exec, enabled_events, os, verbose)); @@ -206,16 +206,15 @@ class Stream : public Logger { * includes always printing residuals and other information * which can give a large output. */ - explicit Stream( - std::shared_ptr exec, - const Logger::mask_type& enabled_events = Logger::all_events_mask, - std::ostream& os = std::cout, bool verbose = false) - : Logger(exec, enabled_events), os_(os), verbose_(verbose) + explicit Stream(std::shared_ptr exec, + const Logger::mask_type& enabled_events, std::ostream& os, + bool verbose) + : Logger(exec, enabled_events), os_(&os), verbose_(verbose) {} private: - std::ostream& os_; + std::ostream* os_; static constexpr const char* prefix_ = "[LOG] >>> "; bool verbose_; }; diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index cb4eeebe268..de3b1e288af 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -81,6 +81,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include