From 6006fc4625875b4f99e9a38a6906f89fe5060850 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 19 Oct 2020 16:59:40 +0200 Subject: [PATCH 1/8] Move memory operations from Executor to MemorySpace classes. --- include/ginkgo/core/base/executor.hpp | 391 +++++++++--------- include/ginkgo/core/base/memory_space.hpp | 468 ++++++++++++++++++++++ 2 files changed, 661 insertions(+), 198 deletions(-) create mode 100644 include/ginkgo/core/base/memory_space.hpp diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 1df29abc59c..c76f05c29c3 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -473,69 +474,6 @@ class Executor : public log::EnableLogging { this->run(op); } - /** - * Allocates memory in this Executor. - * - * @tparam T datatype to allocate - * - * @param num_elems number of elements of type T to allocate - * - * @throw AllocationError if the allocation failed - * - * @return pointer to allocated memory - */ - template - T *alloc(size_type num_elems) const - { - this->template log( - this, num_elems * sizeof(T)); - T *allocated = static_cast(this->raw_alloc(num_elems * sizeof(T))); - this->template log( - this, num_elems * sizeof(T), reinterpret_cast(allocated)); - return allocated; - } - - /** - * Frees memory previously allocated with Executor::alloc(). - * - * If `ptr` is a `nullptr`, the function has no effect. - * - * @param ptr pointer to the allocated memory block - */ - void free(void *ptr) const noexcept - { - this->template log( - this, reinterpret_cast(ptr)); - this->raw_free(ptr); - this->template log( - this, reinterpret_cast(ptr)); - } - - /** - * Copies data from another Executor. - * - * @tparam T datatype to copy - * - * @param src_exec Executor from which the memory will be copied - * @param num_elems number of elements of type T to copy - * @param src_ptr pointer to a block of memory containing the data to be - * copied - * @param dest_ptr pointer to an allocated block of memory - * where the data will be copied to - */ - template - void copy_from(const Executor *src_exec, size_type num_elems, - const T *src_ptr, T *dest_ptr) const - { - this->template log( - src_exec, this, reinterpret_cast(src_ptr), - reinterpret_cast(dest_ptr), num_elems * sizeof(T)); - this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr, dest_ptr); - this->template log( - src_exec, this, reinterpret_cast(src_ptr), - reinterpret_cast(dest_ptr), num_elems * sizeof(T)); - } - /** * Copies data within this Executor. * @@ -582,60 +520,21 @@ class Executor : public log::EnableLogging { virtual std::shared_ptr get_master() const noexcept = 0; /** - * Synchronize the operations launched on the executor with its master. - */ - virtual void synchronize() const = 0; - -protected: - /** - * Allocates raw memory in this Executor. - * - * @param size number of bytes to allocate - * - * @throw AllocationError if the allocation failed - * - * @return raw pointer to allocated memory + * Returns the associated memory space of this Executor. + * @return the associated memory space of this Executor. */ - virtual void *raw_alloc(size_type size) const = 0; + virtual std::shared_ptr get_mem_space() noexcept = 0; /** - * Frees memory previously allocated with Executor::alloc(). - * - * If `ptr` is a `nullptr`, the function has no effect. - * - * @param ptr pointer to the allocated memory block + * @copydoc get_mem_space */ - virtual void raw_free(void *ptr) const noexcept = 0; + virtual std::shared_ptr get_mem_space() const + noexcept = 0; /** - * Copies raw data from another Executor. - * - * @param src_exec Executor from which the memory will be copied - * @param n_bytes number of bytes to copy - * @param src_ptr pointer to a block of memory containing the data to be - * copied - * @param dest_ptr pointer to an allocated block of memory where the data - * will be copied to + * Synchronize the operations launched on the executor with its master. */ - virtual void raw_copy_from(const Executor *src_exec, size_type n_bytes, - const void *src_ptr, void *dest_ptr) const = 0; - -/** - * @internal - * Declares a raw_copy_to() overload for a specified Executor subclass. - * - * This is the second stage of the double dispatch emulation required to - * implement raw_copy_from(). - * - * @param _exec_type the Executor subclass - */ -#define GKO_ENABLE_RAW_COPY_TO(_exec_type, ...) \ - virtual void raw_copy_to(const _exec_type *dest_exec, size_type n_bytes, \ - const void *src_ptr, void *dest_ptr) const = 0 - - GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_RAW_COPY_TO); - -#undef GKO_ENABLE_RAW_COPY_TO + virtual void synchronize() const = 0; private: /** @@ -690,66 +589,6 @@ class Executor : public log::EnableLogging { }; -/** - * This is a deleter that uses an executor's `free` method to deallocate the - * data. - * - * @tparam T the type of object being deleted - * - * @ingroup Executor - */ -template -class executor_deleter { -public: - using pointer = T *; - - /** - * Creates a new deleter. - * - * @param exec the executor used to free the data - */ - explicit executor_deleter(std::shared_ptr exec) - : exec_{exec} - {} - - /** - * Deletes the object. - * - * @param ptr pointer to the object being deleted - */ - void operator()(pointer ptr) const - { - if (exec_) { - exec_->free(ptr); - } - } - -private: - std::shared_ptr exec_; -}; - -// a specialization for arrays -template -class executor_deleter { -public: - using pointer = T[]; - - explicit executor_deleter(std::shared_ptr exec) - : exec_{exec} - {} - - void operator()(pointer ptr) const - { - if (exec_) { - exec_->free(ptr); - } - } - -private: - std::shared_ptr exec_; -}; - - namespace detail { @@ -763,13 +602,6 @@ class ExecutorBase : public Executor { this->template log(this, &op); } -protected: - void raw_copy_from(const Executor *src_exec, size_type n_bytes, - const void *src_ptr, void *dest_ptr) const override - { - src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); - } - private: ConcreteExecutor *self() noexcept { @@ -823,11 +655,6 @@ class EnableDeviceReset { } // namespace detail -#define GKO_OVERRIDE_RAW_COPY_TO(_executor_type, ...) \ - void raw_copy_to(const _executor_type *dest_exec, size_type n_bytes, \ - const void *src_ptr, void *dest_ptr) const override - - /** * This is the Executor subclass which represents the OpenMP device * (typically CPU). @@ -840,6 +667,8 @@ class OmpExecutor : public detail::ExecutorBase, friend class detail::ExecutorBase; public: + using DefaultMemorySpace = HostMemorySpace; + /** * Creates a new OmpExecutor. */ @@ -852,16 +681,44 @@ class OmpExecutor : public detail::ExecutorBase, std::shared_ptr get_master() const noexcept override; + std::shared_ptr get_mem_space() noexcept override; + + std::shared_ptr get_mem_space() const noexcept override; + void synchronize() const override; protected: - OmpExecutor() = default; + OmpExecutor() { mem_space_instance_ = HostMemorySpace::create(); } + + OmpExecutor(std::shared_ptr mem_space) + : mem_space_instance_(mem_space) + { + if (!check_mem_space_validity(mem_space_instance_)) { + GKO_MEMSPACE_MISMATCH(NOT_HOST); + } + } - void *raw_alloc(size_type size) const override; + OmpExecutor(std::shared_ptr mem_space) + : mem_space_instance_(mem_space) + { + if (!check_mem_space_validity(mem_space_instance_)) { + GKO_MEMSPACE_MISMATCH(NOT_HOST); + } + } - void raw_free(void *ptr) const noexcept override; + bool check_mem_space_validity(std::shared_ptr mem_space) + { + auto check_default_mem_space = + dynamic_cast(mem_space.get()); + if (check_default_mem_space == nullptr) { + return false; + } else { + return true; + } + } - GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); +private: + std::shared_ptr mem_space_instance_; }; @@ -881,11 +738,27 @@ using DefaultExecutor = OmpExecutor; */ class ReferenceExecutor : public OmpExecutor { public: + /** + * Creates a new ReferenceExecutor with an existing memory space. + * + */ static std::shared_ptr create() { return std::shared_ptr(new ReferenceExecutor()); } + /** + * Creates a new ReferenceExecutor with an existing memory space. + * + * @param memory_space The memory space to be associated with the executor. + */ + static std::shared_ptr create( + std::shared_ptr memory_space) + { + return std::shared_ptr( + new ReferenceExecutor(memory_space)); + } + void run(const Operation &op) const override { this->template log(this, &op); @@ -894,8 +767,40 @@ class ReferenceExecutor : public OmpExecutor { this->template log(this, &op); } + std::shared_ptr get_mem_space() noexcept override + { + return this->mem_space_instance_; + } + + std::shared_ptr get_mem_space() const noexcept override + { + return this->mem_space_instance_; + } + protected: - ReferenceExecutor() = default; + ReferenceExecutor() { mem_space_instance_ = HostMemorySpace::create(); } + + ReferenceExecutor(std::shared_ptr mem_space) + : mem_space_instance_(mem_space) + { + if (!check_mem_space_validity(mem_space_instance_)) { + GKO_MEMSPACE_MISMATCH(NOT_HOST); + } + } + + bool check_mem_space_validity(std::shared_ptr mem_space) + { + auto check_default_mem_space = + dynamic_cast(mem_space.get()); + if (check_default_mem_space == nullptr) { + return false; + } else { + return true; + } + } + +private: + std::shared_ptr mem_space_instance_; }; @@ -918,23 +823,45 @@ class CudaExecutor : public detail::ExecutorBase, friend class detail::ExecutorBase; public: + using DefaultMemorySpace = CudaMemorySpace; + /** * Creates a new CudaExecutor. * * @param device_id the CUDA device id of this device * @param master an executor on the host that is used to invoke the device * kernels + * @param device_reset Flag that controls calls CudaDeviceReset at the end + * of its scope. */ static std::shared_ptr create( int device_id, std::shared_ptr master, bool device_reset = false); + /** + * Creates a new CudaExecutor. + * + * @param device_id the CUDA device id of this device + * @param memory_space the memory space associated to the executor. + * @param master an executor on the host that is used to invoke the device + * kernels + * @param device_reset Flag that controls calls CudaDeviceReset at the end + * of its scope. + */ + static std::shared_ptr create( + int device_id, std::shared_ptr memory_space, + std::shared_ptr master, bool device_reset = false); + ~CudaExecutor() { decrease_num_execs(this->device_id_); } std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; + std::shared_ptr get_mem_space() noexcept override; + + std::shared_ptr get_mem_space() const noexcept override; + void synchronize() const override; void run(const Operation &op) const override; @@ -1019,13 +946,41 @@ class CudaExecutor : public detail::ExecutorBase, this->set_gpu_property(); this->init_handles(); increase_num_execs(device_id); + mem_space_instance_ = CudaMemorySpace::create(device_id); } - void *raw_alloc(size_type size) const override; - - void raw_free(void *ptr) const noexcept override; + CudaExecutor(int device_id, std::shared_ptr mem_space, + std::shared_ptr master, bool device_reset = false) + : EnableDeviceReset{device_reset}, + device_id_(device_id), + master_(master), + num_warps_per_sm_(0), + num_multiprocessor_(0), + major_(0), + minor_(0), + mem_space_instance_(mem_space) + { + assert(device_id < max_devices); + this->set_gpu_property(); + this->init_handles(); + increase_num_execs(device_id); + if (!check_mem_space_validity(mem_space_instance_)) { + GKO_MEMSPACE_MISMATCH(NOT_CUDA); + } + } - GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + bool check_mem_space_validity(std::shared_ptr mem_space) + { + auto check_cuda_mem_space = + dynamic_cast(mem_space.get()); + auto check_cuda_uvm_mem_space = + dynamic_cast(mem_space.get()); + if (check_cuda_mem_space == nullptr && + check_cuda_uvm_mem_space == nullptr) { + return false; + } + return true; + } static void increase_num_execs(unsigned device_id) { @@ -1053,6 +1008,7 @@ class CudaExecutor : public detail::ExecutorBase, int major_; int minor_; int warp_size_; + std::shared_ptr mem_space_instance_; template using handle_manager = std::unique_ptr>; @@ -1084,23 +1040,43 @@ class HipExecutor : public detail::ExecutorBase, friend class detail::ExecutorBase; public: + using DefaultMemorySpace = HipMemorySpace; + /** * Creates a new HipExecutor. * * @param device_id the HIP device id of this device * @param master an executor on the host that is used to invoke the device * kernels + * @param device_reset Flag that controls calls HipDeviceReset at the end + * of its scope. */ static std::shared_ptr create(int device_id, std::shared_ptr master, bool device_reset = false); + /** + * Creates a new HipExecutor. + * + * @param device_id the HIP device id of this device + * @param memory_space the memory space associated to the executor. + * @param master an executor on the host that is used to invoke the device + * kernels + */ + static std::shared_ptr create( + int device_id, std::shared_ptr memory_space, + std::shared_ptr master, bool device_reset = false); + ~HipExecutor() { decrease_num_execs(this->device_id_); } std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; + std::shared_ptr get_mem_space() noexcept override; + + std::shared_ptr get_mem_space() const noexcept override; + void synchronize() const override; void run(const Operation &op) const override; @@ -1185,13 +1161,34 @@ class HipExecutor : public detail::ExecutorBase, this->set_gpu_property(); this->init_handles(); increase_num_execs(device_id); + mem_space_instance_ = HipMemorySpace::create(device_id); } - void *raw_alloc(size_type size) const override; - - void raw_free(void *ptr) const noexcept override; + HipExecutor(int device_id, std::shared_ptr mem_space, + std::shared_ptr master, bool device_reset = false) + : device_id_(device_id), + master_(master), + num_multiprocessor_(0), + mem_space_instance_(mem_space) + { + assert(device_id < max_devices); + this->set_gpu_property(); + this->init_handles(); + increase_num_execs(device_id); + if (!check_mem_space_validity(mem_space_instance_)) { + GKO_MEMSPACE_MISMATCH(NOT_HIP); + } + } - GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); + bool check_mem_space_validity(std::shared_ptr mem_space) + { + auto check_hip_mem_space = + dynamic_cast(mem_space.get()); + if (check_hip_mem_space == nullptr) { + return false; + } + return true; + } static void increase_num_execs(int device_id) { @@ -1219,6 +1216,7 @@ class HipExecutor : public detail::ExecutorBase, int major_; int minor_; int warp_size_; + std::shared_ptr mem_space_instance_; template using handle_manager = std::unique_ptr>; @@ -1238,9 +1236,6 @@ using DefaultExecutor = HipExecutor; } // namespace kernels -#undef GKO_OVERRIDE_RAW_COPY_TO - - } // namespace gko diff --git a/include/ginkgo/core/base/memory_space.hpp b/include/ginkgo/core/base/memory_space.hpp new file mode 100644 index 00000000000..1a4a9a44159 --- /dev/null +++ b/include/ginkgo/core/base/memory_space.hpp @@ -0,0 +1,468 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MEMORY_SPACE_HPP_ +#define GKO_CORE_MEMORY_SPACE_HPP_ + + +#include +#include +#include +#include +#include + + +#include +#include +#include + + +namespace gko { + + +#define GKO_FORWARD_DECLARE(_type, ...) class _type + +GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_FORWARD_DECLARE); + +#undef GKO_FORWARD_DECLARE + + +namespace detail { + + +template +class MemorySpaceBase; + + +} // namespace detail + + +class MemorySpace : public log::EnableLogging { + template + friend class detail::MemorySpaceBase; + +public: + virtual ~MemorySpace() = default; + + MemorySpace() = default; + MemorySpace(MemorySpace &) = delete; + MemorySpace(MemorySpace &&) = default; + MemorySpace &operator=(MemorySpace &) = delete; + MemorySpace &operator=(MemorySpace &&) = default; + + /** + * Allocates memory in this MemorySpace. + * + * @tparam T datatype to allocate + * + * @param num_elems number of elements of type T to allocate + * + * @throw AllocationError if the allocation failed + * + * @return pointer to allocated memory + */ + template + T *alloc(size_type num_elems) const + { + this->template log( + this, num_elems * sizeof(T)); + T *allocated = static_cast(this->raw_alloc(num_elems * sizeof(T))); + this->template log( + this, num_elems * sizeof(T), reinterpret_cast(allocated)); + return allocated; + } + + /** + * Frees memory previously allocated with MemorySpace::alloc(). + * + * If `ptr` is a `nullptr`, the function has no effect. + * + * @param ptr pointer to the allocated memory block + */ + void free(void *ptr) const noexcept + { + this->template log( + this, reinterpret_cast(ptr)); + this->raw_free(ptr); + this->template log( + this, reinterpret_cast(ptr)); + } + + /** + * Copies data from another MemorySpace. + * + * @tparam T datatype to copy + * + * @param src_mem_space MemorySpace from which the memory will be copied + * @param num_elems number of elements of type T to copy + * @param src_ptr pointer to a block of memory containing the data to be + * copied + * @param dest_ptr pointer to an allocated block of memory + * where the data will be copied to + */ + template + void copy_from(const MemorySpace *src_mem_space, size_type num_elems, + const T *src_ptr, T *dest_ptr) const + { + this->template log( + src_mem_space, this, reinterpret_cast(src_ptr), + reinterpret_cast(dest_ptr), num_elems * sizeof(T)); + this->raw_copy_from(src_mem_space, num_elems * sizeof(T), src_ptr, + dest_ptr); + this->template log( + src_mem_space, this, reinterpret_cast(src_ptr), + reinterpret_cast(dest_ptr), num_elems * sizeof(T)); + } + + /** + * Synchronize the operations launched on the executor with its master. + */ + virtual void synchronize() const = 0; + +protected: + /** + * Allocates raw memory in this MemorySpace. + * + * @param size number of bytes to allocate + * + * @throw AllocationError if the allocation failed + * + * @return raw pointer to allocated memory + */ + virtual void *raw_alloc(size_type size) const = 0; + + /** + * Frees memory previously allocated with MemorySpace::alloc(). + * + * If `ptr` is a `nullptr`, the function has no effect. + * + * @param ptr pointer to the allocated memory block + */ + virtual void raw_free(void *ptr) const noexcept = 0; + + /** + * Copies raw data from another MemorySpace. + * + * @param src_mem_space MemorySpace from which the memory will be copied + * @param n_bytes number of bytes to copy + * @param src_ptr pointer to a block of memory containing the data to be + * copied + * @param dest_ptr pointer to an allocated block of memory where the data + * will be copied to + */ + virtual void raw_copy_from(const MemorySpace *src_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const = 0; + +/** + * @internal + * Declares a raw_copy_to() overload for a specified MemorySpace subclass. + * + * This is the second stage of the double dispatch emulation required to + * implement raw_copy_from(). + * + * @param _mem_space_type the MemorySpace subclass + */ +#define GKO_ENABLE_RAW_COPY_TO(_mem_space_type, ...) \ + virtual void raw_copy_to(const _mem_space_type *dest_mem_space, \ + size_type n_bytes, const void *src_ptr, \ + void *dest_ptr) const = 0 + + GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_ENABLE_RAW_COPY_TO); + +#undef GKO_ENABLE_RAW_COPY_TO +}; + + +/** + * This is a deleter that uses an mem_space's `free` method to deallocate + * the data. + * + * @tparam T the type of object being deleted + * + * @ingroup MemorySpace + */ +template +class memory_space_deleter { +public: + using pointer = T *; + + /** + * Creates a new deleter. + * + * @param mem_space the mem_spaceutor used to free the data + */ + explicit memory_space_deleter(std::shared_ptr mem_space) + : mem_space_{mem_space} + {} + + /** + * Deletes the object. + * + * @param ptr pointer to the object being deleted + */ + void operator()(pointer ptr) const + { + if (mem_space_) { + mem_space_->free(ptr); + } + } + +private: + std::shared_ptr mem_space_; +}; + +// a specialization for arrays +template +class memory_space_deleter { +public: + using pointer = T[]; + + explicit memory_space_deleter(std::shared_ptr mem_space) + : mem_space_{mem_space} + {} + + void operator()(pointer ptr) const + { + if (mem_space_) { + mem_space_->free(ptr); + } + } + +private: + std::shared_ptr mem_space_; +}; + + +namespace detail { + + +template +class MemorySpaceBase : public MemorySpace { +public: + void raw_copy_from(const MemorySpace *src_mem_space, size_type n_bytes, + const void *src_ptr, void *dest_ptr) const override + { + src_mem_space->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); + } + +private: + ConcreteMemorySpace *self() noexcept + { + return static_cast(this); + } + + const ConcreteMemorySpace *self() const noexcept + { + return static_cast(this); + } +}; + + +} // namespace detail + + +#define GKO_OVERRIDE_RAW_COPY_TO(_memory_space_type, ...) \ + void raw_copy_to(const _memory_space_type *dest_mem_space, \ + size_type n_bytes, const void *src_ptr, void *dest_ptr) \ + const override + + +class HostMemorySpace : public detail::MemorySpaceBase { + friend class detail::MemorySpaceBase; + +public: + /** + * Creates a new HostMemorySpace. + */ + static std::shared_ptr create() + { + return std::shared_ptr(new HostMemorySpace()); + } + + void synchronize() const override; + +protected: + HostMemorySpace() = default; + + void *raw_alloc(size_type size) const override; + + void raw_free(void *ptr) const noexcept override; + + GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO); +}; + + +class CudaMemorySpace : public detail::MemorySpaceBase { + friend class detail::MemorySpaceBase; + +public: + /** + * Creates a new CudaMemorySpace. + * + * @param device_id the CUDA device id of this device + */ + static std::shared_ptr create(int device_id) + { + return std::shared_ptr(new CudaMemorySpace(device_id)); + } + + /** + * Get the CUDA device id of the device associated to this memory_space. + */ + int get_device_id() const noexcept { return this->device_id_; } + + /** + * Get the number of devices present on the system. + */ + static int get_num_devices(); + + void synchronize() const override; + +protected: + CudaMemorySpace() = default; + + CudaMemorySpace(int device_id) : device_id_(device_id) + { + assert(device_id < max_devices); + } + + void *raw_alloc(size_type size) const override; + + void raw_free(void *ptr) const noexcept override; + + GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO); + +private: + int device_id_; + static constexpr int max_devices = 64; +}; + + +class CudaUVMSpace : public detail::MemorySpaceBase { + friend class detail::MemorySpaceBase; + +public: + /** + * Creates a new CudaUVMSpace. + * + * @param device_id the CUDA device id of this device + */ + static std::shared_ptr create(int device_id) + { + return std::shared_ptr(new CudaUVMSpace(device_id)); + } + + /** + * Get the CUDA device id of the device associated to this memory_space. + */ + int get_device_id() const noexcept { return this->device_id_; } + + /** + * Get the number of devices present on the system. + */ + static int get_num_devices(); + + void synchronize() const override; + +protected: + CudaUVMSpace() = default; + + CudaUVMSpace(int device_id) : device_id_(device_id) + { + assert(device_id < max_devices); + } + + void *raw_alloc(size_type size) const override; + + void raw_free(void *ptr) const noexcept override; + + GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO); + +private: + int device_id_; + static constexpr int max_devices = 64; +}; + + +class HipMemorySpace : public detail::MemorySpaceBase { + friend class detail::MemorySpaceBase; + +public: + /** + * Creates a new HipMemorySpace. + * + * @param device_id the HIP device id of this device + */ + static std::shared_ptr create(int device_id) + { + return std::shared_ptr(new HipMemorySpace(device_id)); + } + + /** + * Get the HIP device id of the device associated to this memory_space. + */ + int get_device_id() const noexcept { return this->device_id_; } + + /** + * Get the number of devices present on the system. + */ + static int get_num_devices(); + + void synchronize() const override; + +protected: + HipMemorySpace() = default; + + HipMemorySpace(int device_id) : device_id_(device_id) + { + assert(device_id < max_devices); + } + + void *raw_alloc(size_type size) const override; + + void raw_free(void *ptr) const noexcept override; + + GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO); + +private: + int device_id_; + static constexpr int max_devices = 64; +}; + +#undef GKO_OVERRIDE_RAW_COPY_TO + + +} // namespace gko + + +#endif // GKO_CORE_MEMORY_SPACE_HPP_ From d98259d529c84b3421fc3b180019e7fda6713b1c Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 19 Oct 2020 17:00:42 +0200 Subject: [PATCH 2/8] Add implementations for cuda, hip and host memory operations. --- core/CMakeLists.txt | 1 + core/base/memory_space.cpp | 69 ++++++++ core/device_hooks/cuda_hooks.cpp | 74 +++++++-- core/device_hooks/hip_hooks.cpp | 36 ++-- core/devices/cuda/executor.cpp | 13 ++ core/devices/hip/executor.cpp | 13 ++ core/devices/omp/executor.cpp | 15 +- cuda/CMakeLists.txt | 1 + cuda/base/memory_space.cpp | 277 +++++++++++++++++++++++++++++++ hip/CMakeLists.txt | 1 + hip/base/memory_space.hip.cpp | 170 +++++++++++++++++++ 11 files changed, 640 insertions(+), 30 deletions(-) create mode 100644 core/base/memory_space.cpp create mode 100644 cuda/base/memory_space.cpp create mode 100644 hip/base/memory_space.hip.cpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index f16d63ff617..1a4cac50e89 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -8,6 +8,7 @@ target_sources(ginkgo base/combination.cpp base/composition.cpp base/executor.cpp + base/memory_space.cpp base/mtx_io.cpp base/perturbation.cpp base/version.cpp diff --git a/core/base/memory_space.cpp b/core/base/memory_space.cpp new file mode 100644 index 00000000000..b03b5c8cad7 --- /dev/null +++ b/core/base/memory_space.cpp @@ -0,0 +1,69 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include + + +namespace gko { + + +void HostMemorySpace::raw_free(void *ptr) const noexcept { std::free(ptr); } + + +void HostMemorySpace::synchronize() const +{ + // Currently a no-op +} + + +void *HostMemorySpace::raw_alloc(size_type num_bytes) const +{ + return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "Host", num_bytes); +} + + +void HostMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + std::memcpy(dest_ptr, src_ptr, num_bytes); +} + + +} // namespace gko diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index d41d77d24d9..be44f709872 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include @@ -62,12 +63,30 @@ std::shared_ptr CudaExecutor::create( } -void OmpExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +std::shared_ptr CudaExecutor::create( + int device_id, std::shared_ptr mem_space, + std::shared_ptr master, bool device_reset) +{ + return std::shared_ptr( + new CudaExecutor(device_id, mem_space, std::move(master)), + device_reset); +} + + +void HostMemorySpace::raw_copy_to(const CudaMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_free(void *ptr) const noexcept +void CudaMemorySpace::raw_free(void *ptr) const noexcept +{ + // Free must never fail, as it can be called in destructors. + // If the nvidia module was not compiled, the library couldn't have + // allocated the memory, so there is no need to deallocate it. +} + + +void CudaUVMSpace::raw_free(void *ptr) const noexcept { // Free must never fail, as it can be called in destructors. // If the nvidia module was not compiled, the library couldn't have @@ -75,24 +94,59 @@ void CudaExecutor::raw_free(void *ptr) const noexcept } -void *CudaExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(cuda); +void *CudaMemorySpace::raw_alloc(size_type num_bytes) const + GKO_NOT_COMPILED(nvidia); + +void *CudaUVMSpace::raw_alloc(size_type num_bytes) const + GKO_NOT_COMPILED(nvidia); -void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const + +void CudaMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaMemorySpace::raw_copy_to(const CudaMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(cuda); +void CudaMemorySpace::raw_copy_to(const CudaUVMSpace *dest_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const GKO_NOT_COMPILED(cuda); + + +void CudaUVMSpace::raw_copy_to(const CudaUVMSpace *dest_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const GKO_NOT_COMPILED(cuda); + + +void CudaUVMSpace::raw_copy_to(const CudaMemorySpace *dest_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const GKO_NOT_COMPILED(cuda); + + +void CudaUVMSpace::raw_copy_to(const HipMemorySpace *dest_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const GKO_NOT_COMPILED(cuda); + + +void HostMemorySpace::raw_copy_to(const CudaUVMSpace *dest_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const GKO_NOT_COMPILED(cuda); + + +void CudaUVMSpace::raw_copy_to(const HostMemorySpace *dest_mem_space, + size_type n_bytes, const void *src_ptr, + void *dest_ptr) const GKO_NOT_COMPILED(cuda); + + void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index a2e288b4157..8e658299816 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include @@ -59,12 +60,21 @@ std::shared_ptr HipExecutor::create( } -void OmpExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +std::shared_ptr HipExecutor::create( + int device_id, std::shared_ptr memory_space, + std::shared_ptr master, bool device_reset) +{ + return std::shared_ptr( + new HipExecutor(device_id, memory_space, std::move(master)), + device_reset); +} + +void HostMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_free(void *ptr) const noexcept +void HipMemorySpace::raw_free(void *ptr) const noexcept { // Free must never fail, as it can be called in destructors. // If the nvidia module was not compiled, the library couldn't have @@ -72,21 +82,27 @@ void HipExecutor::raw_free(void *ptr) const noexcept } -void *HipExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(hip); +void *HipMemorySpace::raw_alloc(size_type num_bytes) const + GKO_NOT_COMPILED(hip); + + +void HipMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const + GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipMemorySpace::raw_copy_to(const CudaMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipMemorySpace::raw_copy_to(const CudaUVMSpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const GKO_NOT_COMPILED(hip); diff --git a/core/devices/cuda/executor.cpp b/core/devices/cuda/executor.cpp index 3566578a681..a5eb8304eb1 100644 --- a/core/devices/cuda/executor.cpp +++ b/core/devices/cuda/executor.cpp @@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include +#include namespace gko { @@ -48,6 +49,18 @@ std::shared_ptr CudaExecutor::get_master() const noexcept } +std::shared_ptr CudaExecutor::get_mem_space() noexcept +{ + return this->mem_space_instance_; +} + + +std::shared_ptr CudaExecutor::get_mem_space() const noexcept +{ + return this->mem_space_instance_; +} + + unsigned CudaExecutor::num_execs[max_devices]; diff --git a/core/devices/hip/executor.cpp b/core/devices/hip/executor.cpp index f4787523290..58b67cc06f7 100644 --- a/core/devices/hip/executor.cpp +++ b/core/devices/hip/executor.cpp @@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include +#include namespace gko { @@ -45,6 +46,18 @@ std::shared_ptr HipExecutor::get_master() const noexcept } +std::shared_ptr HipExecutor::get_mem_space() noexcept +{ + return this->mem_space_instance_; +} + + +std::shared_ptr HipExecutor::get_mem_space() const noexcept +{ + return this->mem_space_instance_; +} + + int HipExecutor::num_execs[max_devices]; diff --git a/core/devices/omp/executor.cpp b/core/devices/omp/executor.cpp index e53a1b53c43..c621333d8f5 100644 --- a/core/devices/omp/executor.cpp +++ b/core/devices/omp/executor.cpp @@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ #include +#include #include @@ -44,9 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -void OmpExecutor::raw_free(void *ptr) const noexcept { std::free(ptr); } - - std::shared_ptr OmpExecutor::get_master() noexcept { return this->shared_from_this(); @@ -59,18 +57,15 @@ std::shared_ptr OmpExecutor::get_master() const noexcept } -void *OmpExecutor::raw_alloc(size_type num_bytes) const +std::shared_ptr OmpExecutor::get_mem_space() noexcept { - return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "OMP", num_bytes); + return this->mem_space_instance_; } -void OmpExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +std::shared_ptr OmpExecutor::get_mem_space() const noexcept { - if (num_bytes > 0) { - std::memcpy(dest_ptr, src_ptr, num_bytes); - } + return this->mem_space_instance_; } diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index b19651b6a59..382ba7d69aa 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -69,6 +69,7 @@ target_sources(ginkgo_cuda PRIVATE base/exception.cpp base/executor.cpp + base/memory_space.cpp base/version.cpp components/absolute_array.cu components/fill_array.cu diff --git a/cuda/base/memory_space.cpp b/cuda/base/memory_space.cpp new file mode 100644 index 00000000000..9d0d9780b05 --- /dev/null +++ b/cuda/base/memory_space.cpp @@ -0,0 +1,277 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include + + +#include "cuda/base/device_guard.hpp" + + +namespace gko { + + +void CudaMemorySpace::synchronize() const +{ + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize()); +} + + +void CudaUVMSpace::synchronize() const +{ + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize()); +} + + +int CudaMemorySpace::get_num_devices() +{ + int deviceCount = 0; + auto error_code = cudaGetDeviceCount(&deviceCount); + if (error_code == cudaErrorNoDevice) { + return 0; + } + GKO_ASSERT_NO_CUDA_ERRORS(error_code); + return deviceCount; +} + + +int CudaUVMSpace::get_num_devices() +{ + int deviceCount = 0; + auto error_code = cudaGetDeviceCount(&deviceCount); + if (error_code == cudaErrorNoDevice) { + return 0; + } + GKO_ASSERT_NO_CUDA_ERRORS(error_code); + return deviceCount; +} + + +void HostMemorySpace::raw_copy_to(const CudaMemorySpace *dest, + size_type num_bytes, const void *src_ptr, + void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(dest->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyHostToDevice)); + } +} + + +void CudaMemorySpace::raw_free(void *ptr) const noexcept +{ + cuda::device_guard g(this->get_device_id()); + auto error_code = cudaFree(ptr); + if (error_code != cudaSuccess) { +#if GKO_VERBOSE_LEVEL >= 1 + // Unfortunately, if memory free fails, there's not much we can do + std::cerr << "Unrecoverable CUDA error on device " << this->device_id_ + << " in " << __func__ << ": " << cudaGetErrorName(error_code) + << ": " << cudaGetErrorString(error_code) << std::endl + << "Exiting program" << std::endl; +#endif + std::exit(error_code); + } +} + + +void CudaUVMSpace::raw_free(void *ptr) const noexcept +{ + cuda::device_guard g(this->get_device_id()); + auto error_code = cudaFree(ptr); + if (error_code != cudaSuccess) { +#if GKO_VERBOSE_LEVEL >= 1 + // Unfortunately, if memory free fails, there's not much we can do + std::cerr << "Unrecoverable CUDA error on device " << this->device_id_ + << " in " << __func__ << ": " << cudaGetErrorName(error_code) + << ": " << cudaGetErrorString(error_code) << std::endl + << "Exiting program" << std::endl; +#endif + std::exit(error_code); + } +} + + +void *CudaMemorySpace::raw_alloc(size_type num_bytes) const +{ + void *dev_ptr = nullptr; + cuda::device_guard g(this->get_device_id()); + auto error_code = cudaMalloc(&dev_ptr, num_bytes); + if (error_code != cudaErrorMemoryAllocation) { + GKO_ASSERT_NO_CUDA_ERRORS(error_code); + } + GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes); + return dev_ptr; +} + + +void CudaMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyDeviceToHost)); + } +} + + +void CudaMemorySpace::raw_copy_to(const CudaMemorySpace *dest, + size_type num_bytes, const void *src_ptr, + void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, + this->get_device_id(), num_bytes)); + } +} + + +void CudaUVMSpace::raw_copy_to(const HipMemorySpace *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ +#if GINKGO_HIP_PLATFORM_NVCC == 1 + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, + this->get_device_id(), num_bytes)); + } +#else + GKO_NOT_SUPPORTED(this); +#endif +} + + +void CudaMemorySpace::raw_copy_to(const HipMemorySpace *dest, + size_type num_bytes, const void *src_ptr, + void *dest_ptr) const +{ +#if GINKGO_HIP_PLATFORM_NVCC == 1 + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, + this->get_device_id(), num_bytes)); + } +#else + GKO_NOT_SUPPORTED(this); +#endif +} + + +void CudaMemorySpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, + this->get_device_id(), num_bytes)); + } +} + + +void CudaUVMSpace::raw_copy_to(const CudaMemorySpace *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, + this->get_device_id(), num_bytes)); + } +} + + +void CudaUVMSpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, + this->get_device_id(), num_bytes)); + } +} + + +void HostMemorySpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(dest->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyHostToDevice)); + } +} + + +void *CudaUVMSpace::raw_alloc(size_type num_bytes) const +{ + void *dev_ptr = nullptr; + cuda::device_guard g(this->get_device_id()); + auto error_code = cudaMallocManaged(&dev_ptr, num_bytes); + if (error_code != cudaErrorMemoryAllocation) { + GKO_ASSERT_NO_CUDA_ERRORS(error_code); + } + GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes); + return dev_ptr; +} + + +void CudaUVMSpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + cuda::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_CUDA_ERRORS( + cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyDeviceToHost)); + } +} + + +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 2bb1e6911ee..90255bce397 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -142,6 +142,7 @@ endif() set(GINKGO_HIP_SOURCES base/exception.hip.cpp base/executor.hip.cpp + base/memory_space.hip.cpp base/version.hip.cpp components/absolute_array.hip.cpp components/fill_array.hip.cpp diff --git a/hip/base/memory_space.hip.cpp b/hip/base/memory_space.hip.cpp new file mode 100644 index 00000000000..f46b23d8027 --- /dev/null +++ b/hip/base/memory_space.hip.cpp @@ -0,0 +1,170 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include + + +#include "hip/base/device_guard.hip.hpp" + + +namespace gko { + + +void HostMemorySpace::raw_copy_to(const HipMemorySpace *dest, + size_type num_bytes, const void *src_ptr, + void *dest_ptr) const +{ + if (num_bytes > 0) { + hip::device_guard g(dest->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS( + hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyHostToDevice)); + } +} + + +void HipMemorySpace::raw_free(void *ptr) const noexcept +{ + hip::device_guard g(this->get_device_id()); + auto error_code = hipFree(ptr); + if (error_code != hipSuccess) { +#if GKO_VERBOSE_LEVEL >= 1 + // Unfortunately, if memory free fails, there's not much we can do + std::cerr << "Unrecoverable HIP error on device " << this->device_id_ + << " in " << __func__ << ": " << hipGetErrorName(error_code) + << ": " << hipGetErrorString(error_code) << std::endl + << "Exiting program" << std::endl; +#endif + std::exit(error_code); + } +} + + +void *HipMemorySpace::raw_alloc(size_type num_bytes) const +{ + void *dev_ptr = nullptr; + hip::device_guard g(this->get_device_id()); + auto error_code = hipMalloc(&dev_ptr, num_bytes); + if (error_code != hipErrorMemoryAllocation) { + GKO_ASSERT_NO_HIP_ERRORS(error_code); + } + GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes); + return dev_ptr; +} + + +void HipMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ + if (num_bytes > 0) { + hip::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS( + hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyDeviceToHost)); + } +} + + +void HipMemorySpace::raw_copy_to(const CudaMemorySpace *dest, + size_type num_bytes, const void *src_ptr, + void *dest_ptr) const +{ +#if GINKGO_HIP_PLATFORM_NVCC == 1 + if (num_bytes > 0) { + hip::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(), + src_ptr, this->get_device_id(), + num_bytes)); + } +#else + GKO_NOT_SUPPORTED(this); +#endif +} + + +void HipMemorySpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes, + const void *src_ptr, void *dest_ptr) const +{ +#if GINKGO_HIP_PLATFORM_NVCC == 1 + if (num_bytes > 0) { + hip::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(), + src_ptr, this->get_device_id(), + num_bytes)); + } +#else + GKO_NOT_SUPPORTED(this); +#endif +} + + +void HipMemorySpace::raw_copy_to(const HipMemorySpace *dest, + size_type num_bytes, const void *src_ptr, + void *dest_ptr) const +{ + if (num_bytes > 0) { + hip::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(), + src_ptr, this->get_device_id(), + num_bytes)); + } +} + + +void HipMemorySpace::synchronize() const +{ + hip::device_guard g(this->get_device_id()); + GKO_ASSERT_NO_HIP_ERRORS(hipDeviceSynchronize()); +} + + +int HipMemorySpace::get_num_devices() +{ + int deviceCount = 0; + auto error_code = hipGetDeviceCount(&deviceCount); + if (error_code == hipErrorNoDevice) { + return 0; + } + GKO_ASSERT_NO_HIP_ERRORS(error_code); + return deviceCount; +} + + +} // namespace gko From a0d5c11c094f070036aecd763c65fde171a1dd5f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 19 Oct 2020 17:01:53 +0200 Subject: [PATCH 3/8] Add core and host memory space tests. --- core/test/base/executor.cpp | 168 +------------------------------- core/test/base/memory_space.cpp | 159 ++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 166 deletions(-) create mode 100644 core/test/base/memory_space.cpp diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 1b2e1b0698e..2df2e8b7a41 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -95,52 +95,6 @@ TEST(OmpExecutor, RunsCorrectLambdaOperation) } -TEST(OmpExecutor, AllocatesAndFreesMemory) -{ - const int num_elems = 10; - exec_ptr omp = gko::OmpExecutor::create(); - int *ptr = nullptr; - - ASSERT_NO_THROW(ptr = omp->alloc(num_elems)); - ASSERT_NO_THROW(omp->free(ptr)); -} - - -TEST(OmpExecutor, FreeAcceptsNullptr) -{ - exec_ptr omp = gko::OmpExecutor::create(); - ASSERT_NO_THROW(omp->free(nullptr)); -} - - -TEST(OmpExecutor, FailsWhenOverallocating) -{ - const gko::size_type num_elems = 1ll << 50; // 4PB of integers - exec_ptr omp = gko::OmpExecutor::create(); - int *ptr = nullptr; - - ASSERT_THROW(ptr = omp->alloc(num_elems), gko::AllocationError); - - omp->free(ptr); -} - - -TEST(OmpExecutor, CopiesData) -{ - int orig[] = {3, 8}; - const int num_elems = std::extent::value; - exec_ptr omp = gko::OmpExecutor::create(); - int *copy = omp->alloc(num_elems); - - // user code is run on the OMP, so local variables are in OMP memory - omp->copy(num_elems, orig, copy); - EXPECT_EQ(3, copy[0]); - EXPECT_EQ(8, copy[1]); - - omp->free(copy); -} - - TEST(OmpExecutor, IsItsOwnMaster) { exec_ptr omp = gko::OmpExecutor::create(); @@ -172,95 +126,15 @@ TEST(ReferenceExecutor, RunsCorrectLambdaOperation) } -TEST(ReferenceExecutor, AllocatesAndFreesMemory) -{ - const int num_elems = 10; - exec_ptr ref = gko::ReferenceExecutor::create(); - int *ptr = nullptr; - - ASSERT_NO_THROW(ptr = ref->alloc(num_elems)); - ASSERT_NO_THROW(ref->free(ptr)); -} - - -TEST(ReferenceExecutor, FreeAcceptsNullptr) -{ - exec_ptr omp = gko::ReferenceExecutor::create(); - ASSERT_NO_THROW(omp->free(nullptr)); -} - - -TEST(ReferenceExecutor, FailsWhenOverallocating) -{ - const gko::size_type num_elems = 1ll << 50; // 4PB of integers - exec_ptr ref = gko::ReferenceExecutor::create(); - int *ptr = nullptr; - - ASSERT_THROW(ptr = ref->alloc(num_elems), gko::AllocationError); - - ref->free(ptr); -} - - -TEST(ReferenceExecutor, CopiesData) -{ - int orig[] = {3, 8}; - const int num_elems = std::extent::value; - exec_ptr ref = gko::ReferenceExecutor::create(); - int *copy = ref->alloc(num_elems); - - // ReferenceExecutor is a type of OMP executor, so this is O.K. - ref->copy(num_elems, orig, copy); - EXPECT_EQ(3, copy[0]); - EXPECT_EQ(8, copy[1]); - - ref->free(copy); -} - - TEST(ReferenceExecutor, CopiesSingleValue) { exec_ptr ref = gko::ReferenceExecutor::create(); - int *el = ref->alloc(1); + int *el = ref->get_mem_space()->alloc(1); el[0] = 83683; EXPECT_EQ(83683, ref->copy_val_to_host(el)); - ref->free(el); -} - - -TEST(ReferenceExecutor, CopiesDataFromOmp) -{ - int orig[] = {3, 8}; - const int num_elems = std::extent::value; - exec_ptr omp = gko::OmpExecutor::create(); - exec_ptr ref = gko::ReferenceExecutor::create(); - int *copy = ref->alloc(num_elems); - - // ReferenceExecutor is a type of OMP executor, so this is O.K. - ref->copy_from(omp.get(), num_elems, orig, copy); - EXPECT_EQ(3, copy[0]); - EXPECT_EQ(8, copy[1]); - - ref->free(copy); -} - - -TEST(ReferenceExecutor, CopiesDataToOmp) -{ - int orig[] = {3, 8}; - const int num_elems = std::extent::value; - exec_ptr omp = gko::OmpExecutor::create(); - exec_ptr ref = gko::ReferenceExecutor::create(); - int *copy = omp->alloc(num_elems); - - // ReferenceExecutor is a type of OMP executor, so this is O.K. - omp->copy_from(ref.get(), num_elems, orig, copy); - EXPECT_EQ(3, copy[0]); - EXPECT_EQ(8, copy[1]); - - ref->free(copy); + ref->get_mem_space()->free(el); } @@ -414,42 +288,4 @@ TEST(HipExecutor, CanSetDeviceResetBoolean) } -template -struct mock_free : T { - /** - * @internal Due to a bug with gcc 5.3, the constructor needs to be called - * with `()` operator instead of `{}`. - */ - template - mock_free(Params &&... params) : T(std::forward(params)...) - {} - - void raw_free(void *ptr) const noexcept override - { - called_free = true; - T::raw_free(ptr); - } - - mutable bool called_free{false}; -}; - - -TEST(ExecutorDeleter, DeletesObject) -{ - auto ref = std::make_shared>(); - auto x = ref->alloc(5); - - gko::executor_deleter{ref}(x); - - ASSERT_TRUE(ref->called_free); -} - - -TEST(ExecutorDeleter, AvoidsDeletionForNullExecutor) -{ - int x[5]; - ASSERT_NO_THROW(gko::executor_deleter{nullptr}(x)); -} - - } // namespace diff --git a/core/test/base/memory_space.cpp b/core/test/base/memory_space.cpp new file mode 100644 index 00000000000..df7cd0a5152 --- /dev/null +++ b/core/test/base/memory_space.cpp @@ -0,0 +1,159 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include + + +namespace { + + +using mem_space_ptr = std::shared_ptr; + + +TEST(HostMemorySpace, AllocatesAndFreesMemory) +{ + const int num_elems = 10; + mem_space_ptr host = gko::HostMemorySpace::create(); + int *ptr = nullptr; + + ASSERT_NO_THROW(ptr = host->alloc(num_elems)); + ASSERT_NO_THROW(host->free(ptr)); +} + + +TEST(HostMemorySpace, FreeAcceptsNullptr) +{ + mem_space_ptr host = gko::HostMemorySpace::create(); + ASSERT_NO_THROW(host->free(nullptr)); +} + + +TEST(HostMemorySpace, FailsWhenOverallocating) +{ + const gko::size_type num_elems = 1ll << 50; // 4PB of integers + mem_space_ptr host = gko::HostMemorySpace::create(); + int *ptr = nullptr; + + ASSERT_THROW(ptr = host->alloc(num_elems), gko::AllocationError); + + host->free(ptr); +} + + +TEST(HostMemorySpace, CopiesData) +{ + int orig[] = {3, 8}; + const int num_elems = std::extent::value; + mem_space_ptr host = gko::HostMemorySpace::create(); + int *copy = host->alloc(num_elems); + + // user code is run on the HOST, so local variables are in HOST memory + host->copy_from(host.get(), num_elems, orig, copy); + EXPECT_EQ(3, copy[0]); + EXPECT_EQ(8, copy[1]); + + host->free(copy); +} + + +TEST(CudaMemorySpace, KnowsItsDeviceId) +{ + auto cuda = gko::CudaMemorySpace::create(0); + + ASSERT_EQ(0, cuda->get_device_id()); +} + + +TEST(CudaUVMSpace, KnowsItsDeviceId) +{ + auto cuda_uvm = gko::CudaUVMSpace::create(0); + + ASSERT_EQ(0, cuda_uvm->get_device_id()); +} + + +TEST(HipMemorySpace, KnowsItsDeviceId) +{ + auto hip = gko::HipMemorySpace::create(0); + + ASSERT_EQ(0, hip->get_device_id()); +} + + +template +struct mock_free : T { + /** + * @internal Due to a bug with gcc 5.3, the constructor needs to be called + * with `()` operator instead of `{}`. + */ + template + mock_free(Params &&... params) : T(std::forward(params)...) + {} + + void raw_free(void *ptr) const noexcept override + { + called_free = true; + T::raw_free(ptr); + } + + mutable bool called_free{false}; +}; + + +TEST(MemorySpaceDeleter, DeletesObject) +{ + auto host = std::make_shared>(); + auto x = host->alloc(5); + + gko::memory_space_deleter{host}(x); + + ASSERT_TRUE(host->called_free); +} + + +TEST(MemorySpaceDeleter, AvoidsDeletionForNullMemorySpace) +{ + int x[5]; + ASSERT_NO_THROW(gko::memory_space_deleter{nullptr}(x)); +} + + +} // namespace From 8d567a34d4a5044dd655ae74d7653550de73cf99 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 19 Oct 2020 17:02:15 +0200 Subject: [PATCH 4/8] Add CUDA and HIP memory space tests. --- cuda/test/base/CMakeLists.txt | 1 + cuda/test/base/cuda_executor.cu | 91 ---------- cuda/test/base/cuda_memory_space.cu | 238 +++++++++++++++++++++++++ hip/test/base/CMakeLists.txt | 1 + hip/test/base/hip_executor.hip.cpp | 95 ---------- hip/test/base/hip_memory_space.hip.cpp | 178 ++++++++++++++++++ 6 files changed, 418 insertions(+), 186 deletions(-) create mode 100644 cuda/test/base/cuda_memory_space.cu create mode 100644 hip/test/base/hip_memory_space.hip.cpp diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index d9ba808fa21..37fdc0b989e 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_cuda_test(cuda_executor) +ginkgo_create_cuda_test(cuda_memory_space) ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(lin_op) ginkgo_create_cuda_test(math) diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index 2bcf5961bbd..aaa1c9c99f2 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -126,71 +126,6 @@ TEST_F(CudaExecutor, MasterKnowsNumberOfDevices) } -TEST_F(CudaExecutor, AllocatesAndFreesMemory) -{ - int *ptr = nullptr; - - ASSERT_NO_THROW(ptr = cuda->alloc(2)); - ASSERT_NO_THROW(cuda->free(ptr)); -} - - -TEST_F(CudaExecutor, FailsWhenOverallocating) -{ - const gko::size_type num_elems = 1ll << 50; // 4PB of integers - int *ptr = nullptr; - - ASSERT_THROW( - { - ptr = cuda->alloc(num_elems); - cuda->synchronize(); - }, - gko::AllocationError); - - cuda->free(ptr); -} - - -__global__ void check_data(int *data) -{ - if (data[0] != 3 || data[1] != 8) { - asm("trap;"); - } -} - -TEST_F(CudaExecutor, CopiesDataToCuda) -{ - int orig[] = {3, 8}; - auto *copy = cuda->alloc(2); - - cuda->copy_from(omp.get(), 2, orig, copy); - - check_data<<<1, 1>>>(copy); - ASSERT_NO_THROW(cuda->synchronize()); - cuda->free(copy); -} - - -__global__ void init_data(int *data) -{ - data[0] = 3; - data[1] = 8; -} - -TEST_F(CudaExecutor, CopiesDataFromCuda) -{ - int copy[2]; - auto orig = cuda->alloc(2); - init_data<<<1, 1>>>(orig); - - omp->copy_from(cuda.get(), 2, orig, copy); - - EXPECT_EQ(3, copy[0]); - ASSERT_EQ(8, copy[1]); - cuda->free(orig); -} - - /* Properly checks if it works only when multiple GPUs exist */ TEST_F(CudaExecutor, PreservesDeviceSettings) { @@ -218,32 +153,6 @@ TEST_F(CudaExecutor, RunsOnProperDevice) } -TEST_F(CudaExecutor, CopiesDataFromCudaToCuda) -{ - int copy[2]; - auto orig = cuda->alloc(2); - GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0)); - init_data<<<1, 1>>>(orig); - - auto copy_cuda2 = cuda2->alloc(2); - cuda2->copy_from(cuda.get(), 2, orig, copy_cuda2); - - // Check that the data is really on GPU2 and ensure we did not cheat - int value = -1; - GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(cuda2->get_device_id())); - check_data<<<1, 1>>>(copy_cuda2); - GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0)); - cuda2->run(ExampleOperation(value)); - ASSERT_EQ(value, cuda2->get_device_id()); - // Put the results on OpenMP and run CPU side assertions - omp->copy_from(cuda2.get(), 2, copy_cuda2, copy); - EXPECT_EQ(3, copy[0]); - ASSERT_EQ(8, copy[1]); - cuda->free(copy_cuda2); - cuda->free(orig); -} - - TEST_F(CudaExecutor, Synchronizes) { // Todo design a proper unit test once we support streams diff --git a/cuda/test/base/cuda_memory_space.cu b/cuda/test/base/cuda_memory_space.cu new file mode 100644 index 00000000000..39421ffd9dc --- /dev/null +++ b/cuda/test/base/cuda_memory_space.cu @@ -0,0 +1,238 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include + + +#include + + +#include + + +#include +#include + + +namespace { + + +class CudaMemorySpace : public ::testing::Test { +protected: + CudaMemorySpace() : cuda(nullptr), cuda2(nullptr) {} + + void SetUp() + { + omp = gko::HostMemorySpace::create(); + cuda = gko::CudaMemorySpace::create(0); + cuda2 = gko::CudaMemorySpace::create( + gko::CudaMemorySpace::get_num_devices() - 1); + } + + void TearDown() + { + if (cuda != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(cuda->synchronize()); + } + if (cuda2 != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(cuda2->synchronize()); + } + } + + std::shared_ptr omp; + std::shared_ptr cuda; + std::shared_ptr cuda2; +}; + + +TEST_F(CudaMemorySpace, AllocatesAndFreesMemory) +{ + int *ptr = nullptr; + + ASSERT_NO_THROW(ptr = cuda->alloc(2)); + ASSERT_NO_THROW(cuda->free(ptr)); +} + + +TEST_F(CudaMemorySpace, FailsWhenOverallocating) +{ + const gko::size_type num_elems = 1ll << 50; // 4PB of integers + int *ptr = nullptr; + + ASSERT_THROW( + { + ptr = cuda->alloc(num_elems); + cuda->synchronize(); + }, + gko::AllocationError); + + cuda->free(ptr); +} + + +__global__ void check_data(int *data) +{ + if (data[0] != 3 || data[1] != 8) { + asm("trap;"); + } +} + + +TEST_F(CudaMemorySpace, CopiesDataToCuda) +{ + int orig[] = {3, 8}; + auto *copy = cuda->alloc(2); + + cuda->copy_from(omp.get(), 2, orig, copy); + + check_data<<<1, 1>>>(copy); + ASSERT_NO_THROW(cuda->synchronize()); + cuda->free(copy); +} + + +__global__ void init_data(int *data) +{ + data[0] = 3; + data[1] = 8; +} + +TEST_F(CudaMemorySpace, CopiesDataFromCuda) +{ + int copy[2]; + auto orig = cuda->alloc(2); + init_data<<<1, 1>>>(orig); + + omp->copy_from(cuda.get(), 2, orig, copy); + + EXPECT_EQ(3, copy[0]); + ASSERT_EQ(8, copy[1]); + cuda->free(orig); +} + + +TEST_F(CudaMemorySpace, CopiesDataFromCudaToCuda) +{ + int copy[2]; + auto orig = cuda->alloc(2); + GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0)); + init_data<<<1, 1>>>(orig); + + auto copy_cuda2 = cuda2->alloc(2); + cuda2->copy_from(cuda.get(), 2, orig, copy_cuda2); + + // Check that the data is really on GPU2 and ensure we did not cheat + GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(cuda2->get_device_id())); + check_data<<<1, 1>>>(copy_cuda2); + GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0)); + + omp->copy_from(cuda2.get(), 2, copy_cuda2, copy); + + EXPECT_EQ(3, copy[0]); + ASSERT_EQ(8, copy[1]); + cuda2->free(copy_cuda2); + cuda->free(orig); +} + + +class CudaUVMSpace : public ::testing::Test { +protected: + CudaUVMSpace() : cuda_uvm(nullptr) {} + + void SetUp() { cuda_uvm = gko::CudaUVMSpace::create(0); } + + void TearDown() + { + if (cuda_uvm != nullptr) { + // ensure that previous calls finished and didn't throw an error + // ASSERT_NO_THROW(cuda_uvm->synchronize()); + cuda_uvm->synchronize(); + } + } + + std::shared_ptr cuda_uvm; +}; + + +TEST_F(CudaUVMSpace, UVMAllocatesAndFreesMemory) +{ + int *ptr = nullptr; + + ASSERT_NO_THROW(ptr = cuda_uvm->alloc(2)); + cuda_uvm->synchronize(); + ASSERT_NO_THROW(cuda_uvm->free(ptr)); +} + + +// TEST_F(CudaUVMSpace, UVMFailsWhenOverallocating) +// { +// const gko::size_type num_elems = 1ll << 50; // 4PB of integers +// int *ptr = nullptr; + +// ASSERT_THROW( +// { +// ptr = cuda_uvm->alloc(num_elems); +// cuda_uvm->synchronize(); +// }, +// gko::AllocationError); + +// cuda_uvm->free(ptr); +// } + + +TEST_F(CudaUVMSpace, CanBeAccessedFromHost) +{ + int *orig = cuda_uvm->alloc(2); + orig[0] = 1; + orig[1] = 2; + ASSERT_EQ(orig[0], 1); + ASSERT_EQ(orig[1], 2); + cuda_uvm->synchronize(); + cuda_uvm->free(orig); +} + + +TEST_F(CudaUVMSpace, CanBeAccessedFromDevice) +{ + int *orig = cuda_uvm->alloc(2); + orig[0] = 3; + orig[1] = 8; + check_data<<<1, 1>>>(orig); + cuda_uvm->synchronize(); + cuda_uvm->free(orig); +} + + +} // namespace diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 795ededc410..e814ea7e390 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_hip_test(hip_executor) +ginkgo_create_hip_test(hip_memory_space) ginkgo_create_hip_test(lin_op) ginkgo_create_hip_test(math) # Only hcc needs the libraries. nvcc only requires the headers. diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index 635639fc21e..7ff522bb62a 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -131,75 +131,6 @@ TEST_F(HipExecutor, MasterKnowsNumberOfDevices) } -TEST_F(HipExecutor, AllocatesAndFreesMemory) -{ - int *ptr = nullptr; - - ASSERT_NO_THROW(ptr = hip->alloc(2)); - ASSERT_NO_THROW(hip->free(ptr)); -} - - -TEST_F(HipExecutor, FailsWhenOverallocating) -{ - const gko::size_type num_elems = 1ll << 50; // 4PB of integers - int *ptr = nullptr; - - ASSERT_THROW( - { - ptr = hip->alloc(num_elems); - hip->synchronize(); - }, - gko::AllocationError); - - hip->free(ptr); -} - - -__global__ void check_data(int *data) -{ - if (data[0] != 3 || data[1] != 8) { -#if GINKGO_HIP_PLATFORM_HCC - asm("s_trap 0x02;"); -#else // GINKGO_HIP_PLATFORM_NVCC - asm("trap;"); -#endif - } -} - -TEST_F(HipExecutor, CopiesDataToHip) -{ - int orig[] = {3, 8}; - auto *copy = hip->alloc(2); - - hip->copy_from(omp.get(), 2, orig, copy); - - hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy); - ASSERT_NO_THROW(hip->synchronize()); - hip->free(copy); -} - - -__global__ void init_data(int *data) -{ - data[0] = 3; - data[1] = 8; -} - -TEST_F(HipExecutor, CopiesDataFromHip) -{ - int copy[2]; - auto orig = hip->alloc(2); - hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig); - - omp->copy_from(hip.get(), 2, orig, copy); - - EXPECT_EQ(3, copy[0]); - ASSERT_EQ(8, copy[1]); - hip->free(orig); -} - - /* Properly checks if it works only when multiple GPUs exist */ TEST_F(HipExecutor, PreservesDeviceSettings) { @@ -227,32 +158,6 @@ TEST_F(HipExecutor, RunsOnProperDevice) } -TEST_F(HipExecutor, CopiesDataFromHipToHip) -{ - int copy[2]; - auto orig = hip->alloc(2); - GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0)); - hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig); - - auto copy_hip2 = hip2->alloc(2); - hip2->copy_from(hip.get(), 2, orig, copy_hip2); - - // Check that the data is really on GPU2 and ensure we did not cheat - int value = -1; - GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(hip2->get_device_id())); - hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy_hip2); - GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0)); - hip2->run(ExampleOperation(value)); - ASSERT_EQ(value, hip2->get_device_id()); - // Put the results on OpenMP and run CPU side assertions - omp->copy_from(hip2.get(), 2, copy_hip2, copy); - EXPECT_EQ(3, copy[0]); - ASSERT_EQ(8, copy[1]); - hip->free(copy_hip2); - hip->free(orig); -} - - TEST_F(HipExecutor, Synchronizes) { // Todo design a proper unit test once we support streams diff --git a/hip/test/base/hip_memory_space.hip.cpp b/hip/test/base/hip_memory_space.hip.cpp new file mode 100644 index 00000000000..4fe5e8d9b48 --- /dev/null +++ b/hip/test/base/hip_memory_space.hip.cpp @@ -0,0 +1,178 @@ +/************************************************************* +Copyright (c) 2017-2020, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include + + +#include +#include + + +namespace { + + +class HipMemorySpace : public ::testing::Test { +protected: + HipMemorySpace() + : omp(gko::HostMemorySpace::create()), hip(nullptr), hip2(nullptr) + {} + + void SetUp() + { + ASSERT_GT(gko::HipMemorySpace::get_num_devices(), 0); + hip = gko::HipMemorySpace::create(0); + hip2 = gko::HipMemorySpace::create( + gko::HipMemorySpace::get_num_devices() - 1); + } + + void TearDown() + { + if (hip != nullptr) { + // ensure that previous calls finished and didn't throw an error + ASSERT_NO_THROW(hip->synchronize()); + } + } + + std::shared_ptr omp; + std::shared_ptr hip; + std::shared_ptr hip2; +}; + + +TEST_F(HipMemorySpace, AllocatesAndFreesMemory) +{ + int *ptr = nullptr; + + ASSERT_NO_THROW(ptr = hip->alloc(2)); + ASSERT_NO_THROW(hip->free(ptr)); +} + + +TEST_F(HipMemorySpace, FailsWhenOverallocating) +{ + const gko::size_type num_elems = 1ll << 50; // 4PB of integers + int *ptr = nullptr; + + ASSERT_THROW( + { + ptr = hip->alloc(num_elems); + hip->synchronize(); + }, + gko::AllocationError); + + hip->free(ptr); +} + + +__global__ void check_data(int *data) +{ + if (data[0] != 3 || data[1] != 8) { +#if GINKGO_HIP_PLATFORM_HCC + asm("s_trap 0x02;"); +#else // GINKGO_HIP_PLATFORM_NVCC + asm("trap;"); +#endif + } +} + +TEST_F(HipMemorySpace, CopiesDataToHip) +{ + int orig[] = {3, 8}; + auto *copy = hip->alloc(2); + + hip->copy_from(omp.get(), 2, orig, copy); + + hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy); + ASSERT_NO_THROW(hip->synchronize()); + hip->free(copy); +} + + +__global__ void init_data(int *data) +{ + data[0] = 3; + data[1] = 8; +} + +TEST_F(HipMemorySpace, CopiesDataFromHip) +{ + int copy[2]; + auto orig = hip->alloc(2); + hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig); + + omp->copy_from(hip.get(), 2, orig, copy); + + EXPECT_EQ(3, copy[0]); + ASSERT_EQ(8, copy[1]); + hip->free(orig); +} + + +TEST_F(HipMemorySpace, CopiesDataFromHipToHip) +{ + int copy[2]; + auto orig = hip->alloc(2); + GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0)); + hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig); + + auto copy_hip2 = hip2->alloc(2); + hip2->copy_from(hip.get(), 2, orig, copy_hip2); + + // Check that the data is really on GPU2 and ensure we did not cheat + GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(hip2->get_device_id())); + hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy_hip2); + GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0)); + // Put the results on OpenMP and run CPU side assertions + omp->copy_from(hip2.get(), 2, copy_hip2, copy); + EXPECT_EQ(3, copy[0]); + ASSERT_EQ(8, copy[1]); + hip->free(copy_hip2); + hip->free(orig); +} + + +TEST_F(HipMemorySpace, Synchronizes) +{ + // Todo design a proper unit test once we support streams + ASSERT_NO_THROW(hip->synchronize()); +} + + +} // namespace From 3fb1cb22b03305c90d0ae3d831755675881a8bed Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 19 Oct 2020 23:58:55 +0200 Subject: [PATCH 5/8] Update Logger interface (INTERFACE BREAKING), add some exceptions --- benchmark/utils/loggers.hpp | 54 +++++---- core/log/logger.cpp | 2 +- core/log/papi.cpp | 23 ++-- core/log/record.cpp | 36 +++--- core/log/stream.cpp | 25 ++-- core/test/log/convergence.cpp | 3 +- core/test/log/logger.cpp | 24 ++-- core/test/log/papi.cpp | 47 +++++--- core/test/log/record.cpp | 114 +++++++++++------- core/test/log/stream.cpp | 99 +++++++++------ .../adaptiveprecision-blockjacobi.cpp | 2 +- examples/custom-logger/custom-logger.cpp | 3 +- .../custom-stopping-criterion.cpp | 3 +- .../ir-ilu-preconditioned-solver.cpp | 2 +- .../iterative-refinement.cpp | 2 +- .../performance-debugging.cpp | 51 ++++---- .../simple-solver-logging.cpp | 18 +-- include/ginkgo/core/base/exception.hpp | 24 ++++ .../ginkgo/core/base/exception_helpers.hpp | 13 ++ include/ginkgo/core/base/executor.hpp | 14 +-- include/ginkgo/core/base/types.hpp | 17 +++ include/ginkgo/core/log/convergence.hpp | 6 +- include/ginkgo/core/log/logger.hpp | 63 +++++----- include/ginkgo/core/log/papi.hpp | 49 ++++---- include/ginkgo/core/log/record.hpp | 36 +++--- include/ginkgo/core/log/stream.hpp | 18 +-- reference/test/log/convergence.cpp | 3 +- reference/test/log/papi.cpp | 2 +- 28 files changed, 451 insertions(+), 302 deletions(-) diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index ea6bbea2797..635af7a0f48 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -48,41 +48,43 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // A logger that accumulates the time of all operations struct OperationLogger : gko::log::Logger { - void on_allocation_started(const gko::Executor *exec, + void on_allocation_started(const gko::MemorySpace *mem_space, const gko::size_type &) const override { - this->start_operation(exec, "allocate"); + this->start_operation(mem_space, "allocate"); } - void on_allocation_completed(const gko::Executor *exec, + void on_allocation_completed(const gko::MemorySpace *mem_space, const gko::size_type &, const gko::uintptr &) const override { - this->end_operation(exec, "allocate"); + this->end_operation(mem_space, "allocate"); } - void on_free_started(const gko::Executor *exec, + void on_free_started(const gko::MemorySpace *mem_space, const gko::uintptr &) const override { - this->start_operation(exec, "free"); + this->start_operation(mem_space, "free"); } - void on_free_completed(const gko::Executor *exec, + void on_free_completed(const gko::MemorySpace *mem_space, const gko::uintptr &) const override { - this->end_operation(exec, "free"); + this->end_operation(mem_space, "free"); } - void on_copy_started(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, + void on_copy_started(const gko::MemorySpace *from, + const gko::MemorySpace *to, const gko::uintptr &, + const gko::uintptr &, const gko::size_type &) const override { from->synchronize(); this->start_operation(to, "copy"); } - void on_copy_completed(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, + void on_copy_completed(const gko::MemorySpace *from, + const gko::MemorySpace *to, const gko::uintptr &, + const gko::uintptr &, const gko::size_type &) const override { from->synchronize(); @@ -118,14 +120,15 @@ struct OperationLogger : gko::log::Logger { } OperationLogger(std::shared_ptr exec, bool nested_name) - : gko::log::Logger(exec), use_nested_name{nested_name} + : gko::log::Logger(exec, exec->get_mem_space()), + use_nested_name{nested_name} {} private: - void start_operation(const gko::Executor *exec, - const std::string &name) const + template + void start_operation(const LogObject *obj, const std::string &name) const { - exec->synchronize(); + obj->synchronize(); const std::lock_guard lock(mutex); auto nested_name = nested.empty() || !use_nested_name ? name @@ -134,9 +137,12 @@ struct OperationLogger : gko::log::Logger { start[nested_name] = std::chrono::steady_clock::now(); } - void end_operation(const gko::Executor *exec, const std::string &name) const + // Helper to compute the end time and store the operation's time at its + // end. Also time nested operations. + template + void end_operation(const LogObject *obj, const std::string &name) const { - exec->synchronize(); + obj->synchronize(); const std::lock_guard lock(mutex); // if operations are properly nested, nested_name now ends with name auto nested_name = nested.back().first; @@ -163,7 +169,7 @@ struct OperationLogger : gko::log::Logger { struct StorageLogger : gko::log::Logger { - void on_allocation_completed(const gko::Executor *, + void on_allocation_completed(const gko::MemorySpace *, const gko::size_type &num_bytes, const gko::uintptr &location) const override { @@ -171,7 +177,7 @@ struct StorageLogger : gko::log::Logger { storage[location] = num_bytes; } - void on_free_completed(const gko::Executor *, + void on_free_completed(const gko::MemorySpace *, const gko::uintptr &location) const override { const std::lock_guard lock(mutex); @@ -190,7 +196,7 @@ struct StorageLogger : gko::log::Logger { } StorageLogger(std::shared_ptr exec) - : gko::log::Logger(exec) + : gko::log::Logger(exec, exec->get_mem_space()) {} private: @@ -235,7 +241,8 @@ struct ResidualLogger : gko::log::Logger { rapidjson::Value &true_res_norms, rapidjson::Value ×tamps, rapidjson::MemoryPoolAllocator<> &alloc) - : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), + : gko::log::Logger(exec, exec->get_mem_space(), + gko::log::Logger::iteration_complete_mask), matrix{matrix}, b{b}, start{std::chrono::steady_clock::now()}, @@ -267,7 +274,8 @@ struct IterationLogger : gko::log::Logger { } IterationLogger(std::shared_ptr exec) - : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask) + : gko::log::Logger(exec, exec->get_mem_space(), + gko::log::Logger::iteration_complete_mask) {} void write_data(rapidjson::Value &output, diff --git a/core/log/logger.cpp b/core/log/logger.cpp index 46ee98b2895..9820850be9b 100644 --- a/core/log/logger.cpp +++ b/core/log/logger.cpp @@ -38,7 +38,7 @@ namespace log { constexpr Logger::mask_type Logger::all_events_mask; -constexpr Logger::mask_type Logger::executor_events_mask; +constexpr Logger::mask_type Logger::memory_space_events_mask; constexpr Logger::mask_type Logger::operation_events_mask; constexpr Logger::mask_type Logger::polymorphic_object_events_mask; constexpr Logger::mask_type Logger::linop_events_mask; diff --git a/core/log/papi.cpp b/core/log/papi.cpp index 1c8a17419fa..a920b5226b7 100644 --- a/core/log/papi.cpp +++ b/core/log/papi.cpp @@ -42,40 +42,41 @@ namespace log { template -void Papi::on_allocation_started(const Executor *exec, +void Papi::on_allocation_started(const MemorySpace *mem_space, const size_type &num_bytes) const { - allocation_started.get_counter(exec) += num_bytes; + allocation_started.get_counter(mem_space) += num_bytes; } template -void Papi::on_allocation_completed(const Executor *exec, +void Papi::on_allocation_completed(const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) const { - allocation_completed.get_counter(exec) += num_bytes; + allocation_completed.get_counter(mem_space) += num_bytes; } template -void Papi::on_free_started(const Executor *exec, +void Papi::on_free_started(const MemorySpace *mem_space, const uintptr &location) const { - free_started.get_counter(exec) += 1; + free_started.get_counter(mem_space) += 1; } template -void Papi::on_free_completed(const Executor *exec, +void Papi::on_free_completed(const MemorySpace *mem_space, const uintptr &location) const { - free_completed.get_counter(exec) += 1; + free_completed.get_counter(mem_space) += 1; } template -void Papi::on_copy_started(const Executor *from, const Executor *to, +void Papi::on_copy_started(const MemorySpace *from, + const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const @@ -86,8 +87,8 @@ void Papi::on_copy_started(const Executor *from, const Executor *to, template -void Papi::on_copy_completed(const Executor *from, - const Executor *to, +void Papi::on_copy_completed(const MemorySpace *from, + const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const diff --git a/core/log/record.cpp b/core/log/record.cpp index 48026c1563b..48040198c5a 100644 --- a/core/log/record.cpp +++ b/core/log/record.cpp @@ -42,49 +42,49 @@ namespace gko { namespace log { -void Record::on_allocation_started(const Executor *exec, +void Record::on_allocation_started(const MemorySpace *mem_space, const size_type &num_bytes) const { append_deque(data_.allocation_started, - (std::unique_ptr( - new executor_data{exec, num_bytes, 0}))); + (std::unique_ptr( + new memory_space_data{mem_space, num_bytes, 0}))); } -void Record::on_allocation_completed(const Executor *exec, +void Record::on_allocation_completed(const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) const { append_deque(data_.allocation_completed, - (std::unique_ptr( - new executor_data{exec, num_bytes, location}))); + (std::unique_ptr( + new memory_space_data{mem_space, num_bytes, location}))); } -void Record::on_free_started(const Executor *exec, +void Record::on_free_started(const MemorySpace *mem_space, const uintptr &location) const { - append_deque( - data_.free_started, - (std::unique_ptr(new executor_data{exec, 0, location}))); + append_deque(data_.free_started, + (std::unique_ptr( + new memory_space_data{mem_space, 0, location}))); } -void Record::on_free_completed(const Executor *exec, +void Record::on_free_completed(const MemorySpace *mem_space, const uintptr &location) const { - append_deque( - data_.free_completed, - (std::unique_ptr(new executor_data{exec, 0, location}))); + append_deque(data_.free_completed, + (std::unique_ptr( + new memory_space_data{mem_space, 0, location}))); } -void Record::on_copy_started(const Executor *from, const Executor *to, +void Record::on_copy_started(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const { - using tuple = std::tuple; + using tuple = std::tuple; append_deque( data_.copy_started, (std::unique_ptr(new tuple{{from, num_bytes, location_from}, @@ -92,12 +92,12 @@ void Record::on_copy_started(const Executor *from, const Executor *to, } -void Record::on_copy_completed(const Executor *from, const Executor *to, +void Record::on_copy_completed(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const { - using tuple = std::tuple; + using tuple = std::tuple; append_deque( data_.copy_completed, (std::unique_ptr(new tuple{{from, num_bytes, location_from}, diff --git a/core/log/stream.cpp b/core/log/stream.cpp index 3cad7421aee..9810f450815 100644 --- a/core/log/stream.cpp +++ b/core/log/stream.cpp @@ -116,6 +116,7 @@ GKO_ENABLE_DEMANGLE_NAME(LinOp); GKO_ENABLE_DEMANGLE_NAME(LinOpFactory); GKO_ENABLE_DEMANGLE_NAME(stop::Criterion); GKO_ENABLE_DEMANGLE_NAME(Executor); +GKO_ENABLE_DEMANGLE_NAME(MemorySpace); GKO_ENABLE_DEMANGLE_NAME(Operation); @@ -123,46 +124,46 @@ GKO_ENABLE_DEMANGLE_NAME(Operation); template -void Stream::on_allocation_started(const Executor *exec, +void Stream::on_allocation_started(const MemorySpace *mem_space, const size_type &num_bytes) const { - os_ << prefix_ << "allocation started on " << demangle_name(exec) + os_ << prefix_ << "allocation started on " << demangle_name(mem_space) << " with " << bytes_name(num_bytes) << std::endl; } template -void Stream::on_allocation_completed(const Executor *exec, +void Stream::on_allocation_completed(const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) const { - os_ << prefix_ << "allocation completed on " << demangle_name(exec) + os_ << prefix_ << "allocation completed on " << demangle_name(mem_space) << " at " << location_name(location) << " with " << bytes_name(num_bytes) << std::endl; } template -void Stream::on_free_started(const Executor *exec, +void Stream::on_free_started(const MemorySpace *mem_space, const uintptr &location) const { - os_ << prefix_ << "free started on " << demangle_name(exec) << " at " + os_ << prefix_ << "free started on " << demangle_name(mem_space) << " at " << location_name(location) << std::endl; } template -void Stream::on_free_completed(const Executor *exec, +void Stream::on_free_completed(const MemorySpace *mem_space, const uintptr &location) const { - os_ << prefix_ << "free completed on " << demangle_name(exec) << " at " + os_ << prefix_ << "free completed on " << demangle_name(mem_space) << " at " << location_name(location) << std::endl; } template -void Stream::on_copy_started(const Executor *from, - const Executor *to, +void Stream::on_copy_started(const MemorySpace *from, + const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const @@ -175,8 +176,8 @@ void Stream::on_copy_started(const Executor *from, template -void Stream::on_copy_completed(const Executor *from, - const Executor *to, +void Stream::on_copy_completed(const MemorySpace *from, + const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const diff --git a/core/test/log/convergence.cpp b/core/test/log/convergence.cpp index bb05007817b..25fcb2c531d 100644 --- a/core/test/log/convergence.cpp +++ b/core/test/log/convergence.cpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/test/utils.hpp" @@ -54,7 +55,7 @@ TYPED_TEST(Convergence, CanGetData) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Convergence::create( - exec, gko::log::Logger::iteration_complete_mask); + exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask); ASSERT_EQ(logger->get_num_iterations(), 0); ASSERT_EQ(logger->get_residual(), nullptr); diff --git a/core/test/log/logger.cpp b/core/test/log/logger.cpp index e051e16f692..19b8ceb51f2 100644 --- a/core/test/log/logger.cpp +++ b/core/test/log/logger.cpp @@ -65,8 +65,8 @@ TEST(DummyLogged, CanAddLogger) auto exec = gko::ReferenceExecutor::create(); DummyLoggedClass c; - c.add_logger( - gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); + c.add_logger(gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::all_events_mask)); ASSERT_EQ(c.get_num_loggers(), 1); } @@ -77,10 +77,11 @@ TEST(DummyLogged, CanAddMultipleLoggers) auto exec = gko::ReferenceExecutor::create(); DummyLoggedClass c; - c.add_logger( - gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); - c.add_logger(gko::log::Stream<>::create( - exec, gko::log::Logger::all_events_mask, std::cout)); + c.add_logger(gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::all_events_mask)); + c.add_logger(gko::log::Stream<>::create(exec, exec->get_mem_space(), + gko::log::Logger::all_events_mask, + std::cout)); ASSERT_EQ(c.get_num_loggers(), 2); } @@ -90,11 +91,12 @@ TEST(DummyLogged, CanRemoveLogger) { auto exec = gko::ReferenceExecutor::create(); DummyLoggedClass c; - auto r = gko::share( - gko::log::Record::create(exec, gko::log::Logger::all_events_mask)); + auto r = gko::share(gko::log::Record::create( + exec, exec->get_mem_space(), gko::log::Logger::all_events_mask)); c.add_logger(r); - c.add_logger(gko::log::Stream<>::create( - exec, gko::log::Logger::all_events_mask, std::cout)); + c.add_logger(gko::log::Stream<>::create(exec, exec->get_mem_space(), + gko::log::Logger::all_events_mask, + std::cout)); c.remove_logger(gko::lend(r)); @@ -107,7 +109,7 @@ struct DummyLogger : gko::log::Logger { explicit DummyLogger( std::shared_ptr exec, const mask_type &enabled_events = Logger::all_events_mask) - : Logger(exec, enabled_events) + : Logger(exec, exec->get_mem_space(), enabled_events) {} void on_iteration_complete( diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index d318a29f228..07585ba7f04 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -77,7 +78,7 @@ class Papi : public ::testing::Test { const std::string init(const gko::log::Logger::mask_type &event, const std::string &event_name, U *ptr) { - logger = gko::log::Papi::create(exec, event); + logger = gko::log::Papi::create(exec, exec->get_mem_space(), event); std::ostringstream os; os << "sde:::" << logger->get_handle_name() << "::" << event_name << "::" << reinterpret_cast(ptr); @@ -125,13 +126,14 @@ TYPED_TEST_CASE(Papi, gko::test::ValueTypes); TYPED_TEST(Papi, CatchesAllocationStarted) { int logged_value = 42; - auto str = this->init(gko::log::Logger::allocation_started_mask, - "allocation_started", this->exec.get()); + auto str = + this->init(gko::log::Logger::allocation_started_mask, + "allocation_started", this->exec->get_mem_space().get()); this->add_event(str); this->start(); this->logger->template on( - this->exec.get(), logged_value); + this->exec->get_mem_space().get(), logged_value); long long int value = 0; this->stop(&value); @@ -142,13 +144,14 @@ TYPED_TEST(Papi, CatchesAllocationStarted) TYPED_TEST(Papi, CatchesAllocationCompleted) { int logged_value = 42; - auto str = this->init(gko::log::Logger::allocation_completed_mask, - "allocation_completed", this->exec.get()); + auto str = + this->init(gko::log::Logger::allocation_completed_mask, + "allocation_completed", this->exec->get_mem_space().get()); this->add_event(str); this->start(); this->logger->template on( - this->exec.get(), logged_value, 0); + this->exec->get_mem_space().get(), logged_value, 0); long long int value = 0; this->stop(&value); @@ -159,12 +162,12 @@ TYPED_TEST(Papi, CatchesAllocationCompleted) TYPED_TEST(Papi, CatchesFreeStarted) { auto str = this->init(gko::log::Logger::free_started_mask, "free_started", - this->exec.get()); + this->exec->get_mem_space().get()); this->add_event(str); this->start(); - this->logger->template on(this->exec.get(), - 0); + this->logger->template on( + this->exec->get_mem_space().get(), 0); long long int value = 0; this->stop(&value); @@ -175,12 +178,12 @@ TYPED_TEST(Papi, CatchesFreeStarted) TYPED_TEST(Papi, CatchesFreeCompleted) { auto str = this->init(gko::log::Logger::free_completed_mask, - "free_completed", this->exec.get()); + "free_completed", this->exec->get_mem_space().get()); this->add_event(str); this->start(); this->logger->template on( - this->exec.get(), 0); + this->exec->get_mem_space().get(), 0); long long int value = 0; this->stop(&value); @@ -191,18 +194,20 @@ TYPED_TEST(Papi, CatchesFreeCompleted) TYPED_TEST(Papi, CatchesCopyStarted) { auto logged_value = 42; - auto str = this->init(gko::log::Logger::copy_started_mask, - "copy_started_from", this->exec.get()); + auto str = + this->init(gko::log::Logger::copy_started_mask, "copy_started_from", + this->exec->get_mem_space().get()); std::ostringstream os_out; os_out << "sde:::" << this->logger->get_handle_name() << "::copy_started_to::" - << reinterpret_cast(this->exec.get()); + << reinterpret_cast(this->exec->get_mem_space().get()); this->add_event(str); this->add_event(os_out.str()); this->start(); this->logger->template on( - this->exec.get(), this->exec.get(), 0, 0, logged_value); + this->exec->get_mem_space().get(), this->exec->get_mem_space().get(), 0, + 0, logged_value); long long int values[2]; this->stop(values); @@ -214,18 +219,20 @@ TYPED_TEST(Papi, CatchesCopyStarted) TYPED_TEST(Papi, CatchesCopyCompleted) { auto logged_value = 42; - auto str = this->init(gko::log::Logger::copy_completed_mask, - "copy_completed_from", this->exec.get()); + auto str = + this->init(gko::log::Logger::copy_completed_mask, "copy_completed_from", + this->exec->get_mem_space().get()); std::ostringstream os_out; os_out << "sde:::" << this->logger->get_handle_name() << "::copy_completed_to::" - << reinterpret_cast(this->exec.get()); + << reinterpret_cast(this->exec->get_mem_space().get()); this->add_event(str); this->add_event(os_out.str()); this->start(); this->logger->template on( - this->exec.get(), this->exec.get(), 0, 0, logged_value); + this->exec->get_mem_space().get(), this->exec->get_mem_space().get(), 0, + 0, logged_value); long long int values[2]; this->stop(values); diff --git a/core/test/log/record.cpp b/core/test/log/record.cpp index dd829d39a8c..c926670c17e 100644 --- a/core/test/log/record.cpp +++ b/core/test/log/record.cpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -56,7 +57,7 @@ TEST(Record, CanGetData) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::iteration_complete_mask); + exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask); ASSERT_EQ(logger->get().allocation_started.size(), 0); } @@ -66,12 +67,13 @@ TEST(Record, CatchesAllocationStarted) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::allocation_started_mask); + exec, exec->get_mem_space(), gko::log::Logger::allocation_started_mask); - logger->on(exec.get(), 42); + logger->on( + exec->get_mem_space().get(), 42); auto &data = logger->get().allocation_started.back(); - ASSERT_EQ(data->exec, exec.get()); + ASSERT_EQ(data->mem_space, exec->get_mem_space().get()); ASSERT_EQ(data->num_bytes, 42); ASSERT_EQ(data->location, 0); } @@ -80,15 +82,17 @@ TEST(Record, CatchesAllocationStarted) TEST(Record, CatchesAllocationCompleted) { auto exec = gko::ReferenceExecutor::create(); - auto logger = gko::log::Record::create( - exec, gko::log::Logger::allocation_completed_mask); + auto logger = + gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::allocation_completed_mask); int dummy = 1; auto ptr = reinterpret_cast(&dummy); - logger->on(exec.get(), 42, ptr); + logger->on( + exec->get_mem_space().get(), 42, ptr); auto &data = logger->get().allocation_completed.back(); - ASSERT_EQ(data->exec, exec.get()); + ASSERT_EQ(data->mem_space, exec->get_mem_space().get()); ASSERT_EQ(data->num_bytes, 42); ASSERT_EQ(data->location, ptr); } @@ -97,15 +101,16 @@ TEST(Record, CatchesAllocationCompleted) TEST(Record, CatchesFreeStarted) { auto exec = gko::ReferenceExecutor::create(); - auto logger = - gko::log::Record::create(exec, gko::log::Logger::free_started_mask); + auto logger = gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::free_started_mask); int dummy = 1; auto ptr = reinterpret_cast(&dummy); - logger->on(exec.get(), ptr); + logger->on(exec->get_mem_space().get(), + ptr); auto &data = logger->get().free_started.back(); - ASSERT_EQ(data->exec, exec.get()); + ASSERT_EQ(data->mem_space, exec->get_mem_space().get()); ASSERT_EQ(data->num_bytes, 0); ASSERT_EQ(data->location, ptr); } @@ -114,15 +119,16 @@ TEST(Record, CatchesFreeStarted) TEST(Record, CatchesFreeCompleted) { auto exec = gko::ReferenceExecutor::create(); - auto logger = - gko::log::Record::create(exec, gko::log::Logger::free_completed_mask); + auto logger = gko::log::Record::create( + exec, exec->get_mem_space(), gko::log::Logger::free_completed_mask); int dummy = 1; auto ptr = reinterpret_cast(&dummy); - logger->on(exec.get(), ptr); + logger->on(exec->get_mem_space().get(), + ptr); auto &data = logger->get().free_completed.back(); - ASSERT_EQ(data->exec, exec.get()); + ASSERT_EQ(data->mem_space, exec->get_mem_space().get()); ASSERT_EQ(data->num_bytes, 0); ASSERT_EQ(data->location, ptr); } @@ -131,23 +137,24 @@ TEST(Record, CatchesFreeCompleted) TEST(Record, CatchesCopyStarted) { auto exec = gko::ReferenceExecutor::create(); - auto logger = - gko::log::Record::create(exec, gko::log::Logger::copy_started_mask); + auto logger = gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::copy_started_mask); int dummy_from = 1; int dummy_to = 1; auto ptr_from = reinterpret_cast(&dummy_from); auto ptr_to = reinterpret_cast(&dummy_to); - logger->on(exec.get(), exec.get(), ptr_from, - ptr_to, 42); + logger->on(exec->get_mem_space().get(), + exec->get_mem_space().get(), + ptr_from, ptr_to, 42); auto &data = logger->get().copy_started.back(); auto data_from = std::get<0>(*data); auto data_to = std::get<1>(*data); - ASSERT_EQ(data_from.exec, exec.get()); + ASSERT_EQ(data_from.mem_space, exec->get_mem_space().get()); ASSERT_EQ(data_from.num_bytes, 42); ASSERT_EQ(data_from.location, ptr_from); - ASSERT_EQ(data_to.exec, exec.get()); + ASSERT_EQ(data_to.mem_space, exec->get_mem_space().get()); ASSERT_EQ(data_to.num_bytes, 42); ASSERT_EQ(data_to.location, ptr_to); } @@ -156,23 +163,24 @@ TEST(Record, CatchesCopyStarted) TEST(Record, CatchesCopyCompleted) { auto exec = gko::ReferenceExecutor::create(); - auto logger = - gko::log::Record::create(exec, gko::log::Logger::copy_completed_mask); + auto logger = gko::log::Record::create( + exec, exec->get_mem_space(), gko::log::Logger::copy_completed_mask); int dummy_from = 1; int dummy_to = 1; auto ptr_from = reinterpret_cast(&dummy_from); auto ptr_to = reinterpret_cast(&dummy_to); - logger->on(exec.get(), exec.get(), + logger->on(exec->get_mem_space().get(), + exec->get_mem_space().get(), ptr_from, ptr_to, 42); auto &data = logger->get().copy_completed.back(); auto data_from = std::get<0>(*data); auto data_to = std::get<1>(*data); - ASSERT_EQ(data_from.exec, exec.get()); + ASSERT_EQ(data_from.mem_space, exec->get_mem_space().get()); ASSERT_EQ(data_from.num_bytes, 42); ASSERT_EQ(data_from.location, ptr_from); - ASSERT_EQ(data_to.exec, exec.get()); + ASSERT_EQ(data_to.mem_space, exec->get_mem_space().get()); ASSERT_EQ(data_to.num_bytes, 42); ASSERT_EQ(data_to.location, ptr_to); } @@ -182,7 +190,7 @@ TEST(Record, CatchesOperationLaunched) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::operation_launched_mask); + exec, exec->get_mem_space(), gko::log::Logger::operation_launched_mask); gko::Operation op; logger->on(exec.get(), &op); @@ -196,8 +204,9 @@ TEST(Record, CatchesOperationLaunched) TEST(Record, CatchesOperationCompleted) { auto exec = gko::ReferenceExecutor::create(); - auto logger = gko::log::Record::create( - exec, gko::log::Logger::operation_completed_mask); + auto logger = + gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::operation_completed_mask); gko::Operation op; logger->on(exec.get(), &op); @@ -213,7 +222,8 @@ TEST(Record, CatchesPolymorphicObjectCreateStarted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::polymorphic_object_create_started_mask); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_create_started_mask); auto po = gko::matrix::Dense<>::create(exec); logger->on(exec.get(), @@ -232,7 +242,8 @@ TEST(Record, CatchesPolymorphicObjectCreateCompleted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::polymorphic_object_create_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_create_completed_mask); auto po = gko::matrix::Dense<>::create(exec); auto output = gko::matrix::Dense<>::create(exec); @@ -251,7 +262,8 @@ TEST(Record, CatchesPolymorphicObjectCopyStarted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::polymorphic_object_copy_started_mask); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_copy_started_mask); auto from = gko::matrix::Dense<>::create(exec); auto to = gko::matrix::Dense<>::create(exec); @@ -270,7 +282,8 @@ TEST(Record, CatchesPolymorphicObjectCopyCompleted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::polymorphic_object_copy_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_copy_completed_mask); auto from = gko::matrix::Dense<>::create(exec); auto to = gko::matrix::Dense<>::create(exec); @@ -290,7 +303,8 @@ TEST(Record, CatchesPolymorphicObjectDeleted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::polymorphic_object_deleted_mask); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_deleted_mask); auto po = gko::matrix::Dense<>::create(exec); logger->on(exec.get(), @@ -308,8 +322,9 @@ TEST(Record, CatchesLinOpApplyStarted) { using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); - auto logger = gko::log::Record::create( - exec, gko::log::Logger::linop_apply_started_mask); + auto logger = + gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::linop_apply_started_mask); auto A = gko::initialize({1.1}, exec); auto b = gko::initialize({-2.2}, exec); auto x = gko::initialize({3.3}, exec); @@ -330,8 +345,9 @@ TEST(Record, CatchesLinOpApplyCompleted) { using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); - auto logger = gko::log::Record::create( - exec, gko::log::Logger::linop_apply_completed_mask); + auto logger = + gko::log::Record::create(exec, exec->get_mem_space(), + gko::log::Logger::linop_apply_completed_mask); auto A = gko::initialize({1.1}, exec); auto b = gko::initialize({-2.2}, exec); auto x = gko::initialize({3.3}, exec); @@ -353,7 +369,8 @@ TEST(Record, CatchesLinOpAdvancedApplyStarted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::linop_advanced_apply_started_mask); + exec, exec->get_mem_space(), + gko::log::Logger::linop_advanced_apply_started_mask); auto A = gko::initialize({1.1}, exec); auto alpha = gko::initialize({-4.4}, exec); auto b = gko::initialize({-2.2}, exec); @@ -377,7 +394,8 @@ TEST(Record, CatchesLinOpAdvancedApplyCompleted) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::linop_advanced_apply_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::linop_advanced_apply_completed_mask); auto A = gko::initialize({1.1}, exec); auto alpha = gko::initialize({-4.4}, exec); auto b = gko::initialize({-2.2}, exec); @@ -400,7 +418,8 @@ TEST(Record, CatchesLinopFactoryGenerateStarted) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::linop_factory_generate_started_mask); + exec, exec->get_mem_space(), + gko::log::Logger::linop_factory_generate_started_mask); auto factory = gko::solver::Bicgstab<>::build() .with_criteria( @@ -422,7 +441,8 @@ TEST(Record, CatchesLinopFactoryGenerateCompleted) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::linop_factory_generate_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::linop_factory_generate_completed_mask); auto factory = gko::solver::Bicgstab<>::build() .with_criteria( @@ -445,7 +465,8 @@ TEST(Record, CatchesCriterionCheckStarted) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::criterion_check_started_mask); + exec, exec->get_mem_space(), + gko::log::Logger::criterion_check_started_mask); auto criterion = gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( nullptr, nullptr, nullptr); @@ -468,7 +489,8 @@ TEST(Record, CatchesCriterionCheckCompleted) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::criterion_check_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::criterion_check_completed_mask); auto criterion = gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( nullptr, nullptr, nullptr); @@ -499,7 +521,7 @@ TEST(Record, CatchesIterations) using Dense = gko::matrix::Dense<>; auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Record::create( - exec, gko::log::Logger::iteration_complete_mask); + exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask); auto factory = gko::solver::Bicgstab<>::build() .with_criteria( diff --git a/core/test/log/stream.cpp b/core/test/log/stream.cpp index 163a54fd74a..e293936f3ac 100644 --- a/core/test/log/stream.cpp +++ b/core/test/log/stream.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -67,9 +68,11 @@ TYPED_TEST(Stream, CatchesAllocationStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::allocation_started_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::allocation_started_mask, + out); - logger->template on(exec.get(), 42); + logger->template on( + exec->get_mem_space().get(), 42); auto os = out.str(); GKO_ASSERT_STR_CONTAINS(os, "allocation started on"); @@ -82,13 +85,15 @@ TYPED_TEST(Stream, CatchesAllocationCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::allocation_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::allocation_completed_mask, out); int dummy = 1; std::stringstream ptrstream; ptrstream << std::hex << "0x" << reinterpret_cast(&dummy); logger->template on( - exec.get(), 42, reinterpret_cast(&dummy)); + exec->get_mem_space().get(), 42, + reinterpret_cast(&dummy)); auto os = out.str(); GKO_ASSERT_STR_CONTAINS(os, "allocation completed on"); @@ -102,13 +107,13 @@ TYPED_TEST(Stream, CatchesFreeStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::free_started_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::free_started_mask, out); int dummy = 1; std::stringstream ptrstream; ptrstream << std::hex << "0x" << reinterpret_cast(&dummy); logger->template on( - exec.get(), reinterpret_cast(&dummy)); + exec->get_mem_space().get(), reinterpret_cast(&dummy)); auto os = out.str(); GKO_ASSERT_STR_CONTAINS(os, "free started on"); @@ -121,13 +126,14 @@ TYPED_TEST(Stream, CatchesFreeCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::free_completed_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::free_completed_mask, + out); int dummy = 1; std::stringstream ptrstream; ptrstream << std::hex << "0x" << reinterpret_cast(&dummy); logger->template on( - exec.get(), reinterpret_cast(&dummy)); + exec->get_mem_space().get(), reinterpret_cast(&dummy)); auto os = out.str(); GKO_ASSERT_STR_CONTAINS(os, "free completed on"); @@ -140,7 +146,7 @@ TYPED_TEST(Stream, CatchesCopyStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::copy_started_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::copy_started_mask, out); int dummy_in = 1; int dummy_out = 1; std::stringstream ptrstream_in; @@ -151,7 +157,8 @@ TYPED_TEST(Stream, CatchesCopyStarted) << reinterpret_cast(&dummy_out); logger->template on( - exec.get(), exec.get(), reinterpret_cast(&dummy_in), + exec->get_mem_space().get(), exec->get_mem_space().get(), + reinterpret_cast(&dummy_in), reinterpret_cast(&dummy_out), 42); auto os = out.str(); @@ -167,7 +174,8 @@ TYPED_TEST(Stream, CatchesCopyCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::copy_completed_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::copy_completed_mask, + out); int dummy_in = 1; int dummy_out = 1; std::stringstream ptrstream_in; @@ -178,7 +186,8 @@ TYPED_TEST(Stream, CatchesCopyCompleted) << reinterpret_cast(&dummy_out); logger->template on( - exec.get(), exec.get(), reinterpret_cast(&dummy_in), + exec->get_mem_space().get(), exec->get_mem_space().get(), + reinterpret_cast(&dummy_in), reinterpret_cast(&dummy_out), 42); auto os = out.str(); @@ -194,7 +203,8 @@ TYPED_TEST(Stream, CatchesOperationLaunched) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::operation_launched_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::operation_launched_mask, + out); gko::Operation op; std::stringstream ptrstream; ptrstream << &op; @@ -212,7 +222,8 @@ TYPED_TEST(Stream, CatchesOperationCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::operation_completed_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::operation_completed_mask, + out); gko::Operation op; std::stringstream ptrstream; ptrstream << &op; @@ -230,7 +241,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCreateStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::polymorphic_object_create_started_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_create_started_mask, out); auto po = gko::matrix::Dense::create(exec); std::stringstream ptrstream; ptrstream << po.get(); @@ -249,7 +261,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCreateCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::polymorphic_object_create_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_create_completed_mask, out); auto po = gko::matrix::Dense::create(exec); auto output = gko::matrix::Dense::create(exec); std::stringstream ptrstream_in; @@ -272,7 +285,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCopyStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::polymorphic_object_copy_started_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_copy_started_mask, out); auto from = gko::matrix::Dense::create(exec); auto to = gko::matrix::Dense::create(exec); std::stringstream ptrstream_from; @@ -295,7 +309,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCopyCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::polymorphic_object_copy_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_copy_completed_mask, out); auto from = gko::matrix::Dense::create(exec); auto to = gko::matrix::Dense::create(exec); std::stringstream ptrstream_from; @@ -318,7 +333,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectDeleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::polymorphic_object_deleted_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::polymorphic_object_deleted_mask, out); auto po = gko::matrix::Dense::create(exec); std::stringstream ptrstream; ptrstream << po.get(); @@ -338,7 +354,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_apply_started_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::linop_apply_started_mask, + out); auto A = Dense::create(exec); auto b = Dense::create(exec); auto x = Dense::create(exec); @@ -366,7 +383,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyStartedWithVerbose) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_apply_started_mask, out, true); + exec, exec->get_mem_space(), gko::log::Logger::linop_apply_started_mask, + out, true); auto A = gko::initialize({1.1}, exec); auto b = gko::initialize({-2.2}, exec); auto x = gko::initialize({3.3}, exec); @@ -387,7 +405,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_apply_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::linop_apply_completed_mask, out); auto A = Dense::create(exec); auto b = Dense::create(exec); auto x = Dense::create(exec); @@ -415,7 +434,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyCompletedWithVerbose) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_apply_completed_mask, out, true); + exec, exec->get_mem_space(), + gko::log::Logger::linop_apply_completed_mask, out, true); auto A = gko::initialize({1.1}, exec); auto b = gko::initialize({-2.2}, exec); auto x = gko::initialize({3.3}, exec); @@ -436,7 +456,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_advanced_apply_started_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::linop_advanced_apply_started_mask, out); auto A = Dense::create(exec); auto alpha = Dense::create(exec); auto b = Dense::create(exec); @@ -472,7 +493,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyStartedWithVerbose) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_advanced_apply_started_mask, out, true); + exec, exec->get_mem_space(), + gko::log::Logger::linop_advanced_apply_started_mask, out, true); auto A = gko::initialize({1.1}, exec); auto alpha = gko::initialize({-4.4}, exec); auto b = gko::initialize({-2.2}, exec); @@ -497,7 +519,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_advanced_apply_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::linop_advanced_apply_completed_mask, out); auto A = Dense::create(exec); auto alpha = Dense::create(exec); auto b = Dense::create(exec); @@ -533,7 +556,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyCompletedWithVerbose) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_advanced_apply_completed_mask, out, true); + exec, exec->get_mem_space(), + gko::log::Logger::linop_advanced_apply_completed_mask, out, true); auto A = gko::initialize({1.1}, exec); auto alpha = gko::initialize({-4.4}, exec); auto b = gko::initialize({-2.2}, exec); @@ -557,7 +581,8 @@ TYPED_TEST(Stream, CatchesLinopFactoryGenerateStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_factory_generate_started_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::linop_factory_generate_started_mask, out); auto factory = gko::solver::Bicgstab::build() .with_criteria( @@ -584,7 +609,8 @@ TYPED_TEST(Stream, CatchesLinopFactoryGenerateCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::linop_factory_generate_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::linop_factory_generate_completed_mask, out); auto factory = gko::solver::Bicgstab::build() .with_criteria( @@ -616,7 +642,8 @@ TYPED_TEST(Stream, CatchesCriterionCheckStarted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::criterion_check_started_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::criterion_check_started_mask, out); auto criterion = gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( nullptr, nullptr, nullptr); @@ -643,7 +670,8 @@ TYPED_TEST(Stream, CatchesCriterionCheckCompleted) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::criterion_check_completed_mask, out); + exec, exec->get_mem_space(), + gko::log::Logger::criterion_check_completed_mask, out); auto criterion = gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( nullptr, nullptr, nullptr); @@ -674,7 +702,8 @@ TYPED_TEST(Stream, CatchesCriterionCheckCompletedWithVerbose) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::criterion_check_completed_mask, out, true); + exec, exec->get_mem_space(), + gko::log::Logger::criterion_check_completed_mask, out, true); auto criterion = gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( nullptr, nullptr, nullptr); @@ -703,7 +732,8 @@ TYPED_TEST(Stream, CatchesIterations) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::iteration_complete_mask, out); + exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask, + out); auto solver = Dense::create(exec); auto residual = Dense::create(exec); auto solution = Dense::create(exec); @@ -729,7 +759,8 @@ TYPED_TEST(Stream, CatchesIterationsWithVerbose) auto exec = gko::ReferenceExecutor::create(); std::stringstream out; auto logger = gko::log::Stream::create( - exec, gko::log::Logger::iteration_complete_mask, out, true); + exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask, + out, true); auto factory = gko::solver::Bicgstab::build() diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index 06ddbd8f24b..4efbac69adb 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -111,7 +111,7 @@ int main(int argc, char *argv[]) .on(exec); std::shared_ptr> logger = - gko::log::Convergence::create(exec); + gko::log::Convergence::create(exec, exec->get_mem_space()); iter_stop->add_logger(logger); tol_stop->add_logger(logger); diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index afc48e0ea1f..5a3199a87c5 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -171,7 +171,8 @@ struct ResidualLogger : gko::log::Logger { // Construct the logger and store the system matrix and b vectors ResidualLogger(std::shared_ptr exec, const gko::LinOp *matrix, const gko_dense *b) - : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), + : gko::log::Logger(exec, exec->get_mem_space(), + gko::log::Logger::iteration_complete_mask), matrix{matrix}, b{b} {} diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index 5c7d7861b9b..c455da4135a 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -117,7 +117,8 @@ void run_solver(volatile bool *stop_iteration_process, .on(exec) ->generate(A); solver->add_logger(gko::log::Stream::create( - exec, gko::log::Logger::iteration_complete_mask, std::cout, true)); + exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask, + std::cout, true)); solver->apply(lend(b), lend(x)); std::cout << "Solver stopped" << std::endl; diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index 627d11d88dd..b07940f3c60 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -143,7 +143,7 @@ int main(int argc, char *argv[]) .on(exec); std::shared_ptr> logger = - gko::log::Convergence::create(exec); + gko::log::Convergence::create(exec, exec->get_mem_space()); iter_stop->add_logger(logger); tol_stop->add_logger(logger); diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index a07726ec82e..0df2279e4d9 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ b/examples/iterative-refinement/iterative-refinement.cpp @@ -113,7 +113,7 @@ int main(int argc, char *argv[]) .on(exec); std::shared_ptr> logger = - gko::log::Convergence::create(exec); + gko::log::Convergence::create(exec, exec->get_mem_space()); iter_stop->add_logger(logger); tol_stop->add_logger(logger); diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index 4a9fae8bcbd..6abee3ff818 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -116,41 +116,43 @@ namespace loggers { // taken before and after. This can create significant overhead since to ensure // proper timings, calls to `synchronize` are required. struct OperationLogger : gko::log::Logger { - void on_allocation_started(const gko::Executor *exec, + void on_allocation_started(const gko::MemorySpace *mem_space, const gko::size_type &) const override { - this->start_operation(exec, "allocate"); + this->start_operation(mem_space, "allocate"); } - void on_allocation_completed(const gko::Executor *exec, + void on_allocation_completed(const gko::MemorySpace *mem_space, const gko::size_type &, const gko::uintptr &) const override { - this->end_operation(exec, "allocate"); + this->end_operation(mem_space, "allocate"); } - void on_free_started(const gko::Executor *exec, + void on_free_started(const gko::MemorySpace *mem_space, const gko::uintptr &) const override { - this->start_operation(exec, "free"); + this->start_operation(mem_space, "free"); } - void on_free_completed(const gko::Executor *exec, + void on_free_completed(const gko::MemorySpace *mem_space, const gko::uintptr &) const override { - this->end_operation(exec, "free"); + this->end_operation(mem_space, "free"); } - void on_copy_started(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, + void on_copy_started(const gko::MemorySpace *from, + const gko::MemorySpace *to, const gko::uintptr &, + const gko::uintptr &, const gko::size_type &) const override { from->synchronize(); this->start_operation(to, "copy"); } - void on_copy_completed(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, + void on_copy_completed(const gko::MemorySpace *from, + const gko::MemorySpace *to, const gko::uintptr &, + const gko::uintptr &, const gko::size_type &) const override { from->synchronize(); @@ -181,24 +183,25 @@ struct OperationLogger : gko::log::Logger { } OperationLogger(std::shared_ptr exec) - : gko::log::Logger(exec) + : gko::log::Logger(exec, exec->get_mem_space()) {} private: // Helper which synchronizes and starts the time before every operation. - void start_operation(const gko::Executor *exec, - const std::string &name) const + template + void start_operation(const Event *event, const std::string &name) const { nested.emplace_back(0); - exec->synchronize(); + event->synchronize(); start[name] = std::chrono::steady_clock::now(); } // Helper to compute the end time and store the operation's time at its // end. Also time nested operations. - void end_operation(const gko::Executor *exec, const std::string &name) const + template + void end_operation(const Event *event, const std::string &name) const { - exec->synchronize(); + event->synchronize(); const auto end = std::chrono::steady_clock::now(); const auto diff = end - start[name]; // make sure timings for nested operations are not counted twice @@ -220,7 +223,7 @@ struct OperationLogger : gko::log::Logger { // This logger tracks the persistently allocated data struct StorageLogger : gko::log::Logger { // Store amount of bytes allocated on every allocation - void on_allocation_completed(const gko::Executor *, + void on_allocation_completed(const gko::MemorySpace *, const gko::size_type &num_bytes, const gko::uintptr &location) const override { @@ -228,7 +231,7 @@ struct StorageLogger : gko::log::Logger { } // Reset the amount of bytes on every free - void on_free_completed(const gko::Executor *, + void on_free_completed(const gko::MemorySpace *, const gko::uintptr &location) const override { storage[location] = 0; @@ -245,7 +248,7 @@ struct StorageLogger : gko::log::Logger { } StorageLogger(std::shared_ptr exec) - : gko::log::Logger(exec) + : gko::log::Logger(exec, exec->get_mem_space()) {} private: @@ -281,7 +284,8 @@ struct ResidualLogger : gko::log::Logger { ResidualLogger(std::shared_ptr exec, const gko::LinOp *matrix, const vec *b) - : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), + : gko::log::Logger(exec, exec->get_mem_space(), + gko::log::Logger::iteration_complete_mask), matrix{matrix}, b{b} {} @@ -370,8 +374,7 @@ int main(int argc, char *argv[]) // Figure out where to run the code if (argc == 2 && (std::string(argv[1]) == "--help")) { - std::cerr << "Usage: " << argv[0] << " [executor]" - << std::endl; + std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index 96d32fa4b2a..2ad3fdcade8 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -109,7 +109,7 @@ int main(int argc, char *argv[]) // for convenience. std::shared_ptr> stream_logger = gko::log::Stream::create( - exec, + exec, exec->get_mem_space(), gko::log::Logger::all_events_mask ^ gko::log::Logger::linop_factory_events_mask ^ gko::log::Logger::polymorphic_object_events_mask, @@ -146,15 +146,17 @@ int main(int argc, char *argv[]) // Logger class for more information. std::ofstream filestream("my_file.txt"); solver->add_logger(gko::log::Stream::create( - exec, gko::log::Logger::all_events_mask, filestream)); + exec, exec->get_mem_space(), gko::log::Logger::all_events_mask, + filestream)); solver->add_logger(stream_logger); // Add another logger which puts all the data in an object, we can later // retrieve this object in our code. Here we only have want Executor // and criterion check completed events. std::shared_ptr record_logger = gko::log::Record::create( - exec, gko::log::Logger::executor_events_mask | - gko::log::Logger::criterion_check_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::memory_space_events_mask | + gko::log::Logger::criterion_check_completed_mask); exec->add_logger(record_logger); residual_criterion->add_logger(record_logger); @@ -165,10 +167,10 @@ int main(int argc, char *argv[]) // location copied auto &last_copy = record_logger->get().copy_completed.back(); std::cout << "Last memory copied was of size " << std::hex - << std::get<0>(*last_copy).num_bytes << " FROM executor " - << std::get<0>(*last_copy).exec << " pointer " - << std::get<0>(*last_copy).location << " TO executor " - << std::get<1>(*last_copy).exec << " pointer " + << std::get<0>(*last_copy).num_bytes << " FROM Memory Space" + << std::get<0>(*last_copy).mem_space << " pointer " + << std::get<0>(*last_copy).location << " TO Memory Space " + << std::get<1>(*last_copy).mem_space << " pointer " << std::get<1>(*last_copy).location << std::dec << std::endl; // Also print the residual of the last criterion check event (where // convergence happened) diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 78fe81a617e..b2b8c51ef47 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -173,6 +173,30 @@ class NotSupported : public Error { }; +/** + * MemSpaceMismatch is thrown in case it is not possible to + * perform the requested operation on the given object type. + */ +class MemSpaceMismatch : public Error { +public: + /** + * Initializes a MemSpaceMismatch error. + * + * @param file The name of the offending source file + * @param line The source code line number where the error occurred + * @param func The name of the function where the error occured + * @param obj_type The object type on which the requested operation + cannot be performed. + */ + MemSpaceMismatch(const std::string &file, int line, const std::string &func, + const std::string &obj_type) + : Error(file, line, + "This executor" + func + + " does not support Memory space of type " + obj_type) + {} +}; + + /** * CudaError is thrown when a CUDA routine throws a non-zero error code. */ diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 287b59380f8..34410836a6f 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -88,6 +88,19 @@ namespace gko { "semi-colon warnings") +/** + * Creates a MemSpaceMismatch exception. + * This macro sets the correct information about the location of the error + * and fills the exception with data about _obj. + * + * @param _obj the object referenced by MemSpaceMismatch exception + * + * @return MemSpaceMismatch + */ +#define GKO_MEMSPACE_MISMATCH(_obj) \ + throw ::gko::MemSpaceMismatch(__FILE__, __LINE__, __func__, GKO_QUOTE(_obj)) + + namespace detail { diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index c76f05c29c3..378e892e724 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -41,6 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include #include #include #include @@ -488,7 +490,7 @@ class Executor : public log::EnableLogging { template void copy(size_type num_elems, const T *src_ptr, T *dest_ptr) const { - this->copy_from(this, num_elems, src_ptr, dest_ptr); + this->get_mem_space()->copy_from(this, num_elems, src_ptr, dest_ptr); } /** @@ -504,7 +506,7 @@ class Executor : public log::EnableLogging { T copy_val_to_host(const T *ptr) const { T out{}; - this->get_master()->copy_from(this, 1, ptr, &out); + this->get_master()->get_mem_space()->copy_from(this, 1, ptr, &out); return out; } @@ -698,14 +700,6 @@ class OmpExecutor : public detail::ExecutorBase, } } - OmpExecutor(std::shared_ptr mem_space) - : mem_space_instance_(mem_space) - { - if (!check_mem_space_validity(mem_space_instance_)) { - GKO_MEMSPACE_MISMATCH(NOT_HOST); - } - } - bool check_mem_space_validity(std::shared_ptr mem_space) { auto check_default_mem_space = diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 10ef4d5d4cf..541381bc68a 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -395,6 +395,23 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, _enable_macro(CudaExecutor, cuda) +/** + * Calls a given macro for each memory space type for a given kernel. + * + * The macro should take two parameters: + * + * - the first one is replaced with the memory space class name + * - the second one with the name of the kernel to be bound + * + * @param _enable_macro macro name which will be called + */ +#define GKO_ENABLE_FOR_ALL_MEMORY_SPACES(_enable_macro) \ + _enable_macro(HostMemorySpace, host); \ + _enable_macro(HipMemorySpace, hip); \ + _enable_macro(CudaMemorySpace, cuda); \ + _enable_macro(CudaUVMSpace, cuda_uvm) + + /** * Instantiates a template for each non-complex value type compiled by Ginkgo. * diff --git a/include/ginkgo/core/log/convergence.hpp b/include/ginkgo/core/log/convergence.hpp index 64cd16ea1aa..13a6119e69c 100644 --- a/include/ginkgo/core/log/convergence.hpp +++ b/include/ginkgo/core/log/convergence.hpp @@ -88,10 +88,11 @@ class Convergence : public Logger { */ static std::unique_ptr create( std::shared_ptr exec, + std::shared_ptr mem_space, const mask_type &enabled_events = Logger::all_events_mask) { return std::unique_ptr( - new Convergence(exec, enabled_events)); + new Convergence(exec, mem_space, enabled_events)); } /** @@ -131,8 +132,9 @@ class Convergence : public Logger { */ explicit Convergence( std::shared_ptr exec, + std::shared_ptr mem_space, const mask_type &enabled_events = Logger::all_events_mask) - : Logger(exec, enabled_events) + : Logger(exec, mem_space, enabled_events) {} private: diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index 3d6ae9b4309..e92d22ab954 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -52,6 +52,7 @@ namespace gko { template class Array; class Executor; +class MemorySpace; class LinOp; class LinOpFactory; class PolymorphicObject; @@ -140,68 +141,74 @@ public: \ static constexpr mask_type _event_name##_mask{mask_type{1} << _id}; /** - * Executor's allocation started event. + * Memory Space's allocation started event. * - * @param exec the executor used + * @param mem_space the memory space used * @param num_bytes the number of bytes to allocate */ - GKO_LOGGER_REGISTER_EVENT(0, allocation_started, const Executor *exec, + GKO_LOGGER_REGISTER_EVENT(0, allocation_started, + const MemorySpace *mem_space, const size_type &num_bytes) /** - * Executor's allocation completed event. + * Memory Space's allocation completed event. * - * @param exec the executor used + * @param mem_space the memory space used * @param num_bytes the number of bytes allocated * @param location the address at which the data was allocated */ - GKO_LOGGER_REGISTER_EVENT(1, allocation_completed, const Executor *exec, + GKO_LOGGER_REGISTER_EVENT(1, allocation_completed, + const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) /** - * Executor's free started event. + * Memory Space's free started event. * - * @param exec the executor used + * @param mem_space the memory space used * @param location the address at which the data will be freed */ - GKO_LOGGER_REGISTER_EVENT(2, free_started, const Executor *exec, + GKO_LOGGER_REGISTER_EVENT(2, free_started, const MemorySpace *mem_space, const uintptr &location) /** - * Executor's free completed event. + * Memory Space's free completed event. * - * @param exec the executor used + * @param mem_space the memory space used * @param location the address at which the data was freed */ - GKO_LOGGER_REGISTER_EVENT(3, free_completed, const Executor *exec, + GKO_LOGGER_REGISTER_EVENT(3, free_completed, const MemorySpace *mem_space, const uintptr &location) /** - * Executor's copy started event. + * Memory Space's copy started event. - * @param exec_from the executor to be copied from - * @param exec_to the executor to be copied to + * @param mem_space_from the memory space to be copied from + * @param mem_space_to the memory space to be copied to * @param loc_from the address at which the data will be copied from * @param loc_to the address at which the data will be copied to * @param num_bytes the number of bytes to be copied */ - GKO_LOGGER_REGISTER_EVENT(4, copy_started, const Executor *exec_from, - const Executor *exec_to, const uintptr &loc_from, - const uintptr &loc_to, const size_type &num_bytes) + GKO_LOGGER_REGISTER_EVENT(4, copy_started, + const MemorySpace *mem_space_from, + const MemorySpace *mem_space_to, + const uintptr &loc_from, const uintptr &loc_to, + const size_type &num_bytes) /** - * Executor's copy completed event. + * Memory Space's copy completed event. * - * @param exec_from the executor copied from - * @param exec_to the executor copied to + * @param mem_space_from the memory space copied from + * @param mem_space_to the memory space copied to * @param loc_from the address at which the data was copied from * @param loc_to the address at which the data was copied to * @param num_bytes the number of bytes copied */ - GKO_LOGGER_REGISTER_EVENT(5, copy_completed, const Executor *exec_from, - const Executor *exec_to, const uintptr &loc_from, - const uintptr &loc_to, const size_type &num_bytes) + GKO_LOGGER_REGISTER_EVENT(5, copy_completed, + const MemorySpace *mem_space_from, + const MemorySpace *mem_space_to, + const uintptr &loc_from, const uintptr &loc_to, + const size_type &num_bytes) /** * Executor's operation launched event (method run). @@ -407,9 +414,9 @@ public: \ #undef GKO_LOGGER_REGISTER_EVENT /** - * Bitset Mask which activates all executor events + * Bitset Mask which activates all memory space events */ - static constexpr mask_type executor_events_mask = + static constexpr mask_type memory_space_events_mask = allocation_started_mask | allocation_completed_mask | free_started_mask | free_completed_mask | copy_started_mask | copy_completed_mask; @@ -466,12 +473,14 @@ public: \ * event. */ explicit Logger(std::shared_ptr exec, + std::shared_ptr mem_space, const mask_type &enabled_events = all_events_mask) - : exec_{exec}, enabled_events_{enabled_events} + : exec_{exec}, mem_space_{mem_space}, enabled_events_{enabled_events} {} private: std::shared_ptr exec_; + std::shared_ptr mem_space_; mask_type enabled_events_; }; diff --git a/include/ginkgo/core/log/papi.hpp b/include/ginkgo/core/log/papi.hpp index 7b54e478f6e..d38d7ef9529 100644 --- a/include/ginkgo/core/log/papi.hpp +++ b/include/ginkgo/core/log/papi.hpp @@ -91,26 +91,26 @@ static std::mutex papi_count_mutex; template class Papi : public Logger { public: - /* Executor events */ - void on_allocation_started(const Executor *exec, + /* Memory space events */ + void on_allocation_started(const MemorySpace *mem_space, const size_type &num_bytes) const override; - void on_allocation_completed(const Executor *exec, + void on_allocation_completed(const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) const override; - void on_free_started(const Executor *exec, + void on_free_started(const MemorySpace *mem_space, const uintptr &location) const override; - void on_free_completed(const Executor *exec, + void on_free_completed(const MemorySpace *mem_space, const uintptr &location) const override; - void on_copy_started(const Executor *from, const Executor *to, + void on_copy_started(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const override; - void on_copy_completed(const Executor *from, const Executor *to, + void on_copy_completed(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const override; @@ -185,9 +185,10 @@ class Papi : public Logger { */ static std::shared_ptr create( std::shared_ptr exec, + std::shared_ptr mem_space, const Logger::mask_type &enabled_events = Logger::all_events_mask) { - return std::shared_ptr(new Papi(exec, enabled_events)); + return std::shared_ptr(new Papi(exec, mem_space, enabled_events)); } /** @@ -201,8 +202,9 @@ class Papi : public Logger { protected: explicit Papi( std::shared_ptr exec, + std::shared_ptr mem_space, const Logger::mask_type &enabled_events = Logger::all_events_mask) - : Logger(exec, enabled_events) + : Logger(exec, mem_space, enabled_events) { std::ostringstream os; @@ -257,20 +259,21 @@ class Papi : public Logger { }; - mutable papi_queue allocation_started{&papi_handle, - "allocation_started"}; - mutable papi_queue allocation_completed{&papi_handle, - "allocation_completed"}; - mutable papi_queue free_started{&papi_handle, "free_started"}; - mutable papi_queue free_completed{&papi_handle, "free_completed"}; - mutable papi_queue copy_started_from{&papi_handle, - "copy_started_from"}; - mutable papi_queue copy_started_to{&papi_handle, - "copy_started_to"}; - mutable papi_queue copy_completed_from{&papi_handle, - "copy_completed_from"}; - mutable papi_queue copy_completed_to{&papi_handle, - "copy_completed_to"}; + mutable papi_queue allocation_started{&papi_handle, + "allocation_started"}; + mutable papi_queue allocation_completed{ + &papi_handle, "allocation_completed"}; + mutable papi_queue free_started{&papi_handle, "free_started"}; + mutable papi_queue free_completed{&papi_handle, + "free_completed"}; + mutable papi_queue copy_started_from{&papi_handle, + "copy_started_from"}; + mutable papi_queue copy_started_to{&papi_handle, + "copy_started_to"}; + mutable papi_queue copy_completed_from{&papi_handle, + "copy_completed_from"}; + mutable papi_queue copy_completed_to{&papi_handle, + "copy_completed_to"}; mutable papi_queue operation_launched{&papi_handle, "operation_launched"}; diff --git a/include/ginkgo/core/log/record.hpp b/include/ginkgo/core/log/record.hpp index 0c791e5e278..b1ffcd4e0f2 100644 --- a/include/ginkgo/core/log/record.hpp +++ b/include/ginkgo/core/log/record.hpp @@ -90,8 +90,8 @@ struct iteration_complete_data { /** * Struct representing Executor related data */ -struct executor_data { - const Executor *exec; +struct memory_space_data { + const MemorySpace *mem_space; const size_type num_bytes; const uintptr location; }; @@ -235,13 +235,15 @@ class Record : public Logger { * Struct storing the actually logged data */ struct logged_data { - std::deque> allocation_started; - std::deque> allocation_completed; - std::deque> free_started; - std::deque> free_completed; - std::deque>> + std::deque> allocation_started; + std::deque> allocation_completed; + std::deque> free_started; + std::deque> free_completed; + std::deque< + std::unique_ptr>> copy_started; - std::deque>> + std::deque< + std::unique_ptr>> copy_completed; std::deque> operation_launched; @@ -275,25 +277,25 @@ class Record : public Logger { }; /* Executor events */ - void on_allocation_started(const Executor *exec, + void on_allocation_started(const MemorySpace *mem_space, const size_type &num_bytes) const override; - void on_allocation_completed(const Executor *exec, + void on_allocation_completed(const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) const override; - void on_free_started(const Executor *exec, + void on_free_started(const MemorySpace *mem_space, const uintptr &location) const override; - void on_free_completed(const Executor *exec, + void on_free_completed(const MemorySpace *mem_space, const uintptr &location) const override; - void on_copy_started(const Executor *from, const Executor *to, + void on_copy_started(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const override; - void on_copy_completed(const Executor *from, const Executor *to, + void on_copy_completed(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const override; @@ -390,11 +392,12 @@ class Record : public Logger { */ static std::unique_ptr create( std::shared_ptr exec, + std::shared_ptr mem_space, const mask_type &enabled_events = Logger::all_events_mask, size_type max_storage = 1) { return std::unique_ptr( - new Record(exec, enabled_events, max_storage)); + new Record(exec, mem_space, enabled_events, max_storage)); } /** @@ -422,9 +425,10 @@ class Record : public Logger { * memory overhead of this logger. */ explicit Record(std::shared_ptr exec, + std::shared_ptr mem_space, const mask_type &enabled_events = Logger::all_events_mask, size_type max_storage = 0) - : Logger(exec, enabled_events), max_storage_{max_storage} + : Logger(exec, mem_space, enabled_events), max_storage_{max_storage} {} /** diff --git a/include/ginkgo/core/log/stream.hpp b/include/ginkgo/core/log/stream.hpp index d46a0d07be0..221b1112f0a 100644 --- a/include/ginkgo/core/log/stream.hpp +++ b/include/ginkgo/core/log/stream.hpp @@ -59,25 +59,25 @@ template class Stream : public Logger { public: /* Executor events */ - void on_allocation_started(const Executor *exec, + void on_allocation_started(const MemorySpace *mem_space, const size_type &num_bytes) const override; - void on_allocation_completed(const Executor *exec, + void on_allocation_completed(const MemorySpace *mem_space, const size_type &num_bytes, const uintptr &location) const override; - void on_free_started(const Executor *exec, + void on_free_started(const MemorySpace *mem_space, const uintptr &location) const override; - void on_free_completed(const Executor *exec, + void on_free_completed(const MemorySpace *mem_space, const uintptr &location) const override; - void on_copy_started(const Executor *from, const Executor *to, + void on_copy_started(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const override; - void on_copy_completed(const Executor *from, const Executor *to, + void on_copy_completed(const MemorySpace *from, const MemorySpace *to, const uintptr &location_from, const uintptr &location_to, const size_type &num_bytes) const override; @@ -173,11 +173,12 @@ class Stream : public Logger { */ static std::unique_ptr create( std::shared_ptr exec, + std::shared_ptr mem_space, const Logger::mask_type &enabled_events = Logger::all_events_mask, std::ostream &os = std::cout, bool verbose = false) { return std::unique_ptr( - new Stream(exec, enabled_events, os, verbose)); + new Stream(exec, mem_space, enabled_events, os, verbose)); } protected: @@ -194,9 +195,10 @@ class Stream : public Logger { */ explicit Stream( std::shared_ptr exec, + std::shared_ptr mem_space, const Logger::mask_type &enabled_events = Logger::all_events_mask, std::ostream &os = std::cout, bool verbose = false) - : Logger(exec, enabled_events), os_(os), verbose_(verbose) + : Logger(exec, mem_space, enabled_events), os_(os), verbose_(verbose) {} diff --git a/reference/test/log/convergence.cpp b/reference/test/log/convergence.cpp index 01a9b17c303..efca64341d7 100644 --- a/reference/test/log/convergence.cpp +++ b/reference/test/log/convergence.cpp @@ -58,7 +58,8 @@ TYPED_TEST(Convergence, CatchesCriterionCheckCompleted) { auto exec = gko::ReferenceExecutor::create(); auto logger = gko::log::Convergence::create( - exec, gko::log::Logger::criterion_check_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::criterion_check_completed_mask); auto criterion = gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate( nullptr, nullptr, nullptr); diff --git a/reference/test/log/papi.cpp b/reference/test/log/papi.cpp index 842b6214374..cf84e6977a4 100644 --- a/reference/test/log/papi.cpp +++ b/reference/test/log/papi.cpp @@ -73,7 +73,7 @@ class Papi : public ::testing::Test { const std::string init(const gko::log::Logger::mask_type &event, const std::string &event_name, U *ptr) { - logger = gko::log::Papi::create(exec, event); + logger = gko::log::Papi::create(exec, exec->get_mem_space(), event); std::ostringstream os; os << "sde:::" << logger->get_handle_name() << "::" << event_name << "_" << reinterpret_cast(ptr); From bba2bd982e7430cdff2d5a7fbaf528783826c9f0 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 20 Oct 2020 09:48:09 +0200 Subject: [PATCH 6/8] Update Array and change ExecutorAllocator to MemSpaceAllocator. --- core/base/allocator.hpp | 12 ++++++++--- core/device_hooks/cuda_hooks.cpp | 17 ++++++++++++--- core/device_hooks/hip_hooks.cpp | 11 +++++++--- core/test/base/array.cpp | 5 +++-- include/ginkgo/core/base/array.hpp | 30 ++++++++++++++++----------- include/ginkgo/core/base/executor.hpp | 6 ++++-- 6 files changed, 56 insertions(+), 25 deletions(-) diff --git a/core/base/allocator.hpp b/core/base/allocator.hpp index 3f618974b46..76ac31b1147 100644 --- a/core/base/allocator.hpp +++ b/core/base/allocator.hpp @@ -99,7 +99,10 @@ class ExecutorAllocator { * @param n the number of elements to allocate * @return the pointer to a newly allocated memory area of `n` elements. */ - T *allocate(std::size_t n) const { return exec_->alloc(n); } + T *allocate(std::size_t n) const + { + return exec_->get_mem_space()->alloc(n); + } /** * Frees a memory area that was allocated by this allocator. @@ -108,7 +111,10 @@ class ExecutorAllocator { * * @note The second parameter is unused. */ - void deallocate(T *ptr, std::size_t) const { exec_->free(ptr); } + void deallocate(T *ptr, std::size_t) const + { + exec_->get_mem_space()->free(ptr); + } /** * Compares two ExecutorAllocators for equality @@ -172,4 +178,4 @@ using unordered_map = } // namespace gko -#endif // GKO_CORE_BASE_ALLOCATOR_HPP_ \ No newline at end of file +#endif // GKO_CORE_BASE_ALLOCATOR_HPP_ diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index be44f709872..024e55dbcbb 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -67,9 +67,8 @@ std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr mem_space, std::shared_ptr master, bool device_reset) { - return std::shared_ptr( - new CudaExecutor(device_id, mem_space, std::move(master)), - device_reset); + return std::shared_ptr(new CudaExecutor( + device_id, mem_space, std::move(master), device_reset)); } @@ -150,6 +149,12 @@ void CudaUVMSpace::raw_copy_to(const HostMemorySpace *dest_mem_space, void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); +void CudaMemorySpace::synchronize() const GKO_NOT_COMPILED(cuda); + + +void CudaUVMSpace::synchronize() const GKO_NOT_COMPILED(cuda); + + void CudaExecutor::run(const Operation &op) const { op.run( @@ -178,6 +183,12 @@ std::string CusparseError::get_error(int64) int CudaExecutor::get_num_devices() { return 0; } +int CudaMemorySpace::get_num_devices() { return 0; } + + +int CudaUVMSpace::get_num_devices() { return 0; } + + void CudaExecutor::set_gpu_property() {} diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 8e658299816..f0178ab7df5 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -64,9 +64,8 @@ std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr memory_space, std::shared_ptr master, bool device_reset) { - return std::shared_ptr( - new HipExecutor(device_id, memory_space, std::move(master)), - device_reset); + return std::shared_ptr(new HipExecutor( + device_id, memory_space, std::move(master), device_reset)); } void HostMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes, @@ -109,6 +108,9 @@ void HipMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes, void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip); +void HipMemorySpace::synchronize() const GKO_NOT_COMPILED(hip); + + void HipExecutor::run(const Operation &op) const { op.run( @@ -137,6 +139,9 @@ std::string HipsparseError::get_error(int64) int HipExecutor::get_num_devices() { return 0; } +int HipMemorySpace::get_num_devices() { return 0; } + + void HipExecutor::set_gpu_property() {} diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index b1e04dd2f39..251016c661c 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -110,8 +110,9 @@ TYPED_TEST(Array, CanBeCreatedFromExistingData) TYPED_TEST(Array, CanBeCreatedFromDataOnExecutor) { - gko::Array a{this->exec, 3, - this->exec->template alloc(3)}; + gko::Array a{ + this->exec, 3, + this->exec->get_mem_space()->template alloc(3)}; EXPECT_EQ(a.get_num_elems(), 3); } diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp index 7f0df5c27f6..798e091452a 100644 --- a/include/ginkgo/core/base/array.hpp +++ b/include/ginkgo/core/base/array.hpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -91,7 +92,7 @@ class Array { /** * The default deleter type used by Array. */ - using default_deleter = executor_deleter; + using default_deleter = memory_space_deleter; /** * The deleter type used for views. @@ -124,7 +125,9 @@ class Array { */ Array(std::shared_ptr exec) noexcept : num_elems_(0), - data_(nullptr, default_deleter{exec}), + data_(nullptr, + default_deleter{exec == nullptr ? nullptr + : exec->get_mem_space()}), exec_(std::move(exec)) {} @@ -137,11 +140,11 @@ class Array { */ Array(std::shared_ptr exec, size_type num_elems) : num_elems_(num_elems), - data_(nullptr, default_deleter{exec}), + data_(nullptr, default_deleter{exec->get_mem_space()}), exec_(std::move(exec)) { if (num_elems > 0) { - data_.reset(exec_->alloc(num_elems)); + data_.reset(exec_->get_mem_space()->alloc(num_elems)); } } @@ -181,7 +184,7 @@ class Array { */ Array(std::shared_ptr exec, size_type num_elems, value_type *data) - : Array(exec, num_elems, data, default_deleter{exec}) + : Array(exec, num_elems, data, default_deleter{exec->get_mem_space()}) {} /** @@ -324,8 +327,9 @@ class Array { GKO_ENSURE_COMPATIBLE_BOUNDS(other.get_num_elems(), this->num_elems_); } - exec_->copy_from(other.get_executor().get(), other.get_num_elems(), - other.get_const_data(), this->get_data()); + exec_->get_mem_space()->copy_from( + other.get_executor()->get_mem_space().get(), other.get_num_elems(), + other.get_const_data(), this->get_data()); return *this; } @@ -371,7 +375,7 @@ class Array { this->clear(); return *this; } - if (exec_ == other.get_executor()) { + if ((exec_->get_mem_space() == other.get_executor()->get_mem_space())) { // same device, only move the pointer using std::swap; swap(data_, other.data_); @@ -407,7 +411,8 @@ class Array { { if (this->exec_ == nullptr) { this->exec_ = other.get_executor(); - this->data_ = data_manager{nullptr, default_deleter{this->exec_}}; + this->data_ = data_manager{ + nullptr, default_deleter{this->exec_->get_mem_space()}}; } if (other.get_executor() == nullptr) { this->clear(); @@ -423,7 +428,8 @@ class Array { Array tmp{this->exec_}; const OtherValueType *source = other.get_const_data(); // if we are on different executors: copy, then convert - if (this->exec_ != other.get_executor()) { + if (this->exec_->get_mem_space() != + other.get_executor()->get_mem_space()) { tmp = other; source = tmp.get_const_data(); } @@ -473,7 +479,7 @@ class Array { if (num_elems > 0 && this->is_owning()) { num_elems_ = num_elems; - data_.reset(exec_->alloc(num_elems)); + data_.reset(exec_->get_mem_space()->alloc(num_elems)); } else { this->clear(); } @@ -522,7 +528,7 @@ class Array { */ void set_executor(std::shared_ptr exec) { - if (exec == exec_) { + if (exec_ && exec->get_mem_space() == exec_->get_mem_space()) { // moving to the same executor, no-op return; } diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 378e892e724..477c9ec782d 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -490,7 +490,8 @@ class Executor : public log::EnableLogging { template void copy(size_type num_elems, const T *src_ptr, T *dest_ptr) const { - this->get_mem_space()->copy_from(this, num_elems, src_ptr, dest_ptr); + this->get_mem_space()->copy_from(this->get_mem_space().get(), num_elems, + src_ptr, dest_ptr); } /** @@ -506,7 +507,8 @@ class Executor : public log::EnableLogging { T copy_val_to_host(const T *ptr) const { T out{}; - this->get_master()->get_mem_space()->copy_from(this, 1, ptr, &out); + this->get_master()->get_mem_space()->copy_from(this->get_mem_space().get(), 1, + ptr, &out); return out; } From ccdc83373464269137cfadca2dca0da28c5a0b95 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 20 Oct 2020 14:10:43 +0200 Subject: [PATCH 7/8] Update CUDA and HIP to use memory space. --- cuda/base/executor.cpp | 93 +++---------------- cuda/factorization/par_ilut_select_common.cu | 3 +- cuda/solver/common_trs_kernels.cuh | 6 +- cuda/test/base/cuda_executor.cu | 4 +- hip/base/executor.hip.cpp | 93 +++---------------- .../par_ilut_select_common.hip.cpp | 3 +- hip/solver/common_trs_kernels.hip.hpp | 4 +- hip/test/base/hip_executor.hip.cpp | 4 +- 8 files changed, 40 insertions(+), 170 deletions(-) diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 543e78131e0..d1dea06e99c 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -71,87 +71,20 @@ std::shared_ptr CudaExecutor::create( } -void OmpExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const -{ - if (num_bytes > 0) { - cuda::device_guard g(dest->get_device_id()); - GKO_ASSERT_NO_CUDA_ERRORS( - cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyHostToDevice)); - } -} - - -void CudaExecutor::raw_free(void *ptr) const noexcept -{ - cuda::device_guard g(this->get_device_id()); - auto error_code = cudaFree(ptr); - if (error_code != cudaSuccess) { -#if GKO_VERBOSE_LEVEL >= 1 - // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable CUDA error on device " << this->device_id_ - << " in " << __func__ << ": " << cudaGetErrorName(error_code) - << ": " << cudaGetErrorString(error_code) << std::endl - << "Exiting program" << std::endl; -#endif // GKO_VERBOSE_LEVEL >= 1 - std::exit(error_code); - } -} - - -void *CudaExecutor::raw_alloc(size_type num_bytes) const -{ - void *dev_ptr = nullptr; - cuda::device_guard g(this->get_device_id()); -#ifdef NDEBUG - auto error_code = cudaMalloc(&dev_ptr, num_bytes); -#else - auto error_code = cudaMallocManaged(&dev_ptr, num_bytes); -#endif - if (error_code != cudaErrorMemoryAllocation) { - GKO_ASSERT_NO_CUDA_ERRORS(error_code); - } - GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes); - return dev_ptr; -} - - -void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const -{ - if (num_bytes > 0) { - cuda::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_CUDA_ERRORS( - cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyDeviceToHost)); - } -} - - -void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const -{ - if (num_bytes > 0) { - cuda::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_CUDA_ERRORS( - cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, - this->get_device_id(), num_bytes)); - } -} - - -void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +std::shared_ptr CudaExecutor::create( + int device_id, std::shared_ptr mem_space, + std::shared_ptr master, bool device_reset) { -#if GINKGO_HIP_PLATFORM_NVCC == 1 - if (num_bytes > 0) { - cuda::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_CUDA_ERRORS( - cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr, - this->get_device_id(), num_bytes)); - } -#else - GKO_NOT_SUPPORTED(this); -#endif + return std::shared_ptr( + new CudaExecutor(device_id, mem_space, std::move(master), device_reset), + [device_id](CudaExecutor *exec) { + delete exec; + if (!CudaExecutor::get_num_execs(device_id) && + exec->get_device_reset()) { + cuda::device_guard g(device_id); + cudaDeviceReset(); + } + }); } diff --git a/cuda/factorization/par_ilut_select_common.cu b/cuda/factorization/par_ilut_select_common.cu index e4d9a4c1e93..93356d6f31d 100644 --- a/cuda/factorization/par_ilut_select_common.cu +++ b/cuda/factorization/par_ilut_select_common.cu @@ -100,7 +100,8 @@ sampleselect_bucket sampleselect_find_bucket( { kernel::find_bucket<<<1, config::warp_size>>>(prefix_sum, rank); IndexType values[3]{}; - exec->get_master()->copy_from(exec.get(), 3, prefix_sum, values); + exec->get_master()->get_mem_space()->copy_from(exec->get_mem_space().get(), + 3, prefix_sum, values); return {values[0], values[1], values[2]}; } diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index f16be5ee0e1..3c57ff17b26 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -222,10 +222,12 @@ void generate_kernel(std::shared_ptr exec, // allocate workspace if (cuda_solve_struct->factor_work_vec != nullptr) { - exec->free(cuda_solve_struct->factor_work_vec); + exec->get_mem_space()->free( + cuda_solve_struct->factor_work_vec); } cuda_solve_struct->factor_work_vec = - exec->alloc(cuda_solve_struct->factor_work_size); + exec->get_mem_space()->alloc( + cuda_solve_struct->factor_work_size); cusparse::csrsm2_analysis( handle, cuda_solve_struct->algorithm, diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index aaa1c9c99f2..8cb1de9ccd5 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -131,12 +131,12 @@ TEST_F(CudaExecutor, PreservesDeviceSettings) { auto previous_device = gko::CudaExecutor::get_num_devices() - 1; GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(previous_device)); - auto orig = cuda->alloc(2); + auto orig = cuda->get_mem_space()->alloc(2); int current_device; GKO_ASSERT_NO_CUDA_ERRORS(cudaGetDevice(¤t_device)); ASSERT_EQ(current_device, previous_device); - cuda->free(orig); + cuda->get_mem_space()->free(orig); GKO_ASSERT_NO_CUDA_ERRORS(cudaGetDevice(¤t_device)); ASSERT_EQ(current_device, previous_device); } diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index f41fb69f46c..7bcab762951 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -71,87 +71,20 @@ std::shared_ptr HipExecutor::create( } -void OmpExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const -{ - if (num_bytes > 0) { - hip::device_guard g(dest->get_device_id()); - GKO_ASSERT_NO_HIP_ERRORS( - hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyHostToDevice)); - } -} - - -void HipExecutor::raw_free(void *ptr) const noexcept -{ - hip::device_guard g(this->get_device_id()); - auto error_code = hipFree(ptr); - if (error_code != hipSuccess) { -#if GKO_VERBOSE_LEVEL >= 1 - // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable HIP error on device " << this->device_id_ - << " in " << __func__ << ": " << hipGetErrorName(error_code) - << ": " << hipGetErrorString(error_code) << std::endl - << "Exiting program" << std::endl; -#endif // GKO_VERBOSE_LEVEL >= 1 - std::exit(error_code); - } -} - - -void *HipExecutor::raw_alloc(size_type num_bytes) const -{ - void *dev_ptr = nullptr; - hip::device_guard g(this->get_device_id()); -#if defined(NDEBUG) || (GINKGO_HIP_PLATFORM_HCC == 1) - auto error_code = hipMalloc(&dev_ptr, num_bytes); -#else - auto error_code = hipMallocManaged(&dev_ptr, num_bytes); -#endif - if (error_code != hipErrorMemoryAllocation) { - GKO_ASSERT_NO_HIP_ERRORS(error_code); - } - GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes); - return dev_ptr; -} - - -void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const -{ - if (num_bytes > 0) { - hip::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_HIP_ERRORS( - hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyDeviceToHost)); - } -} - - -void HipExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const -{ -#if GINKGO_HIP_PLATFORM_NVCC == 1 - if (num_bytes > 0) { - hip::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(), - src_ptr, this->get_device_id(), - num_bytes)); - } -#else - GKO_NOT_SUPPORTED(this); -#endif -} - - -void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +std::shared_ptr HipExecutor::create( + int device_id, std::shared_ptr mem_space, + std::shared_ptr master, bool device_reset) { - if (num_bytes > 0) { - hip::device_guard g(this->get_device_id()); - GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(), - src_ptr, this->get_device_id(), - num_bytes)); - } + return std::shared_ptr( + new HipExecutor(device_id, mem_space, std::move(master), device_reset), + [device_id](HipExecutor *exec) { + delete exec; + if (!HipExecutor::get_num_execs(device_id) && + exec->get_device_reset()) { + hip::device_guard g(device_id); + hipDeviceReset(); + } + }); } diff --git a/hip/factorization/par_ilut_select_common.hip.cpp b/hip/factorization/par_ilut_select_common.hip.cpp index 92431c6b0a3..9a0f75833a5 100644 --- a/hip/factorization/par_ilut_select_common.hip.cpp +++ b/hip/factorization/par_ilut_select_common.hip.cpp @@ -109,7 +109,8 @@ sampleselect_bucket sampleselect_find_bucket( hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::find_bucket), dim3(1), dim3(config::warp_size), 0, 0, prefix_sum, rank); IndexType values[3]{}; - exec->get_master()->copy_from(exec.get(), 3, prefix_sum, values); + exec->get_master()->get_mem_space()->copy_from(exec->get_mem_space().get(), + 3, prefix_sum, values); return {values[0], values[1], values[2]}; } diff --git a/hip/solver/common_trs_kernels.hip.hpp b/hip/solver/common_trs_kernels.hip.hpp index 3bf0e56c7fa..025e25e29ab 100644 --- a/hip/solver/common_trs_kernels.hip.hpp +++ b/hip/solver/common_trs_kernels.hip.hpp @@ -159,10 +159,10 @@ void generate_kernel(std::shared_ptr exec, // allocate workspace if (hip_solve_struct->factor_work_vec != nullptr) { - exec->free(hip_solve_struct->factor_work_vec); + exec->get_mem_space()->free(hip_solve_struct->factor_work_vec); } hip_solve_struct->factor_work_vec = - exec->alloc(hip_solve_struct->factor_work_size); + exec->get_mem_space()->alloc(hip_solve_struct->factor_work_size); hipsparse::csrsv2_analysis( handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index 7ff522bb62a..bb10c091feb 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -136,12 +136,12 @@ TEST_F(HipExecutor, PreservesDeviceSettings) { auto previous_device = gko::HipExecutor::get_num_devices() - 1; GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(previous_device)); - auto orig = hip->alloc(2); + auto orig = hip->get_mem_space()->alloc(2); int current_device; GKO_ASSERT_NO_HIP_ERRORS(hipGetDevice(¤t_device)); ASSERT_EQ(current_device, previous_device); - hip->free(orig); + hip->get_mem_space()->free(orig); GKO_ASSERT_NO_HIP_ERRORS(hipGetDevice(¤t_device)); ASSERT_EQ(current_device, previous_device); } From 48c9e618d1ba47c266de1474bbd7993b0c4a8bf6 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 20 Oct 2020 14:11:06 +0200 Subject: [PATCH 8/8] Update test_install. --- core/base/allocator.hpp | 2 +- examples/papi-logging/papi-logging.cpp | 8 ++++---- test_install/test_install.cpp | 22 ++++++++++++---------- test_install/test_install_cuda.cu | 18 ++++++++---------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/core/base/allocator.hpp b/core/base/allocator.hpp index 76ac31b1147..8b50a14cac7 100644 --- a/core/base/allocator.hpp +++ b/core/base/allocator.hpp @@ -101,7 +101,7 @@ class ExecutorAllocator { */ T *allocate(std::size_t n) const { - return exec_->get_mem_space()->alloc(n); + return exec_->get_mem_space()->template alloc(n); } /** diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index 2fd8cbbc8d4..0d2153d3a22 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -140,8 +140,7 @@ int main(int argc, char *argv[]) std::cout << gko::version_info::get() << std::endl; if (argc == 2 && (std::string(argv[1]) == "--help")) { - std::cerr << "Usage: " << argv[0] << " [executor]" - << std::endl; + std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl; std::exit(-1); } @@ -192,8 +191,9 @@ int main(int argc, char *argv[]) // Create a PAPI logger and add it to relevant LinOps auto logger = gko::log::Papi::create( - exec, gko::log::Logger::linop_apply_completed_mask | - gko::log::Logger::linop_advanced_apply_completed_mask); + exec, exec->get_mem_space(), + gko::log::Logger::linop_apply_completed_mask | + gko::log::Logger::linop_advanced_apply_completed_mask); solver->add_logger(logger); A->add_logger(logger); diff --git a/test_install/test_install.cpp b/test_install/test_install.cpp index 5ea59794440..559a05abeeb 100644 --- a/test_install/test_install.cpp +++ b/test_install/test_install.cpp @@ -183,23 +183,25 @@ int main(int, char **) // core/log/convergence.hpp { - auto test = gko::log::Convergence<>::create(refExec); + auto test = + gko::log::Convergence<>::create(refExec, refExec->get_mem_space()); } // core/log/record.hpp { - auto test = gko::log::executor_data{}; + auto test = gko::log::memory_space_data{}; } // core/log/stream.hpp { - auto test = gko::log::Stream<>::create(refExec); + auto test = + gko::log::Stream<>::create(refExec, refExec->get_mem_space()); } #if GKO_HAVE_PAPI_SDE // core/log/papi.hpp { - auto test = gko::log::Papi<>::create(refExec); + auto test = gko::log::Papi<>::create(refExec, refExec->get_mem_space()); } #endif // GKO_HAVE_PAPI_SDE @@ -357,13 +359,13 @@ int main(int, char **) .with_reduction_factor(1e-10) .on(refExec); - auto rel_res = gko::stop::RelativeResidualNorm<>::build() - .with_tolerance(1e-10) - .on(refExec); + auto rel_res = + gko::stop::RelativeResidualNorm<>::build().with_tolerance(1e-10).on( + refExec); - auto abs_res = gko::stop::AbsoluteResidualNorm<>::build() - .with_tolerance(1e-10) - .on(refExec); + auto abs_res = + gko::stop::AbsoluteResidualNorm<>::build().with_tolerance(1e-10).on( + refExec); // stopping_status.hpp auto stop_status = gko::stopping_status{}; diff --git a/test_install/test_install_cuda.cu b/test_install/test_install_cuda.cu index ed2e18c307d..c3d3386cad4 100644 --- a/test_install/test_install_cuda.cu +++ b/test_install/test_install_cuda.cu @@ -187,23 +187,23 @@ int main(int, char **) // core/log/convergence.hpp { - gko::log::Convergence<>::create(cudaExec); + gko::log::Convergence<>::create(cudaExec, cudaExec->get_mem_space()); } // core/log/record.hpp { - gko::log::executor_data{}; + gko::log::memory_space_data{}; } // core/log/stream.hpp { - gko::log::Stream<>::create(cudaExec); + gko::log::Stream<>::create(cudaExec, cudaExec->get_mem_space()); } #if GKO_HAVE_PAPI_SDE // core/log/papi.hpp { - gko::log::Papi<>::create(cudaExec); + gko::log::Papi<>::create(cudaExec, cudaExec->get_mem_space()); } #endif // GKO_HAVE_PAPI_SDE @@ -349,13 +349,11 @@ int main(int, char **) .with_reduction_factor(1e-10) .on(cudaExec); - gko::stop::RelativeResidualNorm<>::build() - .with_tolerance(1e-10) - .on(cudaExec); + gko::stop::RelativeResidualNorm<>::build().with_tolerance(1e-10).on( + cudaExec); - gko::stop::AbsoluteResidualNorm<>::build() - .with_tolerance(1e-10) - .on(cudaExec); + gko::stop::AbsoluteResidualNorm<>::build().with_tolerance(1e-10).on( + cudaExec); // stopping_status.hpp gko::stopping_status{};