From 6006fc4625875b4f99e9a38a6906f89fe5060850 Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Mon, 19 Oct 2020 16:59:40 +0200
Subject: [PATCH 1/8] Move memory operations from Executor to MemorySpace
 classes.

---
 include/ginkgo/core/base/executor.hpp     | 391 +++++++++---------
 include/ginkgo/core/base/memory_space.hpp | 468 ++++++++++++++++++++++
 2 files changed, 661 insertions(+), 198 deletions(-)
 create mode 100644 include/ginkgo/core/base/memory_space.hpp
diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp
index 1df29abc59c..c76f05c29c3 100644
--- a/include/ginkgo/core/base/executor.hpp
+++ b/include/ginkgo/core/base/executor.hpp
@@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <type_traits>
 
 
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/log/logger.hpp>
 #include <ginkgo/core/synthesizer/containers.hpp>
@@ -473,69 +474,6 @@ class Executor : public log::EnableLogging<Executor> {
         this->run(op);
     }
 
-    /**
-     * Allocates memory in this Executor.
-     *
-     * @tparam T  datatype to allocate
-     *
-     * @param num_elems  number of elements of type T to allocate
-     *
-     * @throw AllocationError  if the allocation failed
-     *
-     * @return pointer to allocated memory
-     */
-    template <typename T>
-    T *alloc(size_type num_elems) const
-    {
-        this->template log<log::Logger::allocation_started>(
-            this, num_elems * sizeof(T));
-        T *allocated = static_cast<T *>(this->raw_alloc(num_elems * sizeof(T)));
-        this->template log<log::Logger::allocation_completed>(
-            this, num_elems * sizeof(T), reinterpret_cast<uintptr>(allocated));
-        return allocated;
-    }
-
-    /**
-     * Frees memory previously allocated with Executor::alloc().
-     *
-     * If `ptr` is a `nullptr`, the function has no effect.
-     *
-     * @param ptr  pointer to the allocated memory block
-     */
-    void free(void *ptr) const noexcept
-    {
-        this->template log<log::Logger::free_started>(
-            this, reinterpret_cast<uintptr>(ptr));
-        this->raw_free(ptr);
-        this->template log<log::Logger::free_completed>(
-            this, reinterpret_cast<uintptr>(ptr));
-    }
-
-    /**
-     * Copies data from another Executor.
-     *
-     * @tparam T  datatype to copy
-     *
-     * @param src_exec  Executor from which the memory will be copied
-     * @param num_elems  number of elements of type T to copy
-     * @param src_ptr  pointer to a block of memory containing the data to be
-     *                 copied
-     * @param dest_ptr  pointer to an allocated block of memory
-     *                  where the data will be copied to
-     */
-    template <typename T>
-    void copy_from(const Executor *src_exec, size_type num_elems,
-                   const T *src_ptr, T *dest_ptr) const
-    {
-        this->template log<log::Logger::copy_started>(
-            src_exec, this, reinterpret_cast<uintptr>(src_ptr),
-            reinterpret_cast<uintptr>(dest_ptr), num_elems * sizeof(T));
-        this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr, dest_ptr);
-        this->template log<log::Logger::copy_completed>(
-            src_exec, this, reinterpret_cast<uintptr>(src_ptr),
-            reinterpret_cast<uintptr>(dest_ptr), num_elems * sizeof(T));
-    }
-
     /**
      * Copies data within this Executor.
      *
@@ -582,60 +520,21 @@ class Executor : public log::EnableLogging<Executor> {
     virtual std::shared_ptr<const Executor> get_master() const noexcept = 0;
 
     /**
-     * Synchronize the operations launched on the executor with its master.
-     */
-    virtual void synchronize() const = 0;
-
-protected:
-    /**
-     * Allocates raw memory in this Executor.
-     *
-     * @param size  number of bytes to allocate
-     *
-     * @throw AllocationError  if the allocation failed
-     *
-     * @return raw pointer to allocated memory
+     * Returns the associated memory space of this Executor.
+     * @return the associated memory space of this Executor.
      */
-    virtual void *raw_alloc(size_type size) const = 0;
+    virtual std::shared_ptr<MemorySpace> get_mem_space() noexcept = 0;
 
     /**
-     * Frees memory previously allocated with Executor::alloc().
-     *
-     * If `ptr` is a `nullptr`, the function has no effect.
-     *
-     * @param ptr  pointer to the allocated memory block
+     * @copydoc get_mem_space
      */
-    virtual void raw_free(void *ptr) const noexcept = 0;
+    virtual std::shared_ptr<const MemorySpace> get_mem_space() const
+        noexcept = 0;
 
     /**
-     * Copies raw data from another Executor.
-     *
-     * @param src_exec  Executor from which the memory will be copied
-     * @param n_bytes  number of bytes to copy
-     * @param src_ptr  pointer to a block of memory containing the data to be
-     *                 copied
-     * @param dest_ptr  pointer to an allocated block of memory where the data
-     *                  will be copied to
+     * Synchronize the operations launched on the executor with its master.
      */
-    virtual void raw_copy_from(const Executor *src_exec, size_type n_bytes,
-                               const void *src_ptr, void *dest_ptr) const = 0;
-
-/**
- * @internal
- * Declares a raw_copy_to() overload for a specified Executor subclass.
- *
- * This is the second stage of the double dispatch emulation required to
- * implement raw_copy_from().
- *
- * @param _exec_type  the Executor subclass
- */
-#define GKO_ENABLE_RAW_COPY_TO(_exec_type, ...)                              \
-    virtual void raw_copy_to(const _exec_type *dest_exec, size_type n_bytes, \
-                             const void *src_ptr, void *dest_ptr) const = 0
-
-    GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_RAW_COPY_TO);
-
-#undef GKO_ENABLE_RAW_COPY_TO
+    virtual void synchronize() const = 0;
 
 private:
     /**
@@ -690,66 +589,6 @@ class Executor : public log::EnableLogging<Executor> {
 };
 
 
-/**
- * This is a deleter that uses an executor's `free` method to deallocate the
- * data.
- *
- * @tparam T  the type of object being deleted
- *
- * @ingroup Executor
- */
-template <typename T>
-class executor_deleter {
-public:
-    using pointer = T *;
-
-    /**
-     * Creates a new deleter.
-     *
-     * @param exec  the executor used to free the data
-     */
-    explicit executor_deleter(std::shared_ptr<const Executor> exec)
-        : exec_{exec}
-    {}
-
-    /**
-     * Deletes the object.
-     *
-     * @param ptr  pointer to the object being deleted
-     */
-    void operator()(pointer ptr) const
-    {
-        if (exec_) {
-            exec_->free(ptr);
-        }
-    }
-
-private:
-    std::shared_ptr<const Executor> exec_;
-};
-
-// a specialization for arrays
-template <typename T>
-class executor_deleter<T[]> {
-public:
-    using pointer = T[];
-
-    explicit executor_deleter(std::shared_ptr<const Executor> exec)
-        : exec_{exec}
-    {}
-
-    void operator()(pointer ptr) const
-    {
-        if (exec_) {
-            exec_->free(ptr);
-        }
-    }
-
-private:
-    std::shared_ptr<const Executor> exec_;
-};
-
-
 namespace detail {
 
 
@@ -763,13 +602,6 @@ class ExecutorBase : public Executor {
         this->template log<log::Logger::operation_completed>(this, &op);
     }
 
-protected:
-    void raw_copy_from(const Executor *src_exec, size_type n_bytes,
-                       const void *src_ptr, void *dest_ptr) const override
-    {
-        src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr);
-    }
-
 private:
     ConcreteExecutor *self() noexcept
     {
@@ -823,11 +655,6 @@ class EnableDeviceReset {
 }  // namespace detail
 
 
-#define GKO_OVERRIDE_RAW_COPY_TO(_executor_type, ...)                    \
-    void raw_copy_to(const _executor_type *dest_exec, size_type n_bytes, \
-                     const void *src_ptr, void *dest_ptr) const override
-
-
 /**
  * This is the Executor subclass which represents the OpenMP device
  * (typically CPU).
@@ -840,6 +667,8 @@ class OmpExecutor : public detail::ExecutorBase<OmpExecutor>,
     friend class detail::ExecutorBase<OmpExecutor>;
 
 public:
+    using DefaultMemorySpace = HostMemorySpace;
+
     /**
      * Creates a new OmpExecutor.
      */
@@ -852,16 +681,44 @@ class OmpExecutor : public detail::ExecutorBase<OmpExecutor>,
 
     std::shared_ptr<const Executor> get_master() const noexcept override;
 
+    std::shared_ptr<MemorySpace> get_mem_space() noexcept override;
+
+    std::shared_ptr<const MemorySpace> get_mem_space() const noexcept override;
+
     void synchronize() const override;
 
 protected:
-    OmpExecutor() = default;
+    OmpExecutor() { mem_space_instance_ = HostMemorySpace::create(); }
+
+    OmpExecutor(std::shared_ptr<MemorySpace> mem_space)
+        : mem_space_instance_(mem_space)
+    {
+        if (!check_mem_space_validity(mem_space_instance_)) {
+            GKO_MEMSPACE_MISMATCH(NOT_HOST);
+        }
+    }
 
-    void *raw_alloc(size_type size) const override;
+    OmpExecutor(std::shared_ptr<MemorySpace> mem_space)
+        : mem_space_instance_(mem_space)
+    {
+        if (!check_mem_space_validity(mem_space_instance_)) {
+            GKO_MEMSPACE_MISMATCH(NOT_HOST);
+        }
+    }
 
-    void raw_free(void *ptr) const noexcept override;
+    bool check_mem_space_validity(std::shared_ptr<MemorySpace> mem_space)
+    {
+        auto check_default_mem_space =
+            dynamic_cast<DefaultMemorySpace *>(mem_space.get());
+        if (check_default_mem_space == nullptr) {
+            return false;
+        } else {
+            return true;
+        }
+    }
 
-    GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO);
+private:
+    std::shared_ptr<MemorySpace> mem_space_instance_;
 };
 
 
@@ -881,11 +738,27 @@ using DefaultExecutor = OmpExecutor;
  */
 class ReferenceExecutor : public OmpExecutor {
 public:
+    /**
+     * Creates a new ReferenceExecutor with an existing memory space.
+     *
+     */
     static std::shared_ptr<ReferenceExecutor> create()
     {
         return std::shared_ptr<ReferenceExecutor>(new ReferenceExecutor());
     }
 
+    /**
+     * Creates a new ReferenceExecutor with an existing memory space.
+     *
+     * @param memory_space  The memory space to be associated with the executor.
+     */
+    static std::shared_ptr<ReferenceExecutor> create(
+        std::shared_ptr<MemorySpace> memory_space)
+    {
+        return std::shared_ptr<ReferenceExecutor>(
+            new ReferenceExecutor(memory_space));
+    }
+
     void run(const Operation &op) const override
     {
         this->template log<log::Logger::operation_launched>(this, &op);
@@ -894,8 +767,40 @@ class ReferenceExecutor : public OmpExecutor {
         this->template log<log::Logger::operation_completed>(this, &op);
     }
 
+    std::shared_ptr<MemorySpace> get_mem_space() noexcept override
+    {
+        return this->mem_space_instance_;
+    }
+
+    std::shared_ptr<const MemorySpace> get_mem_space() const noexcept override
+    {
+        return this->mem_space_instance_;
+    }
+
 protected:
-    ReferenceExecutor() = default;
+    ReferenceExecutor() { mem_space_instance_ = HostMemorySpace::create(); }
+
+    ReferenceExecutor(std::shared_ptr<MemorySpace> mem_space)
+        : mem_space_instance_(mem_space)
+    {
+        if (!check_mem_space_validity(mem_space_instance_)) {
+            GKO_MEMSPACE_MISMATCH(NOT_HOST);
+        }
+    }
+
+    bool check_mem_space_validity(std::shared_ptr<MemorySpace> mem_space)
+    {
+        auto check_default_mem_space =
+            dynamic_cast<DefaultMemorySpace *>(mem_space.get());
+        if (check_default_mem_space == nullptr) {
+            return false;
+        } else {
+            return true;
+        }
+    }
+
+private:
+    std::shared_ptr<MemorySpace> mem_space_instance_;
 };
 
 
@@ -918,23 +823,45 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
     friend class detail::ExecutorBase<CudaExecutor>;
 
 public:
+    using DefaultMemorySpace = CudaMemorySpace;
+
     /**
      * Creates a new CudaExecutor.
      *
      * @param device_id  the CUDA device id of this device
      * @param master  an executor on the host that is used to invoke the device
      * kernels
+     * @param device_reset Flag that controls calls CudaDeviceReset at the end
+     * of its scope.
      */
     static std::shared_ptr<CudaExecutor> create(
         int device_id, std::shared_ptr<Executor> master,
         bool device_reset = false);
 
+    /**
+     * Creates a new CudaExecutor.
+     *
+     * @param device_id  the CUDA device id of this device
+     * @param memory_space  the memory space associated to the executor.
+     * @param master  an executor on the host that is used to invoke the device
+     * kernels
+     * @param device_reset Flag that controls calls CudaDeviceReset at the end
+     * of its scope.
+     */
+    static std::shared_ptr<CudaExecutor> create(
+        int device_id, std::shared_ptr<MemorySpace> memory_space,
+        std::shared_ptr<Executor> master, bool device_reset = false);
+
     ~CudaExecutor() { decrease_num_execs(this->device_id_); }
 
     std::shared_ptr<Executor> get_master() noexcept override;
 
     std::shared_ptr<const Executor> get_master() const noexcept override;
 
+    std::shared_ptr<MemorySpace> get_mem_space() noexcept override;
+
+    std::shared_ptr<const MemorySpace> get_mem_space() const noexcept override;
+
     void synchronize() const override;
 
     void run(const Operation &op) const override;
@@ -1019,13 +946,41 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
         this->set_gpu_property();
         this->init_handles();
         increase_num_execs(device_id);
+        mem_space_instance_ = CudaMemorySpace::create(device_id);
     }
 
-    void *raw_alloc(size_type size) const override;
-
-    void raw_free(void *ptr) const noexcept override;
+    CudaExecutor(int device_id, std::shared_ptr<MemorySpace> mem_space,
+                 std::shared_ptr<Executor> master, bool device_reset = false)
+        : EnableDeviceReset{device_reset},
+          device_id_(device_id),
+          master_(master),
+          num_warps_per_sm_(0),
+          num_multiprocessor_(0),
+          major_(0),
+          minor_(0),
+          mem_space_instance_(mem_space)
+    {
+        assert(device_id < max_devices);
+        this->set_gpu_property();
+        this->init_handles();
+        increase_num_execs(device_id);
+        if (!check_mem_space_validity(mem_space_instance_)) {
+            GKO_MEMSPACE_MISMATCH(NOT_CUDA);
+        }
+    }
 
-    GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO);
+    bool check_mem_space_validity(std::shared_ptr<MemorySpace> mem_space)
+    {
+        auto check_cuda_mem_space =
+            dynamic_cast<CudaMemorySpace *>(mem_space.get());
+        auto check_cuda_uvm_mem_space =
+            dynamic_cast<CudaUVMSpace *>(mem_space.get());
+        if (check_cuda_mem_space == nullptr &&
+            check_cuda_uvm_mem_space == nullptr) {
+            return false;
+        }
+        return true;
+    }
 
     static void increase_num_execs(unsigned device_id)
     {
@@ -1053,6 +1008,7 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
     int major_;
     int minor_;
     int warp_size_;
+    std::shared_ptr<MemorySpace> mem_space_instance_;
 
     template <typename T>
     using handle_manager = std::unique_ptr<T, std::function<void(T *)>>;
@@ -1084,23 +1040,43 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
     friend class detail::ExecutorBase<HipExecutor>;
 
 public:
+    using DefaultMemorySpace = HipMemorySpace;
+
     /**
      * Creates a new HipExecutor.
      *
      * @param device_id  the HIP device id of this device
      * @param master  an executor on the host that is used to invoke the device
      *                kernels
+     * @param device_reset Flag that controls calls HipDeviceReset at the end
+     * of its scope.
      */
     static std::shared_ptr<HipExecutor> create(int device_id,
                                                std::shared_ptr<Executor> master,
                                                bool device_reset = false);
 
+    /**
+     * Creates a new HipExecutor.
+     *
+     * @param device_id  the HIP device id of this device
+     * @param memory_space  the memory space associated to the executor.
+     * @param master  an executor on the host that is used to invoke the device
+     * kernels
+     */
+    static std::shared_ptr<HipExecutor> create(
+        int device_id, std::shared_ptr<MemorySpace> memory_space,
+        std::shared_ptr<Executor> master, bool device_reset = false);
+
     ~HipExecutor() { decrease_num_execs(this->device_id_); }
 
     std::shared_ptr<Executor> get_master() noexcept override;
 
     std::shared_ptr<const Executor> get_master() const noexcept override;
 
+    std::shared_ptr<MemorySpace> get_mem_space() noexcept override;
+
+    std::shared_ptr<const MemorySpace> get_mem_space() const noexcept override;
+
     void synchronize() const override;
 
     void run(const Operation &op) const override;
@@ -1185,13 +1161,34 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
         this->set_gpu_property();
         this->init_handles();
         increase_num_execs(device_id);
+        mem_space_instance_ = HipMemorySpace::create(device_id);
     }
 
-    void *raw_alloc(size_type size) const override;
-
-    void raw_free(void *ptr) const noexcept override;
+    HipExecutor(int device_id, std::shared_ptr<MemorySpace> mem_space,
+                std::shared_ptr<Executor> master, bool device_reset = false)
+        : device_id_(device_id),
+          master_(master),
+          num_multiprocessor_(0),
+          mem_space_instance_(mem_space)
+    {
+        assert(device_id < max_devices);
+        this->set_gpu_property();
+        this->init_handles();
+        increase_num_execs(device_id);
+        if (!check_mem_space_validity(mem_space_instance_)) {
+            GKO_MEMSPACE_MISMATCH(NOT_HIP);
+        }
+    }
 
-    GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO);
+    bool check_mem_space_validity(std::shared_ptr<MemorySpace> mem_space)
+    {
+        auto check_hip_mem_space =
+            dynamic_cast<HipMemorySpace *>(mem_space.get());
+        if (check_hip_mem_space == nullptr) {
+            return false;
+        }
+        return true;
+    }
 
     static void increase_num_execs(int device_id)
     {
@@ -1219,6 +1216,7 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
     int major_;
     int minor_;
     int warp_size_;
+    std::shared_ptr<MemorySpace> mem_space_instance_;
 
     template <typename T>
     using handle_manager = std::unique_ptr<T, std::function<void(T *)>>;
@@ -1238,9 +1236,6 @@ using DefaultExecutor = HipExecutor;
 }  // namespace kernels
 
 
-#undef GKO_OVERRIDE_RAW_COPY_TO
-
-
 }  // namespace gko
 
 
diff --git a/include/ginkgo/core/base/memory_space.hpp b/include/ginkgo/core/base/memory_space.hpp
new file mode 100644
index 00000000000..1a4a9a44159
--- /dev/null
+++ b/include/ginkgo/core/base/memory_space.hpp
@@ -0,0 +1,468 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_CORE_MEMORY_SPACE_HPP_
+#define GKO_CORE_MEMORY_SPACE_HPP_
+
+
+#include <memory>
+#include <mutex>
+#include <sstream>
+#include <tuple>
+#include <type_traits>
+
+
+#include <ginkgo/core/base/types.hpp>
+#include <ginkgo/core/log/logger.hpp>
+#include <ginkgo/core/synthesizer/containers.hpp>
+
+
+namespace gko {
+
+
+#define GKO_FORWARD_DECLARE(_type, ...) class _type
+
+GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_FORWARD_DECLARE);
+
+#undef GKO_FORWARD_DECLARE
+
+
+namespace detail {
+
+
+template <typename>
+class MemorySpaceBase;
+
+
+}  // namespace detail
+
+
+class MemorySpace : public log::EnableLogging<MemorySpace> {
+    template <typename T>
+    friend class detail::MemorySpaceBase;
+
+public:
+    virtual ~MemorySpace() = default;
+
+    MemorySpace() = default;
+    MemorySpace(MemorySpace &) = delete;
+    MemorySpace(MemorySpace &&) = default;
+    MemorySpace &operator=(MemorySpace &) = delete;
+    MemorySpace &operator=(MemorySpace &&) = default;
+
+    /**
+     * Allocates memory in this MemorySpace.
+     *
+     * @tparam T  datatype to allocate
+     *
+     * @param num_elems  number of elements of type T to allocate
+     *
+     * @throw AllocationError  if the allocation failed
+     *
+     * @return pointer to allocated memory
+     */
+    template <typename T>
+    T *alloc(size_type num_elems) const
+    {
+        this->template log<log::Logger::allocation_started>(
+            this, num_elems * sizeof(T));
+        T *allocated = static_cast<T *>(this->raw_alloc(num_elems * sizeof(T)));
+        this->template log<log::Logger::allocation_completed>(
+            this, num_elems * sizeof(T), reinterpret_cast<uintptr>(allocated));
+        return allocated;
+    }
+
+    /**
+     * Frees memory previously allocated with MemorySpace::alloc().
+     *
+     * If `ptr` is a `nullptr`, the function has no effect.
+     *
+     * @param ptr  pointer to the allocated memory block
+     */
+    void free(void *ptr) const noexcept
+    {
+        this->template log<log::Logger::free_started>(
+            this, reinterpret_cast<uintptr>(ptr));
+        this->raw_free(ptr);
+        this->template log<log::Logger::free_completed>(
+            this, reinterpret_cast<uintptr>(ptr));
+    }
+
+    /**
+     * Copies data from another MemorySpace.
+     *
+     * @tparam T  datatype to copy
+     *
+     * @param src_mem_space  MemorySpace from which the memory will be copied
+     * @param num_elems  number of elements of type T to copy
+     * @param src_ptr  pointer to a block of memory containing the data to be
+     *                 copied
+     * @param dest_ptr  pointer to an allocated block of memory
+     *                  where the data will be copied to
+     */
+    template <typename T>
+    void copy_from(const MemorySpace *src_mem_space, size_type num_elems,
+                   const T *src_ptr, T *dest_ptr) const
+    {
+        this->template log<log::Logger::copy_started>(
+            src_mem_space, this, reinterpret_cast<uintptr>(src_ptr),
+            reinterpret_cast<uintptr>(dest_ptr), num_elems * sizeof(T));
+        this->raw_copy_from(src_mem_space, num_elems * sizeof(T), src_ptr,
+                            dest_ptr);
+        this->template log<log::Logger::copy_completed>(
+            src_mem_space, this, reinterpret_cast<uintptr>(src_ptr),
+            reinterpret_cast<uintptr>(dest_ptr), num_elems * sizeof(T));
+    }
+
+    /**
+     * Synchronize the operations launched on the executor with its master.
+     */
+    virtual void synchronize() const = 0;
+
+protected:
+    /**
+     * Allocates raw memory in this MemorySpace.
+     *
+     * @param size  number of bytes to allocate
+     *
+     * @throw AllocationError  if the allocation failed
+     *
+     * @return raw pointer to allocated memory
+     */
+    virtual void *raw_alloc(size_type size) const = 0;
+
+    /**
+     * Frees memory previously allocated with MemorySpace::alloc().
+     *
+     * If `ptr` is a `nullptr`, the function has no effect.
+     *
+     * @param ptr  pointer to the allocated memory block
+     */
+    virtual void raw_free(void *ptr) const noexcept = 0;
+
+    /**
+     * Copies raw data from another MemorySpace.
+     *
+     * @param src_mem_space  MemorySpace from which the memory will be copied
+     * @param n_bytes  number of bytes to copy
+     * @param src_ptr  pointer to a block of memory containing the data to be
+     *                 copied
+     * @param dest_ptr  pointer to an allocated block of memory where the data
+     *                  will be copied to
+     */
+    virtual void raw_copy_from(const MemorySpace *src_mem_space,
+                               size_type n_bytes, const void *src_ptr,
+                               void *dest_ptr) const = 0;
+
+/**
+ * @internal
+ * Declares a raw_copy_to() overload for a specified MemorySpace subclass.
+ *
+ * This is the second stage of the double dispatch emulation required to
+ * implement raw_copy_from().
+ *
+ * @param _mem_space_type  the MemorySpace subclass
+ */
+#define GKO_ENABLE_RAW_COPY_TO(_mem_space_type, ...)                 \
+    virtual void raw_copy_to(const _mem_space_type *dest_mem_space,  \
+                             size_type n_bytes, const void *src_ptr, \
+                             void *dest_ptr) const = 0
+
+    GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_ENABLE_RAW_COPY_TO);
+
+#undef GKO_ENABLE_RAW_COPY_TO
+};
+
+
+/**
+ * This is a deleter that uses an mem_space's `free` method to deallocate
+ * the data.
+ *
+ * @tparam T  the type of object being deleted
+ *
+ * @ingroup MemorySpace
+ */
+template <typename T>
+class memory_space_deleter {
+public:
+    using pointer = T *;
+
+    /**
+     * Creates a new deleter.
+     *
+     * @param mem_space  the mem_spaceutor used to free the data
+     */
+    explicit memory_space_deleter(std::shared_ptr<const MemorySpace> mem_space)
+        : mem_space_{mem_space}
+    {}
+
+    /**
+     * Deletes the object.
+     *
+     * @param ptr  pointer to the object being deleted
+     */
+    void operator()(pointer ptr) const
+    {
+        if (mem_space_) {
+            mem_space_->free(ptr);
+        }
+    }
+
+private:
+    std::shared_ptr<const MemorySpace> mem_space_;
+};
+
+// a specialization for arrays
+template <typename T>
+class memory_space_deleter<T[]> {
+public:
+    using pointer = T[];
+
+    explicit memory_space_deleter(std::shared_ptr<const MemorySpace> mem_space)
+        : mem_space_{mem_space}
+    {}
+
+    void operator()(pointer ptr) const
+    {
+        if (mem_space_) {
+            mem_space_->free(ptr);
+        }
+    }
+
+private:
+    std::shared_ptr<const MemorySpace> mem_space_;
+};
+
+
+namespace detail {
+
+
+template <typename ConcreteMemorySpace>
+class MemorySpaceBase : public MemorySpace {
+public:
+    void raw_copy_from(const MemorySpace *src_mem_space, size_type n_bytes,
+                       const void *src_ptr, void *dest_ptr) const override
+    {
+        src_mem_space->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr);
+    }
+
+private:
+    ConcreteMemorySpace *self() noexcept
+    {
+        return static_cast<ConcreteMemorySpace *>(this);
+    }
+
+    const ConcreteMemorySpace *self() const noexcept
+    {
+        return static_cast<const ConcreteMemorySpace *>(this);
+    }
+};
+
+
+}  // namespace detail
+
+
+#define GKO_OVERRIDE_RAW_COPY_TO(_memory_space_type, ...)                    \
+    void raw_copy_to(const _memory_space_type *dest_mem_space,               \
+                     size_type n_bytes, const void *src_ptr, void *dest_ptr) \
+        const override
+
+
+class HostMemorySpace : public detail::MemorySpaceBase<HostMemorySpace> {
+    friend class detail::MemorySpaceBase<HostMemorySpace>;
+
+public:
+    /**
+     * Creates a new HostMemorySpace.
+     */
+    static std::shared_ptr<HostMemorySpace> create()
+    {
+        return std::shared_ptr<HostMemorySpace>(new HostMemorySpace());
+    }
+
+    void synchronize() const override;
+
+protected:
+    HostMemorySpace() = default;
+
+    void *raw_alloc(size_type size) const override;
+
+    void raw_free(void *ptr) const noexcept override;
+
+    GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO);
+};
+
+
+class CudaMemorySpace : public detail::MemorySpaceBase<CudaMemorySpace> {
+    friend class detail::MemorySpaceBase<CudaMemorySpace>;
+
+public:
+    /**
+     * Creates a new CudaMemorySpace.
+     *
+     * @param device_id  the CUDA device id of this device
+     */
+    static std::shared_ptr<CudaMemorySpace> create(int device_id)
+    {
+        return std::shared_ptr<CudaMemorySpace>(new CudaMemorySpace(device_id));
+    }
+
+    /**
+     * Get the CUDA device id of the device associated to this memory_space.
+     */
+    int get_device_id() const noexcept { return this->device_id_; }
+
+    /**
+     * Get the number of devices present on the system.
+     */
+    static int get_num_devices();
+
+    void synchronize() const override;
+
+protected:
+    CudaMemorySpace() = default;
+
+    CudaMemorySpace(int device_id) : device_id_(device_id)
+    {
+        assert(device_id < max_devices);
+    }
+
+    void *raw_alloc(size_type size) const override;
+
+    void raw_free(void *ptr) const noexcept override;
+
+    GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO);
+
+private:
+    int device_id_;
+    static constexpr int max_devices = 64;
+};
+
+
+class CudaUVMSpace : public detail::MemorySpaceBase<CudaUVMSpace> {
+    friend class detail::MemorySpaceBase<CudaUVMSpace>;
+
+public:
+    /**
+     * Creates a new CudaUVMSpace.
+     *
+     * @param device_id  the CUDA device id of this device
+     */
+    static std::shared_ptr<CudaUVMSpace> create(int device_id)
+    {
+        return std::shared_ptr<CudaUVMSpace>(new CudaUVMSpace(device_id));
+    }
+
+    /**
+     * Get the CUDA device id of the device associated to this memory_space.
+     */
+    int get_device_id() const noexcept { return this->device_id_; }
+
+    /**
+     * Get the number of devices present on the system.
+     */
+    static int get_num_devices();
+
+    void synchronize() const override;
+
+protected:
+    CudaUVMSpace() = default;
+
+    CudaUVMSpace(int device_id) : device_id_(device_id)
+    {
+        assert(device_id < max_devices);
+    }
+
+    void *raw_alloc(size_type size) const override;
+
+    void raw_free(void *ptr) const noexcept override;
+
+    GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO);
+
+private:
+    int device_id_;
+    static constexpr int max_devices = 64;
+};
+
+
+class HipMemorySpace : public detail::MemorySpaceBase<HipMemorySpace> {
+    friend class detail::MemorySpaceBase<HipMemorySpace>;
+
+public:
+    /**
+     * Creates a new HipMemorySpace.
+     *
+     * @param device_id  the HIP device id of this device
+     */
+    static std::shared_ptr<HipMemorySpace> create(int device_id)
+    {
+        return std::shared_ptr<HipMemorySpace>(new HipMemorySpace(device_id));
+    }
+
+    /**
+     * Get the HIP device id of the device associated to this memory_space.
+     */
+    int get_device_id() const noexcept { return this->device_id_; }
+
+    /**
+     * Get the number of devices present on the system.
+     */
+    static int get_num_devices();
+
+    void synchronize() const override;
+
+protected:
+    HipMemorySpace() = default;
+
+    HipMemorySpace(int device_id) : device_id_(device_id)
+    {
+        assert(device_id < max_devices);
+    }
+
+    void *raw_alloc(size_type size) const override;
+
+    void raw_free(void *ptr) const noexcept override;
+
+    GKO_ENABLE_FOR_ALL_MEMORY_SPACES(GKO_OVERRIDE_RAW_COPY_TO);
+
+private:
+    int device_id_;
+    static constexpr int max_devices = 64;
+};
+
+#undef GKO_OVERRIDE_RAW_COPY_TO
+
+
+}  // namespace gko
+
+
+#endif  // GKO_CORE_MEMORY_SPACE_HPP_

From d98259d529c84b3421fc3b180019e7fda6713b1c Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Mon, 19 Oct 2020 17:00:42 +0200
Subject: [PATCH 2/8] Add implementations for cuda, hip and host memory
 operations.

---
 core/CMakeLists.txt              |   1 +
 core/base/memory_space.cpp       |  69 ++++++++
 core/device_hooks/cuda_hooks.cpp |  74 +++++++--
 core/device_hooks/hip_hooks.cpp  |  36 ++--
 core/devices/cuda/executor.cpp   |  13 ++
 core/devices/hip/executor.cpp    |  13 ++
 core/devices/omp/executor.cpp    |  15 +-
 cuda/CMakeLists.txt              |   1 +
 cuda/base/memory_space.cpp       | 277 +++++++++++++++++++++++++++++++
 hip/CMakeLists.txt               |   1 +
 hip/base/memory_space.hip.cpp    | 170 +++++++++++++++++++
 11 files changed, 640 insertions(+), 30 deletions(-)
 create mode 100644 core/base/memory_space.cpp
 create mode 100644 cuda/base/memory_space.cpp
 create mode 100644 hip/base/memory_space.hip.cpp

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index f16d63ff617..1a4cac50e89 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -8,6 +8,7 @@ target_sources(ginkgo
     base/combination.cpp
     base/composition.cpp
     base/executor.cpp
+    base/memory_space.cpp
     base/mtx_io.cpp
     base/perturbation.cpp
     base/version.cpp
diff --git a/core/base/memory_space.cpp b/core/base/memory_space.cpp
new file mode 100644
index 00000000000..b03b5c8cad7
--- /dev/null
+++ b/core/base/memory_space.cpp
@@ -0,0 +1,69 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory_space.hpp>
+
+
+#include <cstdlib>
+#include <cstring>
+
+
+#include <ginkgo/core/base/exception.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace gko {
+
+
+void HostMemorySpace::raw_free(void *ptr) const noexcept { std::free(ptr); }
+
+
+void HostMemorySpace::synchronize() const
+{
+    // Currently a no-op
+}
+
+
+void *HostMemorySpace::raw_alloc(size_type num_bytes) const
+{
+    return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "Host", num_bytes);
+}
+
+
+void HostMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
+{
+    std::memcpy(dest_ptr, src_ptr, num_bytes);
+}
+
+
+}  // namespace gko
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
index d41d77d24d9..be44f709872 100644
--- a/core/device_hooks/cuda_hooks.cpp
+++ b/core/device_hooks/cuda_hooks.cpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 
@@ -62,12 +63,30 @@ std::shared_ptr<CudaExecutor> CudaExecutor::create(
 }
 
 
-void OmpExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+std::shared_ptr<CudaExecutor> CudaExecutor::create(
+    int device_id, std::shared_ptr<MemorySpace> mem_space,
+    std::shared_ptr<Executor> master, bool device_reset)
+{
+    return std::shared_ptr<CudaExecutor>(
+        new CudaExecutor(device_id, mem_space, std::move(master)),
+        device_reset);
+}
+
+
+void HostMemorySpace::raw_copy_to(const CudaMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(cuda);
 
 
-void CudaExecutor::raw_free(void *ptr) const noexcept
+void CudaMemorySpace::raw_free(void *ptr) const noexcept
+{
+    // Free must never fail, as it can be called in destructors.
+    // If the nvidia module was not compiled, the library couldn't have
+    // allocated the memory, so there is no need to deallocate it.
+}
+
+
+void CudaUVMSpace::raw_free(void *ptr) const noexcept
 {
     // Free must never fail, as it can be called in destructors.
     // If the nvidia module was not compiled, the library couldn't have
@@ -75,24 +94,59 @@ void CudaExecutor::raw_free(void *ptr) const noexcept
 }
 
 
-void *CudaExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(cuda);
+void *CudaMemorySpace::raw_alloc(size_type num_bytes) const
+    GKO_NOT_COMPILED(nvidia);
+
 
+void *CudaUVMSpace::raw_alloc(size_type num_bytes) const
+    GKO_NOT_COMPILED(nvidia);
 
-void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
-                               const void *src_ptr, void *dest_ptr) const
+
+void CudaMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(cuda);
 
 
-void CudaExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes,
-                               const void *src_ptr, void *dest_ptr) const
+void CudaMemorySpace::raw_copy_to(const CudaMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(cuda);
 
 
-void CudaExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes,
-                               const void *src_ptr, void *dest_ptr) const
+void CudaMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(cuda);
 
 
+void CudaMemorySpace::raw_copy_to(const CudaUVMSpace *dest_mem_space,
+                                  size_type n_bytes, const void *src_ptr,
+                                  void *dest_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaUVMSpace::raw_copy_to(const CudaUVMSpace *dest_mem_space,
+                               size_type n_bytes, const void *src_ptr,
+                               void *dest_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaUVMSpace::raw_copy_to(const CudaMemorySpace *dest_mem_space,
+                               size_type n_bytes, const void *src_ptr,
+                               void *dest_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaUVMSpace::raw_copy_to(const HipMemorySpace *dest_mem_space,
+                               size_type n_bytes, const void *src_ptr,
+                               void *dest_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+void HostMemorySpace::raw_copy_to(const CudaUVMSpace *dest_mem_space,
+                                  size_type n_bytes, const void *src_ptr,
+                                  void *dest_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaUVMSpace::raw_copy_to(const HostMemorySpace *dest_mem_space,
+                               size_type n_bytes, const void *src_ptr,
+                               void *dest_ptr) const GKO_NOT_COMPILED(cuda);
+
+
 void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda);
 
 
diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
index a2e288b4157..8e658299816 100644
--- a/core/device_hooks/hip_hooks.cpp
+++ b/core/device_hooks/hip_hooks.cpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 
@@ -59,12 +60,21 @@ std::shared_ptr<HipExecutor> HipExecutor::create(
 }
 
 
-void OmpExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+std::shared_ptr<HipExecutor> HipExecutor::create(
+    int device_id, std::shared_ptr<MemorySpace> memory_space,
+    std::shared_ptr<Executor> master, bool device_reset)
+{
+    return std::shared_ptr<HipExecutor>(
+        new HipExecutor(device_id, memory_space, std::move(master)),
+        device_reset);
+}
+
+void HostMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(hip);
 
 
-void HipExecutor::raw_free(void *ptr) const noexcept
+void HipMemorySpace::raw_free(void *ptr) const noexcept
 {
     // Free must never fail, as it can be called in destructors.
     // If the nvidia module was not compiled, the library couldn't have
@@ -72,21 +82,27 @@ void HipExecutor::raw_free(void *ptr) const noexcept
 }
 
 
-void *HipExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(hip);
+void *HipMemorySpace::raw_alloc(size_type num_bytes) const
+    GKO_NOT_COMPILED(hip);
+
+
+void HipMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes,
+                                 const void *src_ptr, void *dest_ptr) const
+    GKO_NOT_COMPILED(hip);
 
 
-void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+void HipMemorySpace::raw_copy_to(const CudaMemorySpace *, size_type num_bytes,
+                                 const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(hip);
 
 
-void HipExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+void HipMemorySpace::raw_copy_to(const CudaUVMSpace *, size_type num_bytes,
+                                 const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(hip);
 
 
-void HipExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+void HipMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes,
+                                 const void *src_ptr, void *dest_ptr) const
     GKO_NOT_COMPILED(hip);
 
 
diff --git a/core/devices/cuda/executor.cpp b/core/devices/cuda/executor.cpp
index 3566578a681..a5eb8304eb1 100644
--- a/core/devices/cuda/executor.cpp
+++ b/core/devices/cuda/executor.cpp
@@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 
 
 namespace gko {
@@ -48,6 +49,18 @@ std::shared_ptr<const Executor> CudaExecutor::get_master() const noexcept
 }
 
 
+std::shared_ptr<MemorySpace> CudaExecutor::get_mem_space() noexcept
+{
+    return this->mem_space_instance_;
+}
+
+
+std::shared_ptr<const MemorySpace> CudaExecutor::get_mem_space() const noexcept
+{
+    return this->mem_space_instance_;
+}
+
+
 unsigned CudaExecutor::num_execs[max_devices];
 
 
diff --git a/core/devices/hip/executor.cpp b/core/devices/hip/executor.cpp
index f4787523290..58b67cc06f7 100644
--- a/core/devices/hip/executor.cpp
+++ b/core/devices/hip/executor.cpp
@@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 
 
 namespace gko {
@@ -45,6 +46,18 @@ std::shared_ptr<const Executor> HipExecutor::get_master() const noexcept
 }
 
 
+std::shared_ptr<MemorySpace> HipExecutor::get_mem_space() noexcept
+{
+    return this->mem_space_instance_;
+}
+
+
+std::shared_ptr<const MemorySpace> HipExecutor::get_mem_space() const noexcept
+{
+    return this->mem_space_instance_;
+}
+
+
 int HipExecutor::num_execs[max_devices];
 
 
diff --git a/core/devices/omp/executor.cpp b/core/devices/omp/executor.cpp
index e53a1b53c43..c621333d8f5 100644
--- a/core/devices/omp/executor.cpp
+++ b/core/devices/omp/executor.cpp
@@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 
 
 #include <cstdlib>
@@ -44,9 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 
 
-void OmpExecutor::raw_free(void *ptr) const noexcept { std::free(ptr); }
-
-
 std::shared_ptr<Executor> OmpExecutor::get_master() noexcept
 {
     return this->shared_from_this();
@@ -59,18 +57,15 @@ std::shared_ptr<const Executor> OmpExecutor::get_master() const noexcept
 }
 
 
-void *OmpExecutor::raw_alloc(size_type num_bytes) const
+std::shared_ptr<MemorySpace> OmpExecutor::get_mem_space() noexcept
 {
-    return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "OMP", num_bytes);
+    return this->mem_space_instance_;
 }
 
 
-void OmpExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+std::shared_ptr<const MemorySpace> OmpExecutor::get_mem_space() const noexcept
 {
-    if (num_bytes > 0) {
-        std::memcpy(dest_ptr, src_ptr, num_bytes);
-    }
+    return this->mem_space_instance_;
 }
 
 
diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt
index b19651b6a59..382ba7d69aa 100644
--- a/cuda/CMakeLists.txt
+++ b/cuda/CMakeLists.txt
@@ -69,6 +69,7 @@ target_sources(ginkgo_cuda
     PRIVATE
     base/exception.cpp
     base/executor.cpp
+    base/memory_space.cpp
     base/version.cpp
     components/absolute_array.cu
     components/fill_array.cu
diff --git a/cuda/base/memory_space.cpp b/cuda/base/memory_space.cpp
new file mode 100644
index 00000000000..9d0d9780b05
--- /dev/null
+++ b/cuda/base/memory_space.cpp
@@ -0,0 +1,277 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory_space.hpp>
+
+
+#include <iostream>
+
+
+#include <cuda_runtime.h>
+
+
+#include <ginkgo/config.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+#include "cuda/base/device_guard.hpp"
+
+
+namespace gko {
+
+
+void CudaMemorySpace::synchronize() const
+{
+    cuda::device_guard g(this->get_device_id());
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
+}
+
+
+void CudaUVMSpace::synchronize() const
+{
+    cuda::device_guard g(this->get_device_id());
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaDeviceSynchronize());
+}
+
+
+int CudaMemorySpace::get_num_devices()
+{
+    int deviceCount = 0;
+    auto error_code = cudaGetDeviceCount(&deviceCount);
+    if (error_code == cudaErrorNoDevice) {
+        return 0;
+    }
+    GKO_ASSERT_NO_CUDA_ERRORS(error_code);
+    return deviceCount;
+}
+
+
+int CudaUVMSpace::get_num_devices()
+{
+    int deviceCount = 0;
+    auto error_code = cudaGetDeviceCount(&deviceCount);
+    if (error_code == cudaErrorNoDevice) {
+        return 0;
+    }
+    GKO_ASSERT_NO_CUDA_ERRORS(error_code);
+    return deviceCount;
+}
+
+
+void HostMemorySpace::raw_copy_to(const CudaMemorySpace *dest,
+                                  size_type num_bytes, const void *src_ptr,
+                                  void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(dest->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyHostToDevice));
+    }
+}
+
+
+void CudaMemorySpace::raw_free(void *ptr) const noexcept
+{
+    cuda::device_guard g(this->get_device_id());
+    auto error_code = cudaFree(ptr);
+    if (error_code != cudaSuccess) {
+#if GKO_VERBOSE_LEVEL >= 1
+        // Unfortunately, if memory free fails, there's not much we can do
+        std::cerr << "Unrecoverable CUDA error on device " << this->device_id_
+                  << " in " << __func__ << ": " << cudaGetErrorName(error_code)
+                  << ": " << cudaGetErrorString(error_code) << std::endl
+                  << "Exiting program" << std::endl;
+#endif
+        std::exit(error_code);
+    }
+}
+
+
+void CudaUVMSpace::raw_free(void *ptr) const noexcept
+{
+    cuda::device_guard g(this->get_device_id());
+    auto error_code = cudaFree(ptr);
+    if (error_code != cudaSuccess) {
+#if GKO_VERBOSE_LEVEL >= 1
+        // Unfortunately, if memory free fails, there's not much we can do
+        std::cerr << "Unrecoverable CUDA error on device " << this->device_id_
+                  << " in " << __func__ << ": " << cudaGetErrorName(error_code)
+                  << ": " << cudaGetErrorString(error_code) << std::endl
+                  << "Exiting program" << std::endl;
+#endif
+        std::exit(error_code);
+    }
+}
+
+
+void *CudaMemorySpace::raw_alloc(size_type num_bytes) const
+{
+    void *dev_ptr = nullptr;
+    cuda::device_guard g(this->get_device_id());
+    auto error_code = cudaMalloc(&dev_ptr, num_bytes);
+    if (error_code != cudaErrorMemoryAllocation) {
+        GKO_ASSERT_NO_CUDA_ERRORS(error_code);
+    }
+    GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes);
+    return dev_ptr;
+}
+
+
+void CudaMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyDeviceToHost));
+    }
+}
+
+
+void CudaMemorySpace::raw_copy_to(const CudaMemorySpace *dest,
+                                  size_type num_bytes, const void *src_ptr,
+                                  void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
+                           this->get_device_id(), num_bytes));
+    }
+}
+
+
+void CudaUVMSpace::raw_copy_to(const HipMemorySpace *dest, size_type num_bytes,
+                               const void *src_ptr, void *dest_ptr) const
+{
+#if GINKGO_HIP_PLATFORM_NVCC == 1
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
+                           this->get_device_id(), num_bytes));
+    }
+#else
+    GKO_NOT_SUPPORTED(this);
+#endif
+}
+
+
+void CudaMemorySpace::raw_copy_to(const HipMemorySpace *dest,
+                                  size_type num_bytes, const void *src_ptr,
+                                  void *dest_ptr) const
+{
+#if GINKGO_HIP_PLATFORM_NVCC == 1
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
+                           this->get_device_id(), num_bytes));
+    }
+#else
+    GKO_NOT_SUPPORTED(this);
+#endif
+}
+
+
+void CudaMemorySpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
+                           this->get_device_id(), num_bytes));
+    }
+}
+
+
+void CudaUVMSpace::raw_copy_to(const CudaMemorySpace *dest, size_type num_bytes,
+                               const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
+                           this->get_device_id(), num_bytes));
+    }
+}
+
+
+void CudaUVMSpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes,
+                               const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
+                           this->get_device_id(), num_bytes));
+    }
+}
+
+
+void HostMemorySpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes,
+                                  const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(dest->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyHostToDevice));
+    }
+}
+
+
+void *CudaUVMSpace::raw_alloc(size_type num_bytes) const
+{
+    void *dev_ptr = nullptr;
+    cuda::device_guard g(this->get_device_id());
+    auto error_code = cudaMallocManaged(&dev_ptr, num_bytes);
+    if (error_code != cudaErrorMemoryAllocation) {
+        GKO_ASSERT_NO_CUDA_ERRORS(error_code);
+    }
+    GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes);
+    return dev_ptr;
+}
+
+
+void CudaUVMSpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes,
+                               const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        cuda::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_CUDA_ERRORS(
+            cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyDeviceToHost));
+    }
+}
+
+
+}  // namespace gko
diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt
index 2bb1e6911ee..90255bce397 100644
--- a/hip/CMakeLists.txt
+++ b/hip/CMakeLists.txt
@@ -142,6 +142,7 @@ endif()
 set(GINKGO_HIP_SOURCES
     base/exception.hip.cpp
     base/executor.hip.cpp
+    base/memory_space.hip.cpp
     base/version.hip.cpp
     components/absolute_array.hip.cpp
     components/fill_array.hip.cpp
diff --git a/hip/base/memory_space.hip.cpp b/hip/base/memory_space.hip.cpp
new file mode 100644
index 00000000000..f46b23d8027
--- /dev/null
+++ b/hip/base/memory_space.hip.cpp
@@ -0,0 +1,170 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory_space.hpp>
+
+
+#include <iostream>
+
+
+#include <hip/hip_runtime.h>
+
+
+#include <ginkgo/config.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+#include "hip/base/device_guard.hip.hpp"
+
+
+namespace gko {
+
+
+void HostMemorySpace::raw_copy_to(const HipMemorySpace *dest,
+                                  size_type num_bytes, const void *src_ptr,
+                                  void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        hip::device_guard g(dest->get_device_id());
+        GKO_ASSERT_NO_HIP_ERRORS(
+            hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyHostToDevice));
+    }
+}
+
+
+void HipMemorySpace::raw_free(void *ptr) const noexcept
+{
+    hip::device_guard g(this->get_device_id());
+    auto error_code = hipFree(ptr);
+    if (error_code != hipSuccess) {
+#if GKO_VERBOSE_LEVEL >= 1
+        // Unfortunately, if memory free fails, there's not much we can do
+        std::cerr << "Unrecoverable HIP error on device " << this->device_id_
+                  << " in " << __func__ << ": " << hipGetErrorName(error_code)
+                  << ": " << hipGetErrorString(error_code) << std::endl
+                  << "Exiting program" << std::endl;
+#endif
+        std::exit(error_code);
+    }
+}
+
+
+void *HipMemorySpace::raw_alloc(size_type num_bytes) const
+{
+    void *dev_ptr = nullptr;
+    hip::device_guard g(this->get_device_id());
+    auto error_code = hipMalloc(&dev_ptr, num_bytes);
+    if (error_code != hipErrorMemoryAllocation) {
+        GKO_ASSERT_NO_HIP_ERRORS(error_code);
+    }
+    GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes);
+    return dev_ptr;
+}
+
+
+void HipMemorySpace::raw_copy_to(const HostMemorySpace *, size_type num_bytes,
+                                 const void *src_ptr, void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        hip::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_HIP_ERRORS(
+            hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyDeviceToHost));
+    }
+}
+
+
+void HipMemorySpace::raw_copy_to(const CudaMemorySpace *dest,
+                                 size_type num_bytes, const void *src_ptr,
+                                 void *dest_ptr) const
+{
+#if GINKGO_HIP_PLATFORM_NVCC == 1
+    if (num_bytes > 0) {
+        hip::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
+                                               src_ptr, this->get_device_id(),
+                                               num_bytes));
+    }
+#else
+    GKO_NOT_SUPPORTED(this);
+#endif
+}
+
+
+void HipMemorySpace::raw_copy_to(const CudaUVMSpace *dest, size_type num_bytes,
+                                 const void *src_ptr, void *dest_ptr) const
+{
+#if GINKGO_HIP_PLATFORM_NVCC == 1
+    if (num_bytes > 0) {
+        hip::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
+                                               src_ptr, this->get_device_id(),
+                                               num_bytes));
+    }
+#else
+    GKO_NOT_SUPPORTED(this);
+#endif
+}
+
+
+void HipMemorySpace::raw_copy_to(const HipMemorySpace *dest,
+                                 size_type num_bytes, const void *src_ptr,
+                                 void *dest_ptr) const
+{
+    if (num_bytes > 0) {
+        hip::device_guard g(this->get_device_id());
+        GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
+                                               src_ptr, this->get_device_id(),
+                                               num_bytes));
+    }
+}
+
+
+void HipMemorySpace::synchronize() const
+{
+    hip::device_guard g(this->get_device_id());
+    GKO_ASSERT_NO_HIP_ERRORS(hipDeviceSynchronize());
+}
+
+
+int HipMemorySpace::get_num_devices()
+{
+    int deviceCount = 0;
+    auto error_code = hipGetDeviceCount(&deviceCount);
+    if (error_code == hipErrorNoDevice) {
+        return 0;
+    }
+    GKO_ASSERT_NO_HIP_ERRORS(error_code);
+    return deviceCount;
+}
+
+
+}  // namespace gko

From a0d5c11c094f070036aecd763c65fde171a1dd5f Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Mon, 19 Oct 2020 17:01:53 +0200
Subject: [PATCH 3/8] Add core and host memory space tests.

---
 core/test/base/executor.cpp     | 168 +-------------------------------
 core/test/base/memory_space.cpp | 159 ++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 166 deletions(-)
 create mode 100644 core/test/base/memory_space.cpp

diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp
index 1b2e1b0698e..2df2e8b7a41 100644
--- a/core/test/base/executor.cpp
+++ b/core/test/base/executor.cpp
@@ -95,52 +95,6 @@ TEST(OmpExecutor, RunsCorrectLambdaOperation)
 }
 
 
-TEST(OmpExecutor, AllocatesAndFreesMemory)
-{
-    const int num_elems = 10;
-    exec_ptr omp = gko::OmpExecutor::create();
-    int *ptr = nullptr;
-
-    ASSERT_NO_THROW(ptr = omp->alloc<int>(num_elems));
-    ASSERT_NO_THROW(omp->free(ptr));
-}
-
-
-TEST(OmpExecutor, FreeAcceptsNullptr)
-{
-    exec_ptr omp = gko::OmpExecutor::create();
-    ASSERT_NO_THROW(omp->free(nullptr));
-}
-
-
-TEST(OmpExecutor, FailsWhenOverallocating)
-{
-    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
-    exec_ptr omp = gko::OmpExecutor::create();
-    int *ptr = nullptr;
-
-    ASSERT_THROW(ptr = omp->alloc<int>(num_elems), gko::AllocationError);
-
-    omp->free(ptr);
-}
-
-
-TEST(OmpExecutor, CopiesData)
-{
-    int orig[] = {3, 8};
-    const int num_elems = std::extent<decltype(orig)>::value;
-    exec_ptr omp = gko::OmpExecutor::create();
-    int *copy = omp->alloc<int>(num_elems);
-
-    // user code is run on the OMP, so local variables are in OMP memory
-    omp->copy(num_elems, orig, copy);
-    EXPECT_EQ(3, copy[0]);
-    EXPECT_EQ(8, copy[1]);
-
-    omp->free(copy);
-}
-
-
 TEST(OmpExecutor, IsItsOwnMaster)
 {
     exec_ptr omp = gko::OmpExecutor::create();
@@ -172,95 +126,15 @@ TEST(ReferenceExecutor, RunsCorrectLambdaOperation)
 }
 
 
-TEST(ReferenceExecutor, AllocatesAndFreesMemory)
-{
-    const int num_elems = 10;
-    exec_ptr ref = gko::ReferenceExecutor::create();
-    int *ptr = nullptr;
-
-    ASSERT_NO_THROW(ptr = ref->alloc<int>(num_elems));
-    ASSERT_NO_THROW(ref->free(ptr));
-}
-
-
-TEST(ReferenceExecutor, FreeAcceptsNullptr)
-{
-    exec_ptr omp = gko::ReferenceExecutor::create();
-    ASSERT_NO_THROW(omp->free(nullptr));
-}
-
-
-TEST(ReferenceExecutor, FailsWhenOverallocating)
-{
-    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
-    exec_ptr ref = gko::ReferenceExecutor::create();
-    int *ptr = nullptr;
-
-    ASSERT_THROW(ptr = ref->alloc<int>(num_elems), gko::AllocationError);
-
-    ref->free(ptr);
-}
-
-
-TEST(ReferenceExecutor, CopiesData)
-{
-    int orig[] = {3, 8};
-    const int num_elems = std::extent<decltype(orig)>::value;
-    exec_ptr ref = gko::ReferenceExecutor::create();
-    int *copy = ref->alloc<int>(num_elems);
-
-    // ReferenceExecutor is a type of OMP executor, so this is O.K.
-    ref->copy(num_elems, orig, copy);
-    EXPECT_EQ(3, copy[0]);
-    EXPECT_EQ(8, copy[1]);
-
-    ref->free(copy);
-}
-
-
 TEST(ReferenceExecutor, CopiesSingleValue)
 {
     exec_ptr ref = gko::ReferenceExecutor::create();
-    int *el = ref->alloc<int>(1);
+    int *el = ref->get_mem_space()->alloc<int>(1);
     el[0] = 83683;
 
     EXPECT_EQ(83683, ref->copy_val_to_host(el));
 
-    ref->free(el);
-}
-
-
-TEST(ReferenceExecutor, CopiesDataFromOmp)
-{
-    int orig[] = {3, 8};
-    const int num_elems = std::extent<decltype(orig)>::value;
-    exec_ptr omp = gko::OmpExecutor::create();
-    exec_ptr ref = gko::ReferenceExecutor::create();
-    int *copy = ref->alloc<int>(num_elems);
-
-    // ReferenceExecutor is a type of OMP executor, so this is O.K.
-    ref->copy_from(omp.get(), num_elems, orig, copy);
-    EXPECT_EQ(3, copy[0]);
-    EXPECT_EQ(8, copy[1]);
-
-    ref->free(copy);
-}
-
-
-TEST(ReferenceExecutor, CopiesDataToOmp)
-{
-    int orig[] = {3, 8};
-    const int num_elems = std::extent<decltype(orig)>::value;
-    exec_ptr omp = gko::OmpExecutor::create();
-    exec_ptr ref = gko::ReferenceExecutor::create();
-    int *copy = omp->alloc<int>(num_elems);
-
-    // ReferenceExecutor is a type of OMP executor, so this is O.K.
-    omp->copy_from(ref.get(), num_elems, orig, copy);
-    EXPECT_EQ(3, copy[0]);
-    EXPECT_EQ(8, copy[1]);
-
-    ref->free(copy);
+    ref->get_mem_space()->free(el);
 }
 
 
@@ -414,42 +288,4 @@ TEST(HipExecutor, CanSetDeviceResetBoolean)
 }
 
 
-template <typename T>
-struct mock_free : T {
-    /**
-     * @internal Due to a bug with gcc 5.3, the constructor needs to be called
-     * with `()` operator instead of `{}`.
-     */
-    template <typename... Params>
-    mock_free(Params &&... params) : T(std::forward<Params>(params)...)
-    {}
-
-    void raw_free(void *ptr) const noexcept override
-    {
-        called_free = true;
-        T::raw_free(ptr);
-    }
-
-    mutable bool called_free{false};
-};
-
-
-TEST(ExecutorDeleter, DeletesObject)
-{
-    auto ref = std::make_shared<mock_free<gko::ReferenceExecutor>>();
-    auto x = ref->alloc<int>(5);
-
-    gko::executor_deleter<int>{ref}(x);
-
-    ASSERT_TRUE(ref->called_free);
-}
-
-
-TEST(ExecutorDeleter, AvoidsDeletionForNullExecutor)
-{
-    int x[5];
-    ASSERT_NO_THROW(gko::executor_deleter<int>{nullptr}(x));
-}
-
-
 }  // namespace
diff --git a/core/test/base/memory_space.cpp b/core/test/base/memory_space.cpp
new file mode 100644
index 00000000000..df7cd0a5152
--- /dev/null
+++ b/core/test/base/memory_space.cpp
@@ -0,0 +1,159 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory_space.hpp>
+
+
+#include <type_traits>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/exception.hpp>
+
+
+namespace {
+
+
+using mem_space_ptr = std::shared_ptr<gko::MemorySpace>;
+
+
+TEST(HostMemorySpace, AllocatesAndFreesMemory)
+{
+    const int num_elems = 10;
+    mem_space_ptr host = gko::HostMemorySpace::create();
+    int *ptr = nullptr;
+
+    ASSERT_NO_THROW(ptr = host->alloc<int>(num_elems));
+    ASSERT_NO_THROW(host->free(ptr));
+}
+
+
+TEST(HostMemorySpace, FreeAcceptsNullptr)
+{
+    mem_space_ptr host = gko::HostMemorySpace::create();
+    ASSERT_NO_THROW(host->free(nullptr));
+}
+
+
+TEST(HostMemorySpace, FailsWhenOverallocating)
+{
+    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
+    mem_space_ptr host = gko::HostMemorySpace::create();
+    int *ptr = nullptr;
+
+    ASSERT_THROW(ptr = host->alloc<int>(num_elems), gko::AllocationError);
+
+    host->free(ptr);
+}
+
+
+TEST(HostMemorySpace, CopiesData)
+{
+    int orig[] = {3, 8};
+    const int num_elems = std::extent<decltype(orig)>::value;
+    mem_space_ptr host = gko::HostMemorySpace::create();
+    int *copy = host->alloc<int>(num_elems);
+
+    // user code is run on the HOST, so local variables are in HOST memory
+    host->copy_from(host.get(), num_elems, orig, copy);
+    EXPECT_EQ(3, copy[0]);
+    EXPECT_EQ(8, copy[1]);
+
+    host->free(copy);
+}
+
+
+TEST(CudaMemorySpace, KnowsItsDeviceId)
+{
+    auto cuda = gko::CudaMemorySpace::create(0);
+
+    ASSERT_EQ(0, cuda->get_device_id());
+}
+
+
+TEST(CudaUVMSpace, KnowsItsDeviceId)
+{
+    auto cuda_uvm = gko::CudaUVMSpace::create(0);
+
+    ASSERT_EQ(0, cuda_uvm->get_device_id());
+}
+
+
+TEST(HipMemorySpace, KnowsItsDeviceId)
+{
+    auto hip = gko::HipMemorySpace::create(0);
+
+    ASSERT_EQ(0, hip->get_device_id());
+}
+
+
+template <typename T>
+struct mock_free : T {
+    /**
+     * @internal Due to a bug with gcc 5.3, the constructor needs to be called
+     * with `()` operator instead of `{}`.
+     */
+    template <typename... Params>
+    mock_free(Params &&... params) : T(std::forward<Params>(params)...)
+    {}
+
+    void raw_free(void *ptr) const noexcept override
+    {
+        called_free = true;
+        T::raw_free(ptr);
+    }
+
+    mutable bool called_free{false};
+};
+
+
+TEST(MemorySpaceDeleter, DeletesObject)
+{
+    auto host = std::make_shared<mock_free<gko::HostMemorySpace>>();
+    auto x = host->alloc<int>(5);
+
+    gko::memory_space_deleter<int>{host}(x);
+
+    ASSERT_TRUE(host->called_free);
+}
+
+
+TEST(MemorySpaceDeleter, AvoidsDeletionForNullMemorySpace)
+{
+    int x[5];
+    ASSERT_NO_THROW(gko::memory_space_deleter<int>{nullptr}(x));
+}
+
+
+}  // namespace

From 8d567a34d4a5044dd655ae74d7653550de73cf99 Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Mon, 19 Oct 2020 17:02:15 +0200
Subject: [PATCH 4/8] Add CUDA and HIP memory space tests.

---
 cuda/test/base/CMakeLists.txt          |   1 +
 cuda/test/base/cuda_executor.cu        |  91 ----------
 cuda/test/base/cuda_memory_space.cu    | 238 +++++++++++++++++++++++++
 hip/test/base/CMakeLists.txt           |   1 +
 hip/test/base/hip_executor.hip.cpp     |  95 ----------
 hip/test/base/hip_memory_space.hip.cpp | 178 ++++++++++++++++++
 6 files changed, 418 insertions(+), 186 deletions(-)
 create mode 100644 cuda/test/base/cuda_memory_space.cu
 create mode 100644 hip/test/base/hip_memory_space.hip.cpp

diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt
index d9ba808fa21..37fdc0b989e 100644
--- a/cuda/test/base/CMakeLists.txt
+++ b/cuda/test/base/CMakeLists.txt
@@ -1,4 +1,5 @@
 ginkgo_create_cuda_test(cuda_executor)
+ginkgo_create_cuda_test(cuda_memory_space)
 ginkgo_create_cuda_test(exception_helpers)
 ginkgo_create_cuda_test(lin_op)
 ginkgo_create_cuda_test(math)
diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu
index 2bcf5961bbd..aaa1c9c99f2 100644
--- a/cuda/test/base/cuda_executor.cu
+++ b/cuda/test/base/cuda_executor.cu
@@ -126,71 +126,6 @@ TEST_F(CudaExecutor, MasterKnowsNumberOfDevices)
 }
 
 
-TEST_F(CudaExecutor, AllocatesAndFreesMemory)
-{
-    int *ptr = nullptr;
-
-    ASSERT_NO_THROW(ptr = cuda->alloc<int>(2));
-    ASSERT_NO_THROW(cuda->free(ptr));
-}
-
-
-TEST_F(CudaExecutor, FailsWhenOverallocating)
-{
-    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
-    int *ptr = nullptr;
-
-    ASSERT_THROW(
-        {
-            ptr = cuda->alloc<int>(num_elems);
-            cuda->synchronize();
-        },
-        gko::AllocationError);
-
-    cuda->free(ptr);
-}
-
-
-__global__ void check_data(int *data)
-{
-    if (data[0] != 3 || data[1] != 8) {
-        asm("trap;");
-    }
-}
-
-TEST_F(CudaExecutor, CopiesDataToCuda)
-{
-    int orig[] = {3, 8};
-    auto *copy = cuda->alloc<int>(2);
-
-    cuda->copy_from(omp.get(), 2, orig, copy);
-
-    check_data<<<1, 1>>>(copy);
-    ASSERT_NO_THROW(cuda->synchronize());
-    cuda->free(copy);
-}
-
-
-__global__ void init_data(int *data)
-{
-    data[0] = 3;
-    data[1] = 8;
-}
-
-TEST_F(CudaExecutor, CopiesDataFromCuda)
-{
-    int copy[2];
-    auto orig = cuda->alloc<int>(2);
-    init_data<<<1, 1>>>(orig);
-
-    omp->copy_from(cuda.get(), 2, orig, copy);
-
-    EXPECT_EQ(3, copy[0]);
-    ASSERT_EQ(8, copy[1]);
-    cuda->free(orig);
-}
-
-
 /* Properly checks if it works only when multiple GPUs exist */
 TEST_F(CudaExecutor, PreservesDeviceSettings)
 {
@@ -218,32 +153,6 @@ TEST_F(CudaExecutor, RunsOnProperDevice)
 }
 
 
-TEST_F(CudaExecutor, CopiesDataFromCudaToCuda)
-{
-    int copy[2];
-    auto orig = cuda->alloc<int>(2);
-    GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0));
-    init_data<<<1, 1>>>(orig);
-
-    auto copy_cuda2 = cuda2->alloc<int>(2);
-    cuda2->copy_from(cuda.get(), 2, orig, copy_cuda2);
-
-    // Check that the data is really on GPU2 and ensure we did not cheat
-    int value = -1;
-    GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(cuda2->get_device_id()));
-    check_data<<<1, 1>>>(copy_cuda2);
-    GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0));
-    cuda2->run(ExampleOperation(value));
-    ASSERT_EQ(value, cuda2->get_device_id());
-    // Put the results on OpenMP and run CPU side assertions
-    omp->copy_from(cuda2.get(), 2, copy_cuda2, copy);
-    EXPECT_EQ(3, copy[0]);
-    ASSERT_EQ(8, copy[1]);
-    cuda->free(copy_cuda2);
-    cuda->free(orig);
-}
-
-
 TEST_F(CudaExecutor, Synchronizes)
 {
     // Todo design a proper unit test once we support streams
diff --git a/cuda/test/base/cuda_memory_space.cu b/cuda/test/base/cuda_memory_space.cu
new file mode 100644
index 00000000000..39421ffd9dc
--- /dev/null
+++ b/cuda/test/base/cuda_memory_space.cu
@@ -0,0 +1,238 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
+
+
+#include <type_traits>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/exception.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace {
+
+
+class CudaMemorySpace : public ::testing::Test {
+protected:
+    CudaMemorySpace() : cuda(nullptr), cuda2(nullptr) {}
+
+    void SetUp()
+    {
+        omp = gko::HostMemorySpace::create();
+        cuda = gko::CudaMemorySpace::create(0);
+        cuda2 = gko::CudaMemorySpace::create(
+            gko::CudaMemorySpace::get_num_devices() - 1);
+    }
+
+    void TearDown()
+    {
+        if (cuda != nullptr) {
+            // ensure that previous calls finished and didn't throw an error
+            ASSERT_NO_THROW(cuda->synchronize());
+        }
+        if (cuda2 != nullptr) {
+            // ensure that previous calls finished and didn't throw an error
+            ASSERT_NO_THROW(cuda2->synchronize());
+        }
+    }
+
+    std::shared_ptr<gko::HostMemorySpace> omp;
+    std::shared_ptr<gko::CudaMemorySpace> cuda;
+    std::shared_ptr<gko::CudaMemorySpace> cuda2;
+};
+
+
+TEST_F(CudaMemorySpace, AllocatesAndFreesMemory)
+{
+    int *ptr = nullptr;
+
+    ASSERT_NO_THROW(ptr = cuda->alloc<int>(2));
+    ASSERT_NO_THROW(cuda->free(ptr));
+}
+
+
+TEST_F(CudaMemorySpace, FailsWhenOverallocating)
+{
+    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
+    int *ptr = nullptr;
+
+    ASSERT_THROW(
+        {
+            ptr = cuda->alloc<int>(num_elems);
+            cuda->synchronize();
+        },
+        gko::AllocationError);
+
+    cuda->free(ptr);
+}
+
+
+__global__ void check_data(int *data)
+{
+    if (data[0] != 3 || data[1] != 8) {
+        asm("trap;");
+    }
+}
+
+
+TEST_F(CudaMemorySpace, CopiesDataToCuda)
+{
+    int orig[] = {3, 8};
+    auto *copy = cuda->alloc<int>(2);
+
+    cuda->copy_from(omp.get(), 2, orig, copy);
+
+    check_data<<<1, 1>>>(copy);
+    ASSERT_NO_THROW(cuda->synchronize());
+    cuda->free(copy);
+}
+
+
+__global__ void init_data(int *data)
+{
+    data[0] = 3;
+    data[1] = 8;
+}
+
+TEST_F(CudaMemorySpace, CopiesDataFromCuda)
+{
+    int copy[2];
+    auto orig = cuda->alloc<int>(2);
+    init_data<<<1, 1>>>(orig);
+
+    omp->copy_from(cuda.get(), 2, orig, copy);
+
+    EXPECT_EQ(3, copy[0]);
+    ASSERT_EQ(8, copy[1]);
+    cuda->free(orig);
+}
+
+
+TEST_F(CudaMemorySpace, CopiesDataFromCudaToCuda)
+{
+    int copy[2];
+    auto orig = cuda->alloc<int>(2);
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0));
+    init_data<<<1, 1>>>(orig);
+
+    auto copy_cuda2 = cuda2->alloc<int>(2);
+    cuda2->copy_from(cuda.get(), 2, orig, copy_cuda2);
+
+    // Check that the data is really on GPU2 and ensure we did not cheat
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(cuda2->get_device_id()));
+    check_data<<<1, 1>>>(copy_cuda2);
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(0));
+
+    omp->copy_from(cuda2.get(), 2, copy_cuda2, copy);
+
+    EXPECT_EQ(3, copy[0]);
+    ASSERT_EQ(8, copy[1]);
+    cuda2->free(copy_cuda2);
+    cuda->free(orig);
+}
+
+
+class CudaUVMSpace : public ::testing::Test {
+protected:
+    CudaUVMSpace() : cuda_uvm(nullptr) {}
+
+    void SetUp() { cuda_uvm = gko::CudaUVMSpace::create(0); }
+
+    void TearDown()
+    {
+        if (cuda_uvm != nullptr) {
+            // ensure that previous calls finished and didn't throw an error
+            // ASSERT_NO_THROW(cuda_uvm->synchronize());
+            cuda_uvm->synchronize();
+        }
+    }
+
+    std::shared_ptr<gko::CudaUVMSpace> cuda_uvm;
+};
+
+
+TEST_F(CudaUVMSpace, UVMAllocatesAndFreesMemory)
+{
+    int *ptr = nullptr;
+
+    ASSERT_NO_THROW(ptr = cuda_uvm->alloc<int>(2));
+    cuda_uvm->synchronize();
+    ASSERT_NO_THROW(cuda_uvm->free(ptr));
+}
+
+
+// TEST_F(CudaUVMSpace, UVMFailsWhenOverallocating)
+// {
+//     const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
+//     int *ptr = nullptr;
+
+//     ASSERT_THROW(
+//         {
+//             ptr = cuda_uvm->alloc<int>(num_elems);
+//             cuda_uvm->synchronize();
+//         },
+//         gko::AllocationError);
+
+//     cuda_uvm->free(ptr);
+// }
+
+
+TEST_F(CudaUVMSpace, CanBeAccessedFromHost)
+{
+    int *orig = cuda_uvm->alloc<int>(2);
+    orig[0] = 1;
+    orig[1] = 2;
+    ASSERT_EQ(orig[0], 1);
+    ASSERT_EQ(orig[1], 2);
+    cuda_uvm->synchronize();
+    cuda_uvm->free(orig);
+}
+
+
+TEST_F(CudaUVMSpace, CanBeAccessedFromDevice)
+{
+    int *orig = cuda_uvm->alloc<int>(2);
+    orig[0] = 3;
+    orig[1] = 8;
+    check_data<<<1, 1>>>(orig);
+    cuda_uvm->synchronize();
+    cuda_uvm->free(orig);
+}
+
+
+}  // namespace
diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt
index 795ededc410..e814ea7e390 100644
--- a/hip/test/base/CMakeLists.txt
+++ b/hip/test/base/CMakeLists.txt
@@ -1,4 +1,5 @@
 ginkgo_create_hip_test(hip_executor)
+ginkgo_create_hip_test(hip_memory_space)
 ginkgo_create_hip_test(lin_op)
 ginkgo_create_hip_test(math)
 # Only hcc needs the libraries. nvcc only requires the headers.
diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp
index 635639fc21e..7ff522bb62a 100644
--- a/hip/test/base/hip_executor.hip.cpp
+++ b/hip/test/base/hip_executor.hip.cpp
@@ -131,75 +131,6 @@ TEST_F(HipExecutor, MasterKnowsNumberOfDevices)
 }
 
 
-TEST_F(HipExecutor, AllocatesAndFreesMemory)
-{
-    int *ptr = nullptr;
-
-    ASSERT_NO_THROW(ptr = hip->alloc<int>(2));
-    ASSERT_NO_THROW(hip->free(ptr));
-}
-
-
-TEST_F(HipExecutor, FailsWhenOverallocating)
-{
-    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
-    int *ptr = nullptr;
-
-    ASSERT_THROW(
-        {
-            ptr = hip->alloc<int>(num_elems);
-            hip->synchronize();
-        },
-        gko::AllocationError);
-
-    hip->free(ptr);
-}
-
-
-__global__ void check_data(int *data)
-{
-    if (data[0] != 3 || data[1] != 8) {
-#if GINKGO_HIP_PLATFORM_HCC
-        asm("s_trap 0x02;");
-#else  // GINKGO_HIP_PLATFORM_NVCC
-        asm("trap;");
-#endif
-    }
-}
-
-TEST_F(HipExecutor, CopiesDataToHip)
-{
-    int orig[] = {3, 8};
-    auto *copy = hip->alloc<int>(2);
-
-    hip->copy_from(omp.get(), 2, orig, copy);
-
-    hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy);
-    ASSERT_NO_THROW(hip->synchronize());
-    hip->free(copy);
-}
-
-
-__global__ void init_data(int *data)
-{
-    data[0] = 3;
-    data[1] = 8;
-}
-
-TEST_F(HipExecutor, CopiesDataFromHip)
-{
-    int copy[2];
-    auto orig = hip->alloc<int>(2);
-    hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig);
-
-    omp->copy_from(hip.get(), 2, orig, copy);
-
-    EXPECT_EQ(3, copy[0]);
-    ASSERT_EQ(8, copy[1]);
-    hip->free(orig);
-}
-
-
 /* Properly checks if it works only when multiple GPUs exist */
 TEST_F(HipExecutor, PreservesDeviceSettings)
 {
@@ -227,32 +158,6 @@ TEST_F(HipExecutor, RunsOnProperDevice)
 }
 
 
-TEST_F(HipExecutor, CopiesDataFromHipToHip)
-{
-    int copy[2];
-    auto orig = hip->alloc<int>(2);
-    GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0));
-    hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig);
-
-    auto copy_hip2 = hip2->alloc<int>(2);
-    hip2->copy_from(hip.get(), 2, orig, copy_hip2);
-
-    // Check that the data is really on GPU2 and ensure we did not cheat
-    int value = -1;
-    GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(hip2->get_device_id()));
-    hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy_hip2);
-    GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0));
-    hip2->run(ExampleOperation(value));
-    ASSERT_EQ(value, hip2->get_device_id());
-    // Put the results on OpenMP and run CPU side assertions
-    omp->copy_from(hip2.get(), 2, copy_hip2, copy);
-    EXPECT_EQ(3, copy[0]);
-    ASSERT_EQ(8, copy[1]);
-    hip->free(copy_hip2);
-    hip->free(orig);
-}
-
-
 TEST_F(HipExecutor, Synchronizes)
 {
     // Todo design a proper unit test once we support streams
diff --git a/hip/test/base/hip_memory_space.hip.cpp b/hip/test/base/hip_memory_space.hip.cpp
new file mode 100644
index 00000000000..4fe5e8d9b48
--- /dev/null
+++ b/hip/test/base/hip_memory_space.hip.cpp
@@ -0,0 +1,178 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory_space.hpp>
+
+
+#include <memory>
+#include <type_traits>
+
+
+#include <gtest/gtest.h>
+#include <hip/hip_runtime.h>
+
+
+#include <ginkgo/core/base/exception.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace {
+
+
+class HipMemorySpace : public ::testing::Test {
+protected:
+    HipMemorySpace()
+        : omp(gko::HostMemorySpace::create()), hip(nullptr), hip2(nullptr)
+    {}
+
+    void SetUp()
+    {
+        ASSERT_GT(gko::HipMemorySpace::get_num_devices(), 0);
+        hip = gko::HipMemorySpace::create(0);
+        hip2 = gko::HipMemorySpace::create(
+            gko::HipMemorySpace::get_num_devices() - 1);
+    }
+
+    void TearDown()
+    {
+        if (hip != nullptr) {
+            // ensure that previous calls finished and didn't throw an error
+            ASSERT_NO_THROW(hip->synchronize());
+        }
+    }
+
+    std::shared_ptr<gko::MemorySpace> omp;
+    std::shared_ptr<gko::HipMemorySpace> hip;
+    std::shared_ptr<gko::HipMemorySpace> hip2;
+};
+
+
+TEST_F(HipMemorySpace, AllocatesAndFreesMemory)
+{
+    int *ptr = nullptr;
+
+    ASSERT_NO_THROW(ptr = hip->alloc<int>(2));
+    ASSERT_NO_THROW(hip->free(ptr));
+}
+
+
+TEST_F(HipMemorySpace, FailsWhenOverallocating)
+{
+    const gko::size_type num_elems = 1ll << 50;  // 4PB of integers
+    int *ptr = nullptr;
+
+    ASSERT_THROW(
+        {
+            ptr = hip->alloc<int>(num_elems);
+            hip->synchronize();
+        },
+        gko::AllocationError);
+
+    hip->free(ptr);
+}
+
+
+__global__ void check_data(int *data)
+{
+    if (data[0] != 3 || data[1] != 8) {
+#if GINKGO_HIP_PLATFORM_HCC
+        asm("s_trap 0x02;");
+#else  // GINKGO_HIP_PLATFORM_NVCC
+        asm("trap;");
+#endif
+    }
+}
+
+TEST_F(HipMemorySpace, CopiesDataToHip)
+{
+    int orig[] = {3, 8};
+    auto *copy = hip->alloc<int>(2);
+
+    hip->copy_from(omp.get(), 2, orig, copy);
+
+    hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy);
+    ASSERT_NO_THROW(hip->synchronize());
+    hip->free(copy);
+}
+
+
+__global__ void init_data(int *data)
+{
+    data[0] = 3;
+    data[1] = 8;
+}
+
+TEST_F(HipMemorySpace, CopiesDataFromHip)
+{
+    int copy[2];
+    auto orig = hip->alloc<int>(2);
+    hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig);
+
+    omp->copy_from(hip.get(), 2, orig, copy);
+
+    EXPECT_EQ(3, copy[0]);
+    ASSERT_EQ(8, copy[1]);
+    hip->free(orig);
+}
+
+
+TEST_F(HipMemorySpace, CopiesDataFromHipToHip)
+{
+    int copy[2];
+    auto orig = hip->alloc<int>(2);
+    GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0));
+    hipLaunchKernelGGL((init_data), dim3(1), dim3(1), 0, 0, orig);
+
+    auto copy_hip2 = hip2->alloc<int>(2);
+    hip2->copy_from(hip.get(), 2, orig, copy_hip2);
+
+    // Check that the data is really on GPU2 and ensure we did not cheat
+    GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(hip2->get_device_id()));
+    hipLaunchKernelGGL((check_data), dim3(1), dim3(1), 0, 0, copy_hip2);
+    GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(0));
+    // Put the results on OpenMP and run CPU side assertions
+    omp->copy_from(hip2.get(), 2, copy_hip2, copy);
+    EXPECT_EQ(3, copy[0]);
+    ASSERT_EQ(8, copy[1]);
+    hip->free(copy_hip2);
+    hip->free(orig);
+}
+
+
+TEST_F(HipMemorySpace, Synchronizes)
+{
+    // Todo design a proper unit test once we support streams
+    ASSERT_NO_THROW(hip->synchronize());
+}
+
+
+}  // namespace

From 3fb1cb22b03305c90d0ae3d831755675881a8bed Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Mon, 19 Oct 2020 23:58:55 +0200
Subject: [PATCH 5/8] Update Logger interface (INTERFACE BREAKING), add some
 exceptions

---
 benchmark/utils/loggers.hpp                   |  54 +++++----
 core/log/logger.cpp                           |   2 +-
 core/log/papi.cpp                             |  23 ++--
 core/log/record.cpp                           |  36 +++---
 core/log/stream.cpp                           |  25 ++--
 core/test/log/convergence.cpp                 |   3 +-
 core/test/log/logger.cpp                      |  24 ++--
 core/test/log/papi.cpp                        |  47 +++++---
 core/test/log/record.cpp                      | 114 +++++++++++-------
 core/test/log/stream.cpp                      |  99 +++++++++------
 .../adaptiveprecision-blockjacobi.cpp         |   2 +-
 examples/custom-logger/custom-logger.cpp      |   3 +-
 .../custom-stopping-criterion.cpp             |   3 +-
 .../ir-ilu-preconditioned-solver.cpp          |   2 +-
 .../iterative-refinement.cpp                  |   2 +-
 .../performance-debugging.cpp                 |  51 ++++----
 .../simple-solver-logging.cpp                 |  18 +--
 include/ginkgo/core/base/exception.hpp        |  24 ++++
 .../ginkgo/core/base/exception_helpers.hpp    |  13 ++
 include/ginkgo/core/base/executor.hpp         |  14 +--
 include/ginkgo/core/base/types.hpp            |  17 +++
 include/ginkgo/core/log/convergence.hpp       |   6 +-
 include/ginkgo/core/log/logger.hpp            |  63 +++++-----
 include/ginkgo/core/log/papi.hpp              |  49 ++++----
 include/ginkgo/core/log/record.hpp            |  36 +++---
 include/ginkgo/core/log/stream.hpp            |  18 +--
 reference/test/log/convergence.cpp            |   3 +-
 reference/test/log/papi.cpp                   |   2 +-
 28 files changed, 451 insertions(+), 302 deletions(-)

diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp
index ea6bbea2797..635af7a0f48 100644
--- a/benchmark/utils/loggers.hpp
+++ b/benchmark/utils/loggers.hpp
@@ -48,41 +48,43 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 // A logger that accumulates the time of all operations
 struct OperationLogger : gko::log::Logger {
-    void on_allocation_started(const gko::Executor *exec,
+    void on_allocation_started(const gko::MemorySpace *mem_space,
                                const gko::size_type &) const override
     {
-        this->start_operation(exec, "allocate");
+        this->start_operation(mem_space, "allocate");
     }
 
-    void on_allocation_completed(const gko::Executor *exec,
+    void on_allocation_completed(const gko::MemorySpace *mem_space,
                                  const gko::size_type &,
                                  const gko::uintptr &) const override
     {
-        this->end_operation(exec, "allocate");
+        this->end_operation(mem_space, "allocate");
     }
 
-    void on_free_started(const gko::Executor *exec,
+    void on_free_started(const gko::MemorySpace *mem_space,
                          const gko::uintptr &) const override
     {
-        this->start_operation(exec, "free");
+        this->start_operation(mem_space, "free");
     }
 
-    void on_free_completed(const gko::Executor *exec,
+    void on_free_completed(const gko::MemorySpace *mem_space,
                            const gko::uintptr &) const override
     {
-        this->end_operation(exec, "free");
+        this->end_operation(mem_space, "free");
     }
 
-    void on_copy_started(const gko::Executor *from, const gko::Executor *to,
-                         const gko::uintptr &, const gko::uintptr &,
+    void on_copy_started(const gko::MemorySpace *from,
+                         const gko::MemorySpace *to, const gko::uintptr &,
+                         const gko::uintptr &,
                          const gko::size_type &) const override
     {
         from->synchronize();
         this->start_operation(to, "copy");
     }
 
-    void on_copy_completed(const gko::Executor *from, const gko::Executor *to,
-                           const gko::uintptr &, const gko::uintptr &,
+    void on_copy_completed(const gko::MemorySpace *from,
+                           const gko::MemorySpace *to, const gko::uintptr &,
+                           const gko::uintptr &,
                            const gko::size_type &) const override
     {
         from->synchronize();
@@ -118,14 +120,15 @@ struct OperationLogger : gko::log::Logger {
     }
 
     OperationLogger(std::shared_ptr<const gko::Executor> exec, bool nested_name)
-        : gko::log::Logger(exec), use_nested_name{nested_name}
+        : gko::log::Logger(exec, exec->get_mem_space()),
+          use_nested_name{nested_name}
     {}
 
 private:
-    void start_operation(const gko::Executor *exec,
-                         const std::string &name) const
+    template <typename LogObject>
+    void start_operation(const LogObject *obj, const std::string &name) const
     {
-        exec->synchronize();
+        obj->synchronize();
         const std::lock_guard<std::mutex> lock(mutex);
         auto nested_name = nested.empty() || !use_nested_name
                                ? name
@@ -134,9 +137,12 @@ struct OperationLogger : gko::log::Logger {
         start[nested_name] = std::chrono::steady_clock::now();
     }
 
-    void end_operation(const gko::Executor *exec, const std::string &name) const
+    // Helper to compute the end time and store the operation's time at its
+    // end. Also time nested operations.
+    template <typename LogObject>
+    void end_operation(const LogObject *obj, const std::string &name) const
     {
-        exec->synchronize();
+        obj->synchronize();
         const std::lock_guard<std::mutex> lock(mutex);
         // if operations are properly nested, nested_name now ends with name
         auto nested_name = nested.back().first;
@@ -163,7 +169,7 @@ struct OperationLogger : gko::log::Logger {
 
 
 struct StorageLogger : gko::log::Logger {
-    void on_allocation_completed(const gko::Executor *,
+    void on_allocation_completed(const gko::MemorySpace *,
                                  const gko::size_type &num_bytes,
                                  const gko::uintptr &location) const override
     {
@@ -171,7 +177,7 @@ struct StorageLogger : gko::log::Logger {
         storage[location] = num_bytes;
     }
 
-    void on_free_completed(const gko::Executor *,
+    void on_free_completed(const gko::MemorySpace *,
                            const gko::uintptr &location) const override
     {
         const std::lock_guard<std::mutex> lock(mutex);
@@ -190,7 +196,7 @@ struct StorageLogger : gko::log::Logger {
     }
 
     StorageLogger(std::shared_ptr<const gko::Executor> exec)
-        : gko::log::Logger(exec)
+        : gko::log::Logger(exec, exec->get_mem_space())
     {}
 
 private:
@@ -235,7 +241,8 @@ struct ResidualLogger : gko::log::Logger {
                    rapidjson::Value &true_res_norms,
                    rapidjson::Value &timestamps,
                    rapidjson::MemoryPoolAllocator<> &alloc)
-        : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask),
+        : gko::log::Logger(exec, exec->get_mem_space(),
+                           gko::log::Logger::iteration_complete_mask),
           matrix{matrix},
           b{b},
           start{std::chrono::steady_clock::now()},
@@ -267,7 +274,8 @@ struct IterationLogger : gko::log::Logger {
     }
 
     IterationLogger(std::shared_ptr<const gko::Executor> exec)
-        : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask)
+        : gko::log::Logger(exec, exec->get_mem_space(),
+                           gko::log::Logger::iteration_complete_mask)
     {}
 
     void write_data(rapidjson::Value &output,
diff --git a/core/log/logger.cpp b/core/log/logger.cpp
index 46ee98b2895..9820850be9b 100644
--- a/core/log/logger.cpp
+++ b/core/log/logger.cpp
@@ -38,7 +38,7 @@ namespace log {
 
 
 constexpr Logger::mask_type Logger::all_events_mask;
-constexpr Logger::mask_type Logger::executor_events_mask;
+constexpr Logger::mask_type Logger::memory_space_events_mask;
 constexpr Logger::mask_type Logger::operation_events_mask;
 constexpr Logger::mask_type Logger::polymorphic_object_events_mask;
 constexpr Logger::mask_type Logger::linop_events_mask;
diff --git a/core/log/papi.cpp b/core/log/papi.cpp
index 1c8a17419fa..a920b5226b7 100644
--- a/core/log/papi.cpp
+++ b/core/log/papi.cpp
@@ -42,40 +42,41 @@ namespace log {
 
 
 template <typename ValueType>
-void Papi<ValueType>::on_allocation_started(const Executor *exec,
+void Papi<ValueType>::on_allocation_started(const MemorySpace *mem_space,
                                             const size_type &num_bytes) const
 {
-    allocation_started.get_counter(exec) += num_bytes;
+    allocation_started.get_counter(mem_space) += num_bytes;
 }
 
 
 template <typename ValueType>
-void Papi<ValueType>::on_allocation_completed(const Executor *exec,
+void Papi<ValueType>::on_allocation_completed(const MemorySpace *mem_space,
                                               const size_type &num_bytes,
                                               const uintptr &location) const
 {
-    allocation_completed.get_counter(exec) += num_bytes;
+    allocation_completed.get_counter(mem_space) += num_bytes;
 }
 
 
 template <typename ValueType>
-void Papi<ValueType>::on_free_started(const Executor *exec,
+void Papi<ValueType>::on_free_started(const MemorySpace *mem_space,
                                       const uintptr &location) const
 {
-    free_started.get_counter(exec) += 1;
+    free_started.get_counter(mem_space) += 1;
 }
 
 
 template <typename ValueType>
-void Papi<ValueType>::on_free_completed(const Executor *exec,
+void Papi<ValueType>::on_free_completed(const MemorySpace *mem_space,
                                         const uintptr &location) const
 {
-    free_completed.get_counter(exec) += 1;
+    free_completed.get_counter(mem_space) += 1;
 }
 
 
 template <typename ValueType>
-void Papi<ValueType>::on_copy_started(const Executor *from, const Executor *to,
+void Papi<ValueType>::on_copy_started(const MemorySpace *from,
+                                      const MemorySpace *to,
                                       const uintptr &location_from,
                                       const uintptr &location_to,
                                       const size_type &num_bytes) const
@@ -86,8 +87,8 @@ void Papi<ValueType>::on_copy_started(const Executor *from, const Executor *to,
 
 
 template <typename ValueType>
-void Papi<ValueType>::on_copy_completed(const Executor *from,
-                                        const Executor *to,
+void Papi<ValueType>::on_copy_completed(const MemorySpace *from,
+                                        const MemorySpace *to,
                                         const uintptr &location_from,
                                         const uintptr &location_to,
                                         const size_type &num_bytes) const
diff --git a/core/log/record.cpp b/core/log/record.cpp
index 48026c1563b..48040198c5a 100644
--- a/core/log/record.cpp
+++ b/core/log/record.cpp
@@ -42,49 +42,49 @@ namespace gko {
 namespace log {
 
 
-void Record::on_allocation_started(const Executor *exec,
+void Record::on_allocation_started(const MemorySpace *mem_space,
                                    const size_type &num_bytes) const
 {
     append_deque(data_.allocation_started,
-                 (std::unique_ptr<executor_data>(
-                     new executor_data{exec, num_bytes, 0})));
+                 (std::unique_ptr<memory_space_data>(
+                     new memory_space_data{mem_space, num_bytes, 0})));
 }
 
 
-void Record::on_allocation_completed(const Executor *exec,
+void Record::on_allocation_completed(const MemorySpace *mem_space,
                                      const size_type &num_bytes,
                                      const uintptr &location) const
 {
     append_deque(data_.allocation_completed,
-                 (std::unique_ptr<executor_data>(
-                     new executor_data{exec, num_bytes, location})));
+                 (std::unique_ptr<memory_space_data>(
+                     new memory_space_data{mem_space, num_bytes, location})));
 }
 
 
-void Record::on_free_started(const Executor *exec,
+void Record::on_free_started(const MemorySpace *mem_space,
                              const uintptr &location) const
 {
-    append_deque(
-        data_.free_started,
-        (std::unique_ptr<executor_data>(new executor_data{exec, 0, location})));
+    append_deque(data_.free_started,
+                 (std::unique_ptr<memory_space_data>(
+                     new memory_space_data{mem_space, 0, location})));
 }
 
 
-void Record::on_free_completed(const Executor *exec,
+void Record::on_free_completed(const MemorySpace *mem_space,
                                const uintptr &location) const
 {
-    append_deque(
-        data_.free_completed,
-        (std::unique_ptr<executor_data>(new executor_data{exec, 0, location})));
+    append_deque(data_.free_completed,
+                 (std::unique_ptr<memory_space_data>(
+                     new memory_space_data{mem_space, 0, location})));
 }
 
 
-void Record::on_copy_started(const Executor *from, const Executor *to,
+void Record::on_copy_started(const MemorySpace *from, const MemorySpace *to,
                              const uintptr &location_from,
                              const uintptr &location_to,
                              const size_type &num_bytes) const
 {
-    using tuple = std::tuple<executor_data, executor_data>;
+    using tuple = std::tuple<memory_space_data, memory_space_data>;
     append_deque(
         data_.copy_started,
         (std::unique_ptr<tuple>(new tuple{{from, num_bytes, location_from},
@@ -92,12 +92,12 @@ void Record::on_copy_started(const Executor *from, const Executor *to,
 }
 
 
-void Record::on_copy_completed(const Executor *from, const Executor *to,
+void Record::on_copy_completed(const MemorySpace *from, const MemorySpace *to,
                                const uintptr &location_from,
                                const uintptr &location_to,
                                const size_type &num_bytes) const
 {
-    using tuple = std::tuple<executor_data, executor_data>;
+    using tuple = std::tuple<memory_space_data, memory_space_data>;
     append_deque(
         data_.copy_completed,
         (std::unique_ptr<tuple>(new tuple{{from, num_bytes, location_from},
diff --git a/core/log/stream.cpp b/core/log/stream.cpp
index 3cad7421aee..9810f450815 100644
--- a/core/log/stream.cpp
+++ b/core/log/stream.cpp
@@ -116,6 +116,7 @@ GKO_ENABLE_DEMANGLE_NAME(LinOp);
 GKO_ENABLE_DEMANGLE_NAME(LinOpFactory);
 GKO_ENABLE_DEMANGLE_NAME(stop::Criterion);
 GKO_ENABLE_DEMANGLE_NAME(Executor);
+GKO_ENABLE_DEMANGLE_NAME(MemorySpace);
 GKO_ENABLE_DEMANGLE_NAME(Operation);
 
 
@@ -123,46 +124,46 @@ GKO_ENABLE_DEMANGLE_NAME(Operation);
 
 
 template <typename ValueType>
-void Stream<ValueType>::on_allocation_started(const Executor *exec,
+void Stream<ValueType>::on_allocation_started(const MemorySpace *mem_space,
                                               const size_type &num_bytes) const
 {
-    os_ << prefix_ << "allocation started on " << demangle_name(exec)
+    os_ << prefix_ << "allocation started on " << demangle_name(mem_space)
         << " with " << bytes_name(num_bytes) << std::endl;
 }
 
 
 template <typename ValueType>
-void Stream<ValueType>::on_allocation_completed(const Executor *exec,
+void Stream<ValueType>::on_allocation_completed(const MemorySpace *mem_space,
                                                 const size_type &num_bytes,
                                                 const uintptr &location) const
 {
-    os_ << prefix_ << "allocation completed on " << demangle_name(exec)
+    os_ << prefix_ << "allocation completed on " << demangle_name(mem_space)
         << " at " << location_name(location) << " with "
         << bytes_name(num_bytes) << std::endl;
 }
 
 
 template <typename ValueType>
-void Stream<ValueType>::on_free_started(const Executor *exec,
+void Stream<ValueType>::on_free_started(const MemorySpace *mem_space,
                                         const uintptr &location) const
 {
-    os_ << prefix_ << "free started on " << demangle_name(exec) << " at "
+    os_ << prefix_ << "free started on " << demangle_name(mem_space) << " at "
         << location_name(location) << std::endl;
 }
 
 
 template <typename ValueType>
-void Stream<ValueType>::on_free_completed(const Executor *exec,
+void Stream<ValueType>::on_free_completed(const MemorySpace *mem_space,
                                           const uintptr &location) const
 {
-    os_ << prefix_ << "free completed on " << demangle_name(exec) << " at "
+    os_ << prefix_ << "free completed on " << demangle_name(mem_space) << " at "
         << location_name(location) << std::endl;
 }
 
 
 template <typename ValueType>
-void Stream<ValueType>::on_copy_started(const Executor *from,
-                                        const Executor *to,
+void Stream<ValueType>::on_copy_started(const MemorySpace *from,
+                                        const MemorySpace *to,
                                         const uintptr &location_from,
                                         const uintptr &location_to,
                                         const size_type &num_bytes) const
@@ -175,8 +176,8 @@ void Stream<ValueType>::on_copy_started(const Executor *from,
 
 
 template <typename ValueType>
-void Stream<ValueType>::on_copy_completed(const Executor *from,
-                                          const Executor *to,
+void Stream<ValueType>::on_copy_completed(const MemorySpace *from,
+                                          const MemorySpace *to,
                                           const uintptr &location_from,
                                           const uintptr &location_to,
                                           const size_type &num_bytes) const
diff --git a/core/test/log/convergence.cpp b/core/test/log/convergence.cpp
index bb05007817b..25fcb2c531d 100644
--- a/core/test/log/convergence.cpp
+++ b/core/test/log/convergence.cpp
@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 
 
 #include "core/test/utils.hpp"
@@ -54,7 +55,7 @@ TYPED_TEST(Convergence, CanGetData)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Convergence<TypeParam>::create(
-        exec, gko::log::Logger::iteration_complete_mask);
+        exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask);
 
     ASSERT_EQ(logger->get_num_iterations(), 0);
     ASSERT_EQ(logger->get_residual(), nullptr);
diff --git a/core/test/log/logger.cpp b/core/test/log/logger.cpp
index e051e16f692..19b8ceb51f2 100644
--- a/core/test/log/logger.cpp
+++ b/core/test/log/logger.cpp
@@ -65,8 +65,8 @@ TEST(DummyLogged, CanAddLogger)
     auto exec = gko::ReferenceExecutor::create();
     DummyLoggedClass c;
 
-    c.add_logger(
-        gko::log::Record::create(exec, gko::log::Logger::all_events_mask));
+    c.add_logger(gko::log::Record::create(exec, exec->get_mem_space(),
+                                          gko::log::Logger::all_events_mask));
 
     ASSERT_EQ(c.get_num_loggers(), 1);
 }
@@ -77,10 +77,11 @@ TEST(DummyLogged, CanAddMultipleLoggers)
     auto exec = gko::ReferenceExecutor::create();
     DummyLoggedClass c;
 
-    c.add_logger(
-        gko::log::Record::create(exec, gko::log::Logger::all_events_mask));
-    c.add_logger(gko::log::Stream<>::create(
-        exec, gko::log::Logger::all_events_mask, std::cout));
+    c.add_logger(gko::log::Record::create(exec, exec->get_mem_space(),
+                                          gko::log::Logger::all_events_mask));
+    c.add_logger(gko::log::Stream<>::create(exec, exec->get_mem_space(),
+                                            gko::log::Logger::all_events_mask,
+                                            std::cout));
 
     ASSERT_EQ(c.get_num_loggers(), 2);
 }
@@ -90,11 +91,12 @@ TEST(DummyLogged, CanRemoveLogger)
 {
     auto exec = gko::ReferenceExecutor::create();
     DummyLoggedClass c;
-    auto r = gko::share(
-        gko::log::Record::create(exec, gko::log::Logger::all_events_mask));
+    auto r = gko::share(gko::log::Record::create(
+        exec, exec->get_mem_space(), gko::log::Logger::all_events_mask));
     c.add_logger(r);
-    c.add_logger(gko::log::Stream<>::create(
-        exec, gko::log::Logger::all_events_mask, std::cout));
+    c.add_logger(gko::log::Stream<>::create(exec, exec->get_mem_space(),
+                                            gko::log::Logger::all_events_mask,
+                                            std::cout));
 
     c.remove_logger(gko::lend(r));
 
@@ -107,7 +109,7 @@ struct DummyLogger : gko::log::Logger {
     explicit DummyLogger(
         std::shared_ptr<const gko::Executor> exec,
         const mask_type &enabled_events = Logger::all_events_mask)
-        : Logger(exec, enabled_events)
+        : Logger(exec, exec->get_mem_space(), enabled_events)
     {}
 
     void on_iteration_complete(
diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp
index d318a29f228..07585ba7f04 100644
--- a/core/test/log/papi.cpp
+++ b/core/test/log/papi.cpp
@@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/matrix/dense.hpp>
 #include <ginkgo/core/solver/bicgstab.hpp>
 #include <ginkgo/core/stop/iteration.hpp>
@@ -77,7 +78,7 @@ class Papi : public ::testing::Test {
     const std::string init(const gko::log::Logger::mask_type &event,
                            const std::string &event_name, U *ptr)
     {
-        logger = gko::log::Papi<T>::create(exec, event);
+        logger = gko::log::Papi<T>::create(exec, exec->get_mem_space(), event);
         std::ostringstream os;
         os << "sde:::" << logger->get_handle_name() << "::" << event_name
            << "::" << reinterpret_cast<gko::uintptr>(ptr);
@@ -125,13 +126,14 @@ TYPED_TEST_CASE(Papi, gko::test::ValueTypes);
 TYPED_TEST(Papi, CatchesAllocationStarted)
 {
     int logged_value = 42;
-    auto str = this->init(gko::log::Logger::allocation_started_mask,
-                          "allocation_started", this->exec.get());
+    auto str =
+        this->init(gko::log::Logger::allocation_started_mask,
+                   "allocation_started", this->exec->get_mem_space().get());
     this->add_event(str);
 
     this->start();
     this->logger->template on<gko::log::Logger::allocation_started>(
-        this->exec.get(), logged_value);
+        this->exec->get_mem_space().get(), logged_value);
     long long int value = 0;
     this->stop(&value);
 
@@ -142,13 +144,14 @@ TYPED_TEST(Papi, CatchesAllocationStarted)
 TYPED_TEST(Papi, CatchesAllocationCompleted)
 {
     int logged_value = 42;
-    auto str = this->init(gko::log::Logger::allocation_completed_mask,
-                          "allocation_completed", this->exec.get());
+    auto str =
+        this->init(gko::log::Logger::allocation_completed_mask,
+                   "allocation_completed", this->exec->get_mem_space().get());
     this->add_event(str);
 
     this->start();
     this->logger->template on<gko::log::Logger::allocation_completed>(
-        this->exec.get(), logged_value, 0);
+        this->exec->get_mem_space().get(), logged_value, 0);
     long long int value = 0;
     this->stop(&value);
 
@@ -159,12 +162,12 @@ TYPED_TEST(Papi, CatchesAllocationCompleted)
 TYPED_TEST(Papi, CatchesFreeStarted)
 {
     auto str = this->init(gko::log::Logger::free_started_mask, "free_started",
-                          this->exec.get());
+                          this->exec->get_mem_space().get());
     this->add_event(str);
 
     this->start();
-    this->logger->template on<gko::log::Logger::free_started>(this->exec.get(),
-                                                              0);
+    this->logger->template on<gko::log::Logger::free_started>(
+        this->exec->get_mem_space().get(), 0);
     long long int value = 0;
     this->stop(&value);
 
@@ -175,12 +178,12 @@ TYPED_TEST(Papi, CatchesFreeStarted)
 TYPED_TEST(Papi, CatchesFreeCompleted)
 {
     auto str = this->init(gko::log::Logger::free_completed_mask,
-                          "free_completed", this->exec.get());
+                          "free_completed", this->exec->get_mem_space().get());
     this->add_event(str);
 
     this->start();
     this->logger->template on<gko::log::Logger::free_completed>(
-        this->exec.get(), 0);
+        this->exec->get_mem_space().get(), 0);
     long long int value = 0;
     this->stop(&value);
 
@@ -191,18 +194,20 @@ TYPED_TEST(Papi, CatchesFreeCompleted)
 TYPED_TEST(Papi, CatchesCopyStarted)
 {
     auto logged_value = 42;
-    auto str = this->init(gko::log::Logger::copy_started_mask,
-                          "copy_started_from", this->exec.get());
+    auto str =
+        this->init(gko::log::Logger::copy_started_mask, "copy_started_from",
+                   this->exec->get_mem_space().get());
     std::ostringstream os_out;
     os_out << "sde:::" << this->logger->get_handle_name()
            << "::copy_started_to::"
-           << reinterpret_cast<gko::uintptr>(this->exec.get());
+           << reinterpret_cast<gko::uintptr>(this->exec->get_mem_space().get());
     this->add_event(str);
     this->add_event(os_out.str());
 
     this->start();
     this->logger->template on<gko::log::Logger::copy_started>(
-        this->exec.get(), this->exec.get(), 0, 0, logged_value);
+        this->exec->get_mem_space().get(), this->exec->get_mem_space().get(), 0,
+        0, logged_value);
     long long int values[2];
     this->stop(values);
 
@@ -214,18 +219,20 @@ TYPED_TEST(Papi, CatchesCopyStarted)
 TYPED_TEST(Papi, CatchesCopyCompleted)
 {
     auto logged_value = 42;
-    auto str = this->init(gko::log::Logger::copy_completed_mask,
-                          "copy_completed_from", this->exec.get());
+    auto str =
+        this->init(gko::log::Logger::copy_completed_mask, "copy_completed_from",
+                   this->exec->get_mem_space().get());
     std::ostringstream os_out;
     os_out << "sde:::" << this->logger->get_handle_name()
            << "::copy_completed_to::"
-           << reinterpret_cast<gko::uintptr>(this->exec.get());
+           << reinterpret_cast<gko::uintptr>(this->exec->get_mem_space().get());
     this->add_event(str);
     this->add_event(os_out.str());
 
     this->start();
     this->logger->template on<gko::log::Logger::copy_completed>(
-        this->exec.get(), this->exec.get(), 0, 0, logged_value);
+        this->exec->get_mem_space().get(), this->exec->get_mem_space().get(), 0,
+        0, logged_value);
     long long int values[2];
     this->stop(values);
 
diff --git a/core/test/log/record.cpp b/core/test/log/record.cpp
index dd829d39a8c..c926670c17e 100644
--- a/core/test/log/record.cpp
+++ b/core/test/log/record.cpp
@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/base/utils.hpp>
 #include <ginkgo/core/solver/bicgstab.hpp>
 #include <ginkgo/core/stop/iteration.hpp>
@@ -56,7 +57,7 @@ TEST(Record, CanGetData)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::iteration_complete_mask);
+        exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask);
 
     ASSERT_EQ(logger->get().allocation_started.size(), 0);
 }
@@ -66,12 +67,13 @@ TEST(Record, CatchesAllocationStarted)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::allocation_started_mask);
+        exec, exec->get_mem_space(), gko::log::Logger::allocation_started_mask);
 
-    logger->on<gko::log::Logger::allocation_started>(exec.get(), 42);
+    logger->on<gko::log::Logger::allocation_started>(
+        exec->get_mem_space().get(), 42);
 
     auto &data = logger->get().allocation_started.back();
-    ASSERT_EQ(data->exec, exec.get());
+    ASSERT_EQ(data->mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data->num_bytes, 42);
     ASSERT_EQ(data->location, 0);
 }
@@ -80,15 +82,17 @@ TEST(Record, CatchesAllocationStarted)
 TEST(Record, CatchesAllocationCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
-    auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::allocation_completed_mask);
+    auto logger =
+        gko::log::Record::create(exec, exec->get_mem_space(),
+                                 gko::log::Logger::allocation_completed_mask);
     int dummy = 1;
     auto ptr = reinterpret_cast<gko::uintptr>(&dummy);
 
-    logger->on<gko::log::Logger::allocation_completed>(exec.get(), 42, ptr);
+    logger->on<gko::log::Logger::allocation_completed>(
+        exec->get_mem_space().get(), 42, ptr);
 
     auto &data = logger->get().allocation_completed.back();
-    ASSERT_EQ(data->exec, exec.get());
+    ASSERT_EQ(data->mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data->num_bytes, 42);
     ASSERT_EQ(data->location, ptr);
 }
@@ -97,15 +101,16 @@ TEST(Record, CatchesAllocationCompleted)
 TEST(Record, CatchesFreeStarted)
 {
     auto exec = gko::ReferenceExecutor::create();
-    auto logger =
-        gko::log::Record::create(exec, gko::log::Logger::free_started_mask);
+    auto logger = gko::log::Record::create(exec, exec->get_mem_space(),
+                                           gko::log::Logger::free_started_mask);
     int dummy = 1;
     auto ptr = reinterpret_cast<gko::uintptr>(&dummy);
 
-    logger->on<gko::log::Logger::free_started>(exec.get(), ptr);
+    logger->on<gko::log::Logger::free_started>(exec->get_mem_space().get(),
+                                               ptr);
 
     auto &data = logger->get().free_started.back();
-    ASSERT_EQ(data->exec, exec.get());
+    ASSERT_EQ(data->mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data->num_bytes, 0);
     ASSERT_EQ(data->location, ptr);
 }
@@ -114,15 +119,16 @@ TEST(Record, CatchesFreeStarted)
 TEST(Record, CatchesFreeCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
-    auto logger =
-        gko::log::Record::create(exec, gko::log::Logger::free_completed_mask);
+    auto logger = gko::log::Record::create(
+        exec, exec->get_mem_space(), gko::log::Logger::free_completed_mask);
     int dummy = 1;
     auto ptr = reinterpret_cast<gko::uintptr>(&dummy);
 
-    logger->on<gko::log::Logger::free_completed>(exec.get(), ptr);
+    logger->on<gko::log::Logger::free_completed>(exec->get_mem_space().get(),
+                                                 ptr);
 
     auto &data = logger->get().free_completed.back();
-    ASSERT_EQ(data->exec, exec.get());
+    ASSERT_EQ(data->mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data->num_bytes, 0);
     ASSERT_EQ(data->location, ptr);
 }
@@ -131,23 +137,24 @@ TEST(Record, CatchesFreeCompleted)
 TEST(Record, CatchesCopyStarted)
 {
     auto exec = gko::ReferenceExecutor::create();
-    auto logger =
-        gko::log::Record::create(exec, gko::log::Logger::copy_started_mask);
+    auto logger = gko::log::Record::create(exec, exec->get_mem_space(),
+                                           gko::log::Logger::copy_started_mask);
     int dummy_from = 1;
     int dummy_to = 1;
     auto ptr_from = reinterpret_cast<gko::uintptr>(&dummy_from);
     auto ptr_to = reinterpret_cast<gko::uintptr>(&dummy_to);
 
-    logger->on<gko::log::Logger::copy_started>(exec.get(), exec.get(), ptr_from,
-                                               ptr_to, 42);
+    logger->on<gko::log::Logger::copy_started>(exec->get_mem_space().get(),
+                                               exec->get_mem_space().get(),
+                                               ptr_from, ptr_to, 42);
 
     auto &data = logger->get().copy_started.back();
     auto data_from = std::get<0>(*data);
     auto data_to = std::get<1>(*data);
-    ASSERT_EQ(data_from.exec, exec.get());
+    ASSERT_EQ(data_from.mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data_from.num_bytes, 42);
     ASSERT_EQ(data_from.location, ptr_from);
-    ASSERT_EQ(data_to.exec, exec.get());
+    ASSERT_EQ(data_to.mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data_to.num_bytes, 42);
     ASSERT_EQ(data_to.location, ptr_to);
 }
@@ -156,23 +163,24 @@ TEST(Record, CatchesCopyStarted)
 TEST(Record, CatchesCopyCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
-    auto logger =
-        gko::log::Record::create(exec, gko::log::Logger::copy_completed_mask);
+    auto logger = gko::log::Record::create(
+        exec, exec->get_mem_space(), gko::log::Logger::copy_completed_mask);
     int dummy_from = 1;
     int dummy_to = 1;
     auto ptr_from = reinterpret_cast<gko::uintptr>(&dummy_from);
     auto ptr_to = reinterpret_cast<gko::uintptr>(&dummy_to);
 
-    logger->on<gko::log::Logger::copy_completed>(exec.get(), exec.get(),
+    logger->on<gko::log::Logger::copy_completed>(exec->get_mem_space().get(),
+                                                 exec->get_mem_space().get(),
                                                  ptr_from, ptr_to, 42);
 
     auto &data = logger->get().copy_completed.back();
     auto data_from = std::get<0>(*data);
     auto data_to = std::get<1>(*data);
-    ASSERT_EQ(data_from.exec, exec.get());
+    ASSERT_EQ(data_from.mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data_from.num_bytes, 42);
     ASSERT_EQ(data_from.location, ptr_from);
-    ASSERT_EQ(data_to.exec, exec.get());
+    ASSERT_EQ(data_to.mem_space, exec->get_mem_space().get());
     ASSERT_EQ(data_to.num_bytes, 42);
     ASSERT_EQ(data_to.location, ptr_to);
 }
@@ -182,7 +190,7 @@ TEST(Record, CatchesOperationLaunched)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::operation_launched_mask);
+        exec, exec->get_mem_space(), gko::log::Logger::operation_launched_mask);
     gko::Operation op;
 
     logger->on<gko::log::Logger::operation_launched>(exec.get(), &op);
@@ -196,8 +204,9 @@ TEST(Record, CatchesOperationLaunched)
 TEST(Record, CatchesOperationCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
-    auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::operation_completed_mask);
+    auto logger =
+        gko::log::Record::create(exec, exec->get_mem_space(),
+                                 gko::log::Logger::operation_completed_mask);
     gko::Operation op;
 
     logger->on<gko::log::Logger::operation_completed>(exec.get(), &op);
@@ -213,7 +222,8 @@ TEST(Record, CatchesPolymorphicObjectCreateStarted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::polymorphic_object_create_started_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_create_started_mask);
     auto po = gko::matrix::Dense<>::create(exec);
 
     logger->on<gko::log::Logger::polymorphic_object_create_started>(exec.get(),
@@ -232,7 +242,8 @@ TEST(Record, CatchesPolymorphicObjectCreateCompleted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::polymorphic_object_create_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_create_completed_mask);
     auto po = gko::matrix::Dense<>::create(exec);
     auto output = gko::matrix::Dense<>::create(exec);
 
@@ -251,7 +262,8 @@ TEST(Record, CatchesPolymorphicObjectCopyStarted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::polymorphic_object_copy_started_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_copy_started_mask);
     auto from = gko::matrix::Dense<>::create(exec);
     auto to = gko::matrix::Dense<>::create(exec);
 
@@ -270,7 +282,8 @@ TEST(Record, CatchesPolymorphicObjectCopyCompleted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::polymorphic_object_copy_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_copy_completed_mask);
     auto from = gko::matrix::Dense<>::create(exec);
     auto to = gko::matrix::Dense<>::create(exec);
 
@@ -290,7 +303,8 @@ TEST(Record, CatchesPolymorphicObjectDeleted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::polymorphic_object_deleted_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_deleted_mask);
     auto po = gko::matrix::Dense<>::create(exec);
 
     logger->on<gko::log::Logger::polymorphic_object_deleted>(exec.get(),
@@ -308,8 +322,9 @@ TEST(Record, CatchesLinOpApplyStarted)
 {
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
-    auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::linop_apply_started_mask);
+    auto logger =
+        gko::log::Record::create(exec, exec->get_mem_space(),
+                                 gko::log::Logger::linop_apply_started_mask);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
     auto x = gko::initialize<Dense>({3.3}, exec);
@@ -330,8 +345,9 @@ TEST(Record, CatchesLinOpApplyCompleted)
 {
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
-    auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::linop_apply_completed_mask);
+    auto logger =
+        gko::log::Record::create(exec, exec->get_mem_space(),
+                                 gko::log::Logger::linop_apply_completed_mask);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
     auto x = gko::initialize<Dense>({3.3}, exec);
@@ -353,7 +369,8 @@ TEST(Record, CatchesLinOpAdvancedApplyStarted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::linop_advanced_apply_started_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_advanced_apply_started_mask);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto alpha = gko::initialize<Dense>({-4.4}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
@@ -377,7 +394,8 @@ TEST(Record, CatchesLinOpAdvancedApplyCompleted)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::linop_advanced_apply_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_advanced_apply_completed_mask);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto alpha = gko::initialize<Dense>({-4.4}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
@@ -400,7 +418,8 @@ TEST(Record, CatchesLinopFactoryGenerateStarted)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::linop_factory_generate_started_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_factory_generate_started_mask);
     auto factory =
         gko::solver::Bicgstab<>::build()
             .with_criteria(
@@ -422,7 +441,8 @@ TEST(Record, CatchesLinopFactoryGenerateCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::linop_factory_generate_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_factory_generate_completed_mask);
     auto factory =
         gko::solver::Bicgstab<>::build()
             .with_criteria(
@@ -445,7 +465,8 @@ TEST(Record, CatchesCriterionCheckStarted)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::criterion_check_started_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::criterion_check_started_mask);
     auto criterion =
         gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate(
             nullptr, nullptr, nullptr);
@@ -468,7 +489,8 @@ TEST(Record, CatchesCriterionCheckCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::criterion_check_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::criterion_check_completed_mask);
     auto criterion =
         gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate(
             nullptr, nullptr, nullptr);
@@ -499,7 +521,7 @@ TEST(Record, CatchesIterations)
     using Dense = gko::matrix::Dense<>;
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Record::create(
-        exec, gko::log::Logger::iteration_complete_mask);
+        exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask);
     auto factory =
         gko::solver::Bicgstab<>::build()
             .with_criteria(
diff --git a/core/test/log/stream.cpp b/core/test/log/stream.cpp
index 163a54fd74a..e293936f3ac 100644
--- a/core/test/log/stream.cpp
+++ b/core/test/log/stream.cpp
@@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/matrix/dense.hpp>
 #include <ginkgo/core/solver/bicgstab.hpp>
 #include <ginkgo/core/stop/iteration.hpp>
@@ -67,9 +68,11 @@ TYPED_TEST(Stream, CatchesAllocationStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::allocation_started_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::allocation_started_mask,
+        out);
 
-    logger->template on<gko::log::Logger::allocation_started>(exec.get(), 42);
+    logger->template on<gko::log::Logger::allocation_started>(
+        exec->get_mem_space().get(), 42);
 
     auto os = out.str();
     GKO_ASSERT_STR_CONTAINS(os, "allocation started on");
@@ -82,13 +85,15 @@ TYPED_TEST(Stream, CatchesAllocationCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::allocation_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::allocation_completed_mask, out);
     int dummy = 1;
     std::stringstream ptrstream;
     ptrstream << std::hex << "0x" << reinterpret_cast<gko::uintptr>(&dummy);
 
     logger->template on<gko::log::Logger::allocation_completed>(
-        exec.get(), 42, reinterpret_cast<gko::uintptr>(&dummy));
+        exec->get_mem_space().get(), 42,
+        reinterpret_cast<gko::uintptr>(&dummy));
 
     auto os = out.str();
     GKO_ASSERT_STR_CONTAINS(os, "allocation completed on");
@@ -102,13 +107,13 @@ TYPED_TEST(Stream, CatchesFreeStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::free_started_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::free_started_mask, out);
     int dummy = 1;
     std::stringstream ptrstream;
     ptrstream << std::hex << "0x" << reinterpret_cast<gko::uintptr>(&dummy);
 
     logger->template on<gko::log::Logger::free_started>(
-        exec.get(), reinterpret_cast<gko::uintptr>(&dummy));
+        exec->get_mem_space().get(), reinterpret_cast<gko::uintptr>(&dummy));
 
     auto os = out.str();
     GKO_ASSERT_STR_CONTAINS(os, "free started on");
@@ -121,13 +126,14 @@ TYPED_TEST(Stream, CatchesFreeCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::free_completed_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::free_completed_mask,
+        out);
     int dummy = 1;
     std::stringstream ptrstream;
     ptrstream << std::hex << "0x" << reinterpret_cast<gko::uintptr>(&dummy);
 
     logger->template on<gko::log::Logger::free_completed>(
-        exec.get(), reinterpret_cast<gko::uintptr>(&dummy));
+        exec->get_mem_space().get(), reinterpret_cast<gko::uintptr>(&dummy));
 
     auto os = out.str();
     GKO_ASSERT_STR_CONTAINS(os, "free completed on");
@@ -140,7 +146,7 @@ TYPED_TEST(Stream, CatchesCopyStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::copy_started_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::copy_started_mask, out);
     int dummy_in = 1;
     int dummy_out = 1;
     std::stringstream ptrstream_in;
@@ -151,7 +157,8 @@ TYPED_TEST(Stream, CatchesCopyStarted)
                   << reinterpret_cast<gko::uintptr>(&dummy_out);
 
     logger->template on<gko::log::Logger::copy_started>(
-        exec.get(), exec.get(), reinterpret_cast<gko::uintptr>(&dummy_in),
+        exec->get_mem_space().get(), exec->get_mem_space().get(),
+        reinterpret_cast<gko::uintptr>(&dummy_in),
         reinterpret_cast<gko::uintptr>(&dummy_out), 42);
 
     auto os = out.str();
@@ -167,7 +174,8 @@ TYPED_TEST(Stream, CatchesCopyCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::copy_completed_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::copy_completed_mask,
+        out);
     int dummy_in = 1;
     int dummy_out = 1;
     std::stringstream ptrstream_in;
@@ -178,7 +186,8 @@ TYPED_TEST(Stream, CatchesCopyCompleted)
                   << reinterpret_cast<gko::uintptr>(&dummy_out);
 
     logger->template on<gko::log::Logger::copy_completed>(
-        exec.get(), exec.get(), reinterpret_cast<gko::uintptr>(&dummy_in),
+        exec->get_mem_space().get(), exec->get_mem_space().get(),
+        reinterpret_cast<gko::uintptr>(&dummy_in),
         reinterpret_cast<gko::uintptr>(&dummy_out), 42);
 
     auto os = out.str();
@@ -194,7 +203,8 @@ TYPED_TEST(Stream, CatchesOperationLaunched)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::operation_launched_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::operation_launched_mask,
+        out);
     gko::Operation op;
     std::stringstream ptrstream;
     ptrstream << &op;
@@ -212,7 +222,8 @@ TYPED_TEST(Stream, CatchesOperationCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::operation_completed_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::operation_completed_mask,
+        out);
     gko::Operation op;
     std::stringstream ptrstream;
     ptrstream << &op;
@@ -230,7 +241,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCreateStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::polymorphic_object_create_started_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_create_started_mask, out);
     auto po = gko::matrix::Dense<TypeParam>::create(exec);
     std::stringstream ptrstream;
     ptrstream << po.get();
@@ -249,7 +261,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCreateCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::polymorphic_object_create_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_create_completed_mask, out);
     auto po = gko::matrix::Dense<TypeParam>::create(exec);
     auto output = gko::matrix::Dense<TypeParam>::create(exec);
     std::stringstream ptrstream_in;
@@ -272,7 +285,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCopyStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::polymorphic_object_copy_started_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_copy_started_mask, out);
     auto from = gko::matrix::Dense<TypeParam>::create(exec);
     auto to = gko::matrix::Dense<TypeParam>::create(exec);
     std::stringstream ptrstream_from;
@@ -295,7 +309,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectCopyCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::polymorphic_object_copy_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_copy_completed_mask, out);
     auto from = gko::matrix::Dense<TypeParam>::create(exec);
     auto to = gko::matrix::Dense<TypeParam>::create(exec);
     std::stringstream ptrstream_from;
@@ -318,7 +333,8 @@ TYPED_TEST(Stream, CatchesPolymorphicObjectDeleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::polymorphic_object_deleted_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::polymorphic_object_deleted_mask, out);
     auto po = gko::matrix::Dense<TypeParam>::create(exec);
     std::stringstream ptrstream;
     ptrstream << po.get();
@@ -338,7 +354,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_apply_started_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::linop_apply_started_mask,
+        out);
     auto A = Dense::create(exec);
     auto b = Dense::create(exec);
     auto x = Dense::create(exec);
@@ -366,7 +383,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyStartedWithVerbose)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_apply_started_mask, out, true);
+        exec, exec->get_mem_space(), gko::log::Logger::linop_apply_started_mask,
+        out, true);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
     auto x = gko::initialize<Dense>({3.3}, exec);
@@ -387,7 +405,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_apply_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_apply_completed_mask, out);
     auto A = Dense::create(exec);
     auto b = Dense::create(exec);
     auto x = Dense::create(exec);
@@ -415,7 +434,8 @@ TYPED_TEST(Stream, CatchesLinOpApplyCompletedWithVerbose)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_apply_completed_mask, out, true);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_apply_completed_mask, out, true);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
     auto x = gko::initialize<Dense>({3.3}, exec);
@@ -436,7 +456,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_advanced_apply_started_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_advanced_apply_started_mask, out);
     auto A = Dense::create(exec);
     auto alpha = Dense::create(exec);
     auto b = Dense::create(exec);
@@ -472,7 +493,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyStartedWithVerbose)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_advanced_apply_started_mask, out, true);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_advanced_apply_started_mask, out, true);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto alpha = gko::initialize<Dense>({-4.4}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
@@ -497,7 +519,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_advanced_apply_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_advanced_apply_completed_mask, out);
     auto A = Dense::create(exec);
     auto alpha = Dense::create(exec);
     auto b = Dense::create(exec);
@@ -533,7 +556,8 @@ TYPED_TEST(Stream, CatchesLinOpAdvancedApplyCompletedWithVerbose)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_advanced_apply_completed_mask, out, true);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_advanced_apply_completed_mask, out, true);
     auto A = gko::initialize<Dense>({1.1}, exec);
     auto alpha = gko::initialize<Dense>({-4.4}, exec);
     auto b = gko::initialize<Dense>({-2.2}, exec);
@@ -557,7 +581,8 @@ TYPED_TEST(Stream, CatchesLinopFactoryGenerateStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_factory_generate_started_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_factory_generate_started_mask, out);
     auto factory =
         gko::solver::Bicgstab<TypeParam>::build()
             .with_criteria(
@@ -584,7 +609,8 @@ TYPED_TEST(Stream, CatchesLinopFactoryGenerateCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::linop_factory_generate_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_factory_generate_completed_mask, out);
     auto factory =
         gko::solver::Bicgstab<TypeParam>::build()
             .with_criteria(
@@ -616,7 +642,8 @@ TYPED_TEST(Stream, CatchesCriterionCheckStarted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::criterion_check_started_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::criterion_check_started_mask, out);
     auto criterion =
         gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate(
             nullptr, nullptr, nullptr);
@@ -643,7 +670,8 @@ TYPED_TEST(Stream, CatchesCriterionCheckCompleted)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::criterion_check_completed_mask, out);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::criterion_check_completed_mask, out);
     auto criterion =
         gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate(
             nullptr, nullptr, nullptr);
@@ -674,7 +702,8 @@ TYPED_TEST(Stream, CatchesCriterionCheckCompletedWithVerbose)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::criterion_check_completed_mask, out, true);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::criterion_check_completed_mask, out, true);
     auto criterion =
         gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate(
             nullptr, nullptr, nullptr);
@@ -703,7 +732,8 @@ TYPED_TEST(Stream, CatchesIterations)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::iteration_complete_mask, out);
+        exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask,
+        out);
     auto solver = Dense::create(exec);
     auto residual = Dense::create(exec);
     auto solution = Dense::create(exec);
@@ -729,7 +759,8 @@ TYPED_TEST(Stream, CatchesIterationsWithVerbose)
     auto exec = gko::ReferenceExecutor::create();
     std::stringstream out;
     auto logger = gko::log::Stream<TypeParam>::create(
-        exec, gko::log::Logger::iteration_complete_mask, out, true);
+        exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask,
+        out, true);
 
     auto factory =
         gko::solver::Bicgstab<TypeParam>::build()
diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
index 06ddbd8f24b..4efbac69adb 100644
--- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
+++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
@@ -111,7 +111,7 @@ int main(int argc, char *argv[])
                         .on(exec);
 
     std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
-        gko::log::Convergence<ValueType>::create(exec);
+        gko::log::Convergence<ValueType>::create(exec, exec->get_mem_space());
     iter_stop->add_logger(logger);
     tol_stop->add_logger(logger);
 
diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp
index afc48e0ea1f..5a3199a87c5 100644
--- a/examples/custom-logger/custom-logger.cpp
+++ b/examples/custom-logger/custom-logger.cpp
@@ -171,7 +171,8 @@ struct ResidualLogger : gko::log::Logger {
     // Construct the logger and store the system matrix and b vectors
     ResidualLogger(std::shared_ptr<const gko::Executor> exec,
                    const gko::LinOp *matrix, const gko_dense *b)
-        : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask),
+        : gko::log::Logger(exec, exec->get_mem_space(),
+                           gko::log::Logger::iteration_complete_mask),
           matrix{matrix},
           b{b}
     {}
diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
index 5c7d7861b9b..c455da4135a 100644
--- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
+++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
@@ -117,7 +117,8 @@ void run_solver(volatile bool *stop_iteration_process,
                       .on(exec)
                       ->generate(A);
     solver->add_logger(gko::log::Stream<ValueType>::create(
-        exec, gko::log::Logger::iteration_complete_mask, std::cout, true));
+        exec, exec->get_mem_space(), gko::log::Logger::iteration_complete_mask,
+        std::cout, true));
     solver->apply(lend(b), lend(x));
 
     std::cout << "Solver stopped" << std::endl;
diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
index 627d11d88dd..b07940f3c60 100644
--- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
+++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
@@ -143,7 +143,7 @@ int main(int argc, char *argv[])
                         .on(exec);
 
     std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
-        gko::log::Convergence<ValueType>::create(exec);
+        gko::log::Convergence<ValueType>::create(exec, exec->get_mem_space());
     iter_stop->add_logger(logger);
     tol_stop->add_logger(logger);
 
diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp
index a07726ec82e..0df2279e4d9 100644
--- a/examples/iterative-refinement/iterative-refinement.cpp
+++ b/examples/iterative-refinement/iterative-refinement.cpp
@@ -113,7 +113,7 @@ int main(int argc, char *argv[])
                         .on(exec);
 
     std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
-        gko::log::Convergence<ValueType>::create(exec);
+        gko::log::Convergence<ValueType>::create(exec, exec->get_mem_space());
     iter_stop->add_logger(logger);
     tol_stop->add_logger(logger);
 
diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp
index 4a9fae8bcbd..6abee3ff818 100644
--- a/examples/performance-debugging/performance-debugging.cpp
+++ b/examples/performance-debugging/performance-debugging.cpp
@@ -116,41 +116,43 @@ namespace loggers {
 // taken before and after. This can create significant overhead since to ensure
 // proper timings, calls to `synchronize` are required.
 struct OperationLogger : gko::log::Logger {
-    void on_allocation_started(const gko::Executor *exec,
+    void on_allocation_started(const gko::MemorySpace *mem_space,
                                const gko::size_type &) const override
     {
-        this->start_operation(exec, "allocate");
+        this->start_operation(mem_space, "allocate");
     }
 
-    void on_allocation_completed(const gko::Executor *exec,
+    void on_allocation_completed(const gko::MemorySpace *mem_space,
                                  const gko::size_type &,
                                  const gko::uintptr &) const override
     {
-        this->end_operation(exec, "allocate");
+        this->end_operation(mem_space, "allocate");
     }
 
-    void on_free_started(const gko::Executor *exec,
+    void on_free_started(const gko::MemorySpace *mem_space,
                          const gko::uintptr &) const override
     {
-        this->start_operation(exec, "free");
+        this->start_operation(mem_space, "free");
     }
 
-    void on_free_completed(const gko::Executor *exec,
+    void on_free_completed(const gko::MemorySpace *mem_space,
                            const gko::uintptr &) const override
     {
-        this->end_operation(exec, "free");
+        this->end_operation(mem_space, "free");
     }
 
-    void on_copy_started(const gko::Executor *from, const gko::Executor *to,
-                         const gko::uintptr &, const gko::uintptr &,
+    void on_copy_started(const gko::MemorySpace *from,
+                         const gko::MemorySpace *to, const gko::uintptr &,
+                         const gko::uintptr &,
                          const gko::size_type &) const override
     {
         from->synchronize();
         this->start_operation(to, "copy");
     }
 
-    void on_copy_completed(const gko::Executor *from, const gko::Executor *to,
-                           const gko::uintptr &, const gko::uintptr &,
+    void on_copy_completed(const gko::MemorySpace *from,
+                           const gko::MemorySpace *to, const gko::uintptr &,
+                           const gko::uintptr &,
                            const gko::size_type &) const override
     {
         from->synchronize();
@@ -181,24 +183,25 @@ struct OperationLogger : gko::log::Logger {
     }
 
     OperationLogger(std::shared_ptr<const gko::Executor> exec)
-        : gko::log::Logger(exec)
+        : gko::log::Logger(exec, exec->get_mem_space())
     {}
 
 private:
     // Helper which synchronizes and starts the time before every operation.
-    void start_operation(const gko::Executor *exec,
-                         const std::string &name) const
+    template <typename Event>
+    void start_operation(const Event *event, const std::string &name) const
     {
         nested.emplace_back(0);
-        exec->synchronize();
+        event->synchronize();
         start[name] = std::chrono::steady_clock::now();
     }
 
     // Helper to compute the end time and store the operation's time at its
     // end. Also time nested operations.
-    void end_operation(const gko::Executor *exec, const std::string &name) const
+    template <typename Event>
+    void end_operation(const Event *event, const std::string &name) const
     {
-        exec->synchronize();
+        event->synchronize();
         const auto end = std::chrono::steady_clock::now();
         const auto diff = end - start[name];
         // make sure timings for nested operations are not counted twice
@@ -220,7 +223,7 @@ struct OperationLogger : gko::log::Logger {
 // This logger tracks the persistently allocated data
 struct StorageLogger : gko::log::Logger {
     // Store amount of bytes allocated on every allocation
-    void on_allocation_completed(const gko::Executor *,
+    void on_allocation_completed(const gko::MemorySpace *,
                                  const gko::size_type &num_bytes,
                                  const gko::uintptr &location) const override
     {
@@ -228,7 +231,7 @@ struct StorageLogger : gko::log::Logger {
     }
 
     // Reset the amount of bytes on every free
-    void on_free_completed(const gko::Executor *,
+    void on_free_completed(const gko::MemorySpace *,
                            const gko::uintptr &location) const override
     {
         storage[location] = 0;
@@ -245,7 +248,7 @@ struct StorageLogger : gko::log::Logger {
     }
 
     StorageLogger(std::shared_ptr<const gko::Executor> exec)
-        : gko::log::Logger(exec)
+        : gko::log::Logger(exec, exec->get_mem_space())
     {}
 
 private:
@@ -281,7 +284,8 @@ struct ResidualLogger : gko::log::Logger {
 
     ResidualLogger(std::shared_ptr<const gko::Executor> exec,
                    const gko::LinOp *matrix, const vec<ValueType> *b)
-        : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask),
+        : gko::log::Logger(exec, exec->get_mem_space(),
+                           gko::log::Logger::iteration_complete_mask),
           matrix{matrix},
           b{b}
     {}
@@ -370,8 +374,7 @@ int main(int argc, char *argv[])
 
     // Figure out where to run the code
     if (argc == 2 && (std::string(argv[1]) == "--help")) {
-        std::cerr << "Usage: " << argv[0] << " [executor]"
-                  << std::endl;
+        std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
         std::exit(-1);
     }
 
diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp
index 96d32fa4b2a..2ad3fdcade8 100644
--- a/examples/simple-solver-logging/simple-solver-logging.cpp
+++ b/examples/simple-solver-logging/simple-solver-logging.cpp
@@ -109,7 +109,7 @@ int main(int argc, char *argv[])
     // for convenience.
     std::shared_ptr<gko::log::Stream<ValueType>> stream_logger =
         gko::log::Stream<ValueType>::create(
-            exec,
+            exec, exec->get_mem_space(),
             gko::log::Logger::all_events_mask ^
                 gko::log::Logger::linop_factory_events_mask ^
                 gko::log::Logger::polymorphic_object_events_mask,
@@ -146,15 +146,17 @@ int main(int argc, char *argv[])
     // Logger class for more information.
     std::ofstream filestream("my_file.txt");
     solver->add_logger(gko::log::Stream<ValueType>::create(
-        exec, gko::log::Logger::all_events_mask, filestream));
+        exec, exec->get_mem_space(), gko::log::Logger::all_events_mask,
+        filestream));
     solver->add_logger(stream_logger);
 
     // Add another logger which puts all the data in an object, we can later
     // retrieve this object in our code. Here we only have want Executor
     // and criterion check completed events.
     std::shared_ptr<gko::log::Record> record_logger = gko::log::Record::create(
-        exec, gko::log::Logger::executor_events_mask |
-                  gko::log::Logger::criterion_check_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::memory_space_events_mask |
+            gko::log::Logger::criterion_check_completed_mask);
     exec->add_logger(record_logger);
     residual_criterion->add_logger(record_logger);
 
@@ -165,10 +167,10 @@ int main(int argc, char *argv[])
     // location copied
     auto &last_copy = record_logger->get().copy_completed.back();
     std::cout << "Last memory copied was of size " << std::hex
-              << std::get<0>(*last_copy).num_bytes << " FROM executor "
-              << std::get<0>(*last_copy).exec << " pointer "
-              << std::get<0>(*last_copy).location << " TO executor "
-              << std::get<1>(*last_copy).exec << " pointer "
+              << std::get<0>(*last_copy).num_bytes << " FROM Memory Space"
+              << std::get<0>(*last_copy).mem_space << " pointer "
+              << std::get<0>(*last_copy).location << " TO Memory Space "
+              << std::get<1>(*last_copy).mem_space << " pointer "
               << std::get<1>(*last_copy).location << std::dec << std::endl;
     // Also print the residual of the last criterion check event (where
     // convergence happened)
diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp
index 78fe81a617e..b2b8c51ef47 100644
--- a/include/ginkgo/core/base/exception.hpp
+++ b/include/ginkgo/core/base/exception.hpp
@@ -173,6 +173,30 @@ class NotSupported : public Error {
 };
 
 
+/**
+ * MemSpaceMismatch is thrown in case it is not possible to
+ * perform the requested operation on the given object type.
+ */
+class MemSpaceMismatch : public Error {
+public:
+    /**
+     * Initializes a MemSpaceMismatch error.
+     *
+     * @param file The name of the offending source file
+     * @param line The source code line number where the error occurred
+     * @param func The name of the function where the error occured
+     * @param obj_type The object type on which the requested operation
+     cannot be performed.
+    */
+    MemSpaceMismatch(const std::string &file, int line, const std::string &func,
+                     const std::string &obj_type)
+        : Error(file, line,
+                "This executor" + func +
+                    " does not support Memory space of type " + obj_type)
+    {}
+};
+
+
 /**
  * CudaError is thrown when a CUDA routine throws a non-zero error code.
  */
diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp
index 287b59380f8..34410836a6f 100644
--- a/include/ginkgo/core/base/exception_helpers.hpp
+++ b/include/ginkgo/core/base/exception_helpers.hpp
@@ -88,6 +88,19 @@ namespace gko {
                   "semi-colon warnings")
 
 
+/**
+ * Creates a MemSpaceMismatch exception.
+ * This macro sets the correct information about the location of the error
+ * and fills the exception with data about _obj.
+ *
+ * @param _obj  the object referenced by MemSpaceMismatch exception
+ *
+ * @return MemSpaceMismatch
+ */
+#define GKO_MEMSPACE_MISMATCH(_obj) \
+    throw ::gko::MemSpaceMismatch(__FILE__, __LINE__, __func__, GKO_QUOTE(_obj))
+
+
 namespace detail {
 
 
diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp
index c76f05c29c3..378e892e724 100644
--- a/include/ginkgo/core/base/executor.hpp
+++ b/include/ginkgo/core/base/executor.hpp
@@ -41,6 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <type_traits>
 
 
+#include <ginkgo/core/base/exception.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/log/logger.hpp>
@@ -488,7 +490,7 @@ class Executor : public log::EnableLogging<Executor> {
     template <typename T>
     void copy(size_type num_elems, const T *src_ptr, T *dest_ptr) const
     {
-        this->copy_from(this, num_elems, src_ptr, dest_ptr);
+        this->get_mem_space()->copy_from(this, num_elems, src_ptr, dest_ptr);
     }
 
     /**
@@ -504,7 +506,7 @@ class Executor : public log::EnableLogging<Executor> {
     T copy_val_to_host(const T *ptr) const
     {
         T out{};
-        this->get_master()->copy_from(this, 1, ptr, &out);
+        this->get_master()->get_mem_space()->copy_from(this, 1, ptr, &out);
         return out;
     }
 
@@ -698,14 +700,6 @@ class OmpExecutor : public detail::ExecutorBase<OmpExecutor>,
         }
     }
 
-    OmpExecutor(std::shared_ptr<MemorySpace> mem_space)
-        : mem_space_instance_(mem_space)
-    {
-        if (!check_mem_space_validity(mem_space_instance_)) {
-            GKO_MEMSPACE_MISMATCH(NOT_HOST);
-        }
-    }
-
     bool check_mem_space_validity(std::shared_ptr<MemorySpace> mem_space)
     {
         auto check_default_mem_space =
diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp
index 10ef4d5d4cf..541381bc68a 100644
--- a/include/ginkgo/core/base/types.hpp
+++ b/include/ginkgo/core/base/types.hpp
@@ -395,6 +395,23 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x,
     _enable_macro(CudaExecutor, cuda)
 
 
+/**
+ * Calls a given macro for each memory space type for a given kernel.
+ *
+ * The macro should take two parameters:
+ *
+ * -   the first one is replaced with the memory space class name
+ * -   the second one with the name of the kernel to be bound
+ *
+ * @param _enable_macro  macro name which will be called
+ */
+#define GKO_ENABLE_FOR_ALL_MEMORY_SPACES(_enable_macro) \
+    _enable_macro(HostMemorySpace, host);               \
+    _enable_macro(HipMemorySpace, hip);                 \
+    _enable_macro(CudaMemorySpace, cuda);               \
+    _enable_macro(CudaUVMSpace, cuda_uvm)
+
+
 /**
  * Instantiates a template for each non-complex value type compiled by Ginkgo.
  *
diff --git a/include/ginkgo/core/log/convergence.hpp b/include/ginkgo/core/log/convergence.hpp
index 64cd16ea1aa..13a6119e69c 100644
--- a/include/ginkgo/core/log/convergence.hpp
+++ b/include/ginkgo/core/log/convergence.hpp
@@ -88,10 +88,11 @@ class Convergence : public Logger {
      */
     static std::unique_ptr<Convergence> create(
         std::shared_ptr<const Executor> exec,
+        std::shared_ptr<const MemorySpace> mem_space,
         const mask_type &enabled_events = Logger::all_events_mask)
     {
         return std::unique_ptr<Convergence>(
-            new Convergence(exec, enabled_events));
+            new Convergence(exec, mem_space, enabled_events));
     }
 
     /**
@@ -131,8 +132,9 @@ class Convergence : public Logger {
      */
     explicit Convergence(
         std::shared_ptr<const gko::Executor> exec,
+        std::shared_ptr<const gko::MemorySpace> mem_space,
         const mask_type &enabled_events = Logger::all_events_mask)
-        : Logger(exec, enabled_events)
+        : Logger(exec, mem_space, enabled_events)
     {}
 
 private:
diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp
index 3d6ae9b4309..e92d22ab954 100644
--- a/include/ginkgo/core/log/logger.hpp
+++ b/include/ginkgo/core/log/logger.hpp
@@ -52,6 +52,7 @@ namespace gko {
 template <typename ValueType>
 class Array;
 class Executor;
+class MemorySpace;
 class LinOp;
 class LinOpFactory;
 class PolymorphicObject;
@@ -140,68 +141,74 @@ public:                                                              \
     static constexpr mask_type _event_name##_mask{mask_type{1} << _id};
 
     /**
-     * Executor's allocation started event.
+     * Memory Space's allocation started event.
      *
-     * @param exec  the executor used
+     * @param mem_space  the memory space used
      * @param num_bytes  the number of bytes to allocate
      */
-    GKO_LOGGER_REGISTER_EVENT(0, allocation_started, const Executor *exec,
+    GKO_LOGGER_REGISTER_EVENT(0, allocation_started,
+                              const MemorySpace *mem_space,
                               const size_type &num_bytes)
 
     /**
-     * Executor's allocation completed event.
+     * Memory Space's allocation completed event.
      *
-     * @param exec  the executor used
+     * @param mem_space  the memory space used
      * @param num_bytes  the number of bytes allocated
      * @param location  the address at which the data was allocated
      */
-    GKO_LOGGER_REGISTER_EVENT(1, allocation_completed, const Executor *exec,
+    GKO_LOGGER_REGISTER_EVENT(1, allocation_completed,
+                              const MemorySpace *mem_space,
                               const size_type &num_bytes,
                               const uintptr &location)
 
     /**
-     * Executor's free started event.
+     * Memory Space's free started event.
      *
-     * @param exec  the executor used
+     * @param mem_space  the memory space used
      * @param location  the address at which the data will be freed
      */
-    GKO_LOGGER_REGISTER_EVENT(2, free_started, const Executor *exec,
+    GKO_LOGGER_REGISTER_EVENT(2, free_started, const MemorySpace *mem_space,
                               const uintptr &location)
 
     /**
-     * Executor's free completed event.
+     * Memory Space's free completed event.
      *
-     * @param exec  the executor used
+     * @param mem_space  the memory space used
      * @param location  the address at which the data was freed
      */
-    GKO_LOGGER_REGISTER_EVENT(3, free_completed, const Executor *exec,
+    GKO_LOGGER_REGISTER_EVENT(3, free_completed, const MemorySpace *mem_space,
                               const uintptr &location)
 
     /**
-     * Executor's copy started event.
+     * Memory Space's copy started event.
 
-     * @param exec_from  the executor to be copied from
-     * @param exec_to  the executor to be copied to
+     * @param mem_space_from  the memory space to be copied from
+     * @param mem_space_to  the memory space to be copied to
      * @param loc_from  the address at which the data will be copied from
      * @param loc_to  the address at which the data will be copied to
      * @param num_bytes  the number of bytes to be copied
      */
-    GKO_LOGGER_REGISTER_EVENT(4, copy_started, const Executor *exec_from,
-                              const Executor *exec_to, const uintptr &loc_from,
-                              const uintptr &loc_to, const size_type &num_bytes)
+    GKO_LOGGER_REGISTER_EVENT(4, copy_started,
+                              const MemorySpace *mem_space_from,
+                              const MemorySpace *mem_space_to,
+                              const uintptr &loc_from, const uintptr &loc_to,
+                              const size_type &num_bytes)
 
     /**
-     * Executor's copy completed event.
+     * Memory Space's copy completed event.
      *
-     * @param exec_from  the executor copied from
-     * @param exec_to  the executor copied to
+     * @param mem_space_from  the memory space copied from
+     * @param mem_space_to  the memory space copied to
      * @param loc_from  the address at which the data was copied from
      * @param loc_to  the address at which the data was copied to
      * @param num_bytes  the number of bytes copied
      */
-    GKO_LOGGER_REGISTER_EVENT(5, copy_completed, const Executor *exec_from,
-                              const Executor *exec_to, const uintptr &loc_from,
-                              const uintptr &loc_to, const size_type &num_bytes)
+    GKO_LOGGER_REGISTER_EVENT(5, copy_completed,
+                              const MemorySpace *mem_space_from,
+                              const MemorySpace *mem_space_to,
+                              const uintptr &loc_from, const uintptr &loc_to,
+                              const size_type &num_bytes)
 
     /**
      * Executor's operation launched event (method run).
@@ -407,9 +414,9 @@ public:                                                              \
 #undef GKO_LOGGER_REGISTER_EVENT
 
     /**
-     * Bitset Mask which activates all executor events
+     * Bitset Mask which activates all memory space events
      */
-    static constexpr mask_type executor_events_mask =
+    static constexpr mask_type memory_space_events_mask =
         allocation_started_mask | allocation_completed_mask |
         free_started_mask | free_completed_mask | copy_started_mask |
         copy_completed_mask;
@@ -466,12 +473,14 @@ public:                                                              \
      *                           event.
      */
     explicit Logger(std::shared_ptr<const gko::Executor> exec,
+                    std::shared_ptr<const gko::MemorySpace> mem_space,
                     const mask_type &enabled_events = all_events_mask)
-        : exec_{exec}, enabled_events_{enabled_events}
+        : exec_{exec}, mem_space_{mem_space}, enabled_events_{enabled_events}
     {}
 
 private:
     std::shared_ptr<const Executor> exec_;
+    std::shared_ptr<const MemorySpace> mem_space_;
     mask_type enabled_events_;
 };
 
diff --git a/include/ginkgo/core/log/papi.hpp b/include/ginkgo/core/log/papi.hpp
index 7b54e478f6e..d38d7ef9529 100644
--- a/include/ginkgo/core/log/papi.hpp
+++ b/include/ginkgo/core/log/papi.hpp
@@ -91,26 +91,26 @@ static std::mutex papi_count_mutex;
 template <typename ValueType = default_precision>
 class Papi : public Logger {
 public:
-    /* Executor events */
-    void on_allocation_started(const Executor *exec,
+    /* Memory space events */
+    void on_allocation_started(const MemorySpace *mem_space,
                                const size_type &num_bytes) const override;
 
-    void on_allocation_completed(const Executor *exec,
+    void on_allocation_completed(const MemorySpace *mem_space,
                                  const size_type &num_bytes,
                                  const uintptr &location) const override;
 
-    void on_free_started(const Executor *exec,
+    void on_free_started(const MemorySpace *mem_space,
                          const uintptr &location) const override;
 
-    void on_free_completed(const Executor *exec,
+    void on_free_completed(const MemorySpace *mem_space,
                            const uintptr &location) const override;
 
-    void on_copy_started(const Executor *from, const Executor *to,
+    void on_copy_started(const MemorySpace *from, const MemorySpace *to,
                          const uintptr &location_from,
                          const uintptr &location_to,
                          const size_type &num_bytes) const override;
 
-    void on_copy_completed(const Executor *from, const Executor *to,
+    void on_copy_completed(const MemorySpace *from, const MemorySpace *to,
                            const uintptr &location_from,
                            const uintptr &location_to,
                            const size_type &num_bytes) const override;
@@ -185,9 +185,10 @@ class Papi : public Logger {
      */
     static std::shared_ptr<Papi> create(
         std::shared_ptr<const gko::Executor> exec,
+        std::shared_ptr<const gko::MemorySpace> mem_space,
         const Logger::mask_type &enabled_events = Logger::all_events_mask)
     {
-        return std::shared_ptr<Papi>(new Papi(exec, enabled_events));
+        return std::shared_ptr<Papi>(new Papi(exec, mem_space, enabled_events));
     }
 
     /**
@@ -201,8 +202,9 @@ class Papi : public Logger {
 protected:
     explicit Papi(
         std::shared_ptr<const gko::Executor> exec,
+        std::shared_ptr<const gko::MemorySpace> mem_space,
         const Logger::mask_type &enabled_events = Logger::all_events_mask)
-        : Logger(exec, enabled_events)
+        : Logger(exec, mem_space, enabled_events)
     {
         std::ostringstream os;
 
@@ -257,20 +259,21 @@ class Papi : public Logger {
     };
 
 
-    mutable papi_queue<Executor> allocation_started{&papi_handle,
-                                                    "allocation_started"};
-    mutable papi_queue<Executor> allocation_completed{&papi_handle,
-                                                      "allocation_completed"};
-    mutable papi_queue<Executor> free_started{&papi_handle, "free_started"};
-    mutable papi_queue<Executor> free_completed{&papi_handle, "free_completed"};
-    mutable papi_queue<Executor> copy_started_from{&papi_handle,
-                                                   "copy_started_from"};
-    mutable papi_queue<Executor> copy_started_to{&papi_handle,
-                                                 "copy_started_to"};
-    mutable papi_queue<Executor> copy_completed_from{&papi_handle,
-                                                     "copy_completed_from"};
-    mutable papi_queue<Executor> copy_completed_to{&papi_handle,
-                                                   "copy_completed_to"};
+    mutable papi_queue<MemorySpace> allocation_started{&papi_handle,
+                                                       "allocation_started"};
+    mutable papi_queue<MemorySpace> allocation_completed{
+        &papi_handle, "allocation_completed"};
+    mutable papi_queue<MemorySpace> free_started{&papi_handle, "free_started"};
+    mutable papi_queue<MemorySpace> free_completed{&papi_handle,
+                                                   "free_completed"};
+    mutable papi_queue<MemorySpace> copy_started_from{&papi_handle,
+                                                      "copy_started_from"};
+    mutable papi_queue<MemorySpace> copy_started_to{&papi_handle,
+                                                    "copy_started_to"};
+    mutable papi_queue<MemorySpace> copy_completed_from{&papi_handle,
+                                                        "copy_completed_from"};
+    mutable papi_queue<MemorySpace> copy_completed_to{&papi_handle,
+                                                      "copy_completed_to"};
 
     mutable papi_queue<Executor> operation_launched{&papi_handle,
                                                     "operation_launched"};
diff --git a/include/ginkgo/core/log/record.hpp b/include/ginkgo/core/log/record.hpp
index 0c791e5e278..b1ffcd4e0f2 100644
--- a/include/ginkgo/core/log/record.hpp
+++ b/include/ginkgo/core/log/record.hpp
@@ -90,8 +90,8 @@ struct iteration_complete_data {
 /**
  * Struct representing Executor related data
  */
-struct executor_data {
-    const Executor *exec;
+struct memory_space_data {
+    const MemorySpace *mem_space;
     const size_type num_bytes;
     const uintptr location;
 };
@@ -235,13 +235,15 @@ class Record : public Logger {
      * Struct storing the actually logged data
      */
     struct logged_data {
-        std::deque<std::unique_ptr<executor_data>> allocation_started;
-        std::deque<std::unique_ptr<executor_data>> allocation_completed;
-        std::deque<std::unique_ptr<executor_data>> free_started;
-        std::deque<std::unique_ptr<executor_data>> free_completed;
-        std::deque<std::unique_ptr<std::tuple<executor_data, executor_data>>>
+        std::deque<std::unique_ptr<memory_space_data>> allocation_started;
+        std::deque<std::unique_ptr<memory_space_data>> allocation_completed;
+        std::deque<std::unique_ptr<memory_space_data>> free_started;
+        std::deque<std::unique_ptr<memory_space_data>> free_completed;
+        std::deque<
+            std::unique_ptr<std::tuple<memory_space_data, memory_space_data>>>
             copy_started;
-        std::deque<std::unique_ptr<std::tuple<executor_data, executor_data>>>
+        std::deque<
+            std::unique_ptr<std::tuple<memory_space_data, memory_space_data>>>
             copy_completed;
 
         std::deque<std::unique_ptr<operation_data>> operation_launched;
@@ -275,25 +277,25 @@ class Record : public Logger {
     };
 
     /* Executor events */
-    void on_allocation_started(const Executor *exec,
+    void on_allocation_started(const MemorySpace *mem_space,
                                const size_type &num_bytes) const override;
 
-    void on_allocation_completed(const Executor *exec,
+    void on_allocation_completed(const MemorySpace *mem_space,
                                  const size_type &num_bytes,
                                  const uintptr &location) const override;
 
-    void on_free_started(const Executor *exec,
+    void on_free_started(const MemorySpace *mem_space,
                          const uintptr &location) const override;
 
-    void on_free_completed(const Executor *exec,
+    void on_free_completed(const MemorySpace *mem_space,
                            const uintptr &location) const override;
 
-    void on_copy_started(const Executor *from, const Executor *to,
+    void on_copy_started(const MemorySpace *from, const MemorySpace *to,
                          const uintptr &location_from,
                          const uintptr &location_to,
                          const size_type &num_bytes) const override;
 
-    void on_copy_completed(const Executor *from, const Executor *to,
+    void on_copy_completed(const MemorySpace *from, const MemorySpace *to,
                            const uintptr &location_from,
                            const uintptr &location_to,
                            const size_type &num_bytes) const override;
@@ -390,11 +392,12 @@ class Record : public Logger {
      */
     static std::unique_ptr<Record> create(
         std::shared_ptr<const Executor> exec,
+        std::shared_ptr<const MemorySpace> mem_space,
         const mask_type &enabled_events = Logger::all_events_mask,
         size_type max_storage = 1)
     {
         return std::unique_ptr<Record>(
-            new Record(exec, enabled_events, max_storage));
+            new Record(exec, mem_space, enabled_events, max_storage));
     }
 
     /**
@@ -422,9 +425,10 @@ class Record : public Logger {
      *                     memory overhead of this logger.
      */
     explicit Record(std::shared_ptr<const gko::Executor> exec,
+                    std::shared_ptr<const gko::MemorySpace> mem_space,
                     const mask_type &enabled_events = Logger::all_events_mask,
                     size_type max_storage = 0)
-        : Logger(exec, enabled_events), max_storage_{max_storage}
+        : Logger(exec, mem_space, enabled_events), max_storage_{max_storage}
     {}
 
     /**
diff --git a/include/ginkgo/core/log/stream.hpp b/include/ginkgo/core/log/stream.hpp
index d46a0d07be0..221b1112f0a 100644
--- a/include/ginkgo/core/log/stream.hpp
+++ b/include/ginkgo/core/log/stream.hpp
@@ -59,25 +59,25 @@ template <typename ValueType = default_precision>
 class Stream : public Logger {
 public:
     /* Executor events */
-    void on_allocation_started(const Executor *exec,
+    void on_allocation_started(const MemorySpace *mem_space,
                                const size_type &num_bytes) const override;
 
-    void on_allocation_completed(const Executor *exec,
+    void on_allocation_completed(const MemorySpace *mem_space,
                                  const size_type &num_bytes,
                                  const uintptr &location) const override;
 
-    void on_free_started(const Executor *exec,
+    void on_free_started(const MemorySpace *mem_space,
                          const uintptr &location) const override;
 
-    void on_free_completed(const Executor *exec,
+    void on_free_completed(const MemorySpace *mem_space,
                            const uintptr &location) const override;
 
-    void on_copy_started(const Executor *from, const Executor *to,
+    void on_copy_started(const MemorySpace *from, const MemorySpace *to,
                          const uintptr &location_from,
                          const uintptr &location_to,
                          const size_type &num_bytes) const override;
 
-    void on_copy_completed(const Executor *from, const Executor *to,
+    void on_copy_completed(const MemorySpace *from, const MemorySpace *to,
                            const uintptr &location_from,
                            const uintptr &location_to,
                            const size_type &num_bytes) const override;
@@ -173,11 +173,12 @@ class Stream : public Logger {
      */
     static std::unique_ptr<Stream> create(
         std::shared_ptr<const Executor> exec,
+        std::shared_ptr<const MemorySpace> mem_space,
         const Logger::mask_type &enabled_events = Logger::all_events_mask,
         std::ostream &os = std::cout, bool verbose = false)
     {
         return std::unique_ptr<Stream>(
-            new Stream(exec, enabled_events, os, verbose));
+            new Stream(exec, mem_space, enabled_events, os, verbose));
     }
 
 protected:
@@ -194,9 +195,10 @@ class Stream : public Logger {
      */
     explicit Stream(
         std::shared_ptr<const gko::Executor> exec,
+        std::shared_ptr<const gko::MemorySpace> mem_space,
         const Logger::mask_type &enabled_events = Logger::all_events_mask,
         std::ostream &os = std::cout, bool verbose = false)
-        : Logger(exec, enabled_events), os_(os), verbose_(verbose)
+        : Logger(exec, mem_space, enabled_events), os_(os), verbose_(verbose)
     {}
 
 
diff --git a/reference/test/log/convergence.cpp b/reference/test/log/convergence.cpp
index 01a9b17c303..efca64341d7 100644
--- a/reference/test/log/convergence.cpp
+++ b/reference/test/log/convergence.cpp
@@ -58,7 +58,8 @@ TYPED_TEST(Convergence, CatchesCriterionCheckCompleted)
 {
     auto exec = gko::ReferenceExecutor::create();
     auto logger = gko::log::Convergence<TypeParam>::create(
-        exec, gko::log::Logger::criterion_check_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::criterion_check_completed_mask);
     auto criterion =
         gko::stop::Iteration::build().with_max_iters(3u).on(exec)->generate(
             nullptr, nullptr, nullptr);
diff --git a/reference/test/log/papi.cpp b/reference/test/log/papi.cpp
index 842b6214374..cf84e6977a4 100644
--- a/reference/test/log/papi.cpp
+++ b/reference/test/log/papi.cpp
@@ -73,7 +73,7 @@ class Papi : public ::testing::Test {
     const std::string init(const gko::log::Logger::mask_type &event,
                            const std::string &event_name, U *ptr)
     {
-        logger = gko::log::Papi<T>::create(exec, event);
+        logger = gko::log::Papi<T>::create(exec, exec->get_mem_space(), event);
         std::ostringstream os;
         os << "sde:::" << logger->get_handle_name() << "::" << event_name << "_"
            << reinterpret_cast<gko::uintptr>(ptr);

From bba2bd982e7430cdff2d5a7fbaf528783826c9f0 Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Tue, 20 Oct 2020 09:48:09 +0200
Subject: [PATCH 6/8] Update Array and change ExecutorAllocator to
 MemSpaceAllocator.

---
 core/base/allocator.hpp               | 12 ++++++++---
 core/device_hooks/cuda_hooks.cpp      | 17 ++++++++++++---
 core/device_hooks/hip_hooks.cpp       | 11 +++++++---
 core/test/base/array.cpp              |  5 +++--
 include/ginkgo/core/base/array.hpp    | 30 ++++++++++++++++-----------
 include/ginkgo/core/base/executor.hpp |  6 ++++--
 6 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/core/base/allocator.hpp b/core/base/allocator.hpp
index 3f618974b46..76ac31b1147 100644
--- a/core/base/allocator.hpp
+++ b/core/base/allocator.hpp
@@ -99,7 +99,10 @@ class ExecutorAllocator {
      * @param n  the number of elements to allocate
      * @return  the pointer to a newly allocated memory area of `n` elements.
      */
-    T *allocate(std::size_t n) const { return exec_->alloc<T>(n); }
+    T *allocate(std::size_t n) const
+    {
+        return exec_->get_mem_space()->alloc<T>(n);
+    }
 
     /**
      * Frees a memory area that was allocated by this allocator.
@@ -108,7 +111,10 @@ class ExecutorAllocator {
      *
      * @note  The second parameter is unused.
      */
-    void deallocate(T *ptr, std::size_t) const { exec_->free(ptr); }
+    void deallocate(T *ptr, std::size_t) const
+    {
+        exec_->get_mem_space()->free(ptr);
+    }
 
     /**
      * Compares two ExecutorAllocators for equality
@@ -172,4 +178,4 @@ using unordered_map =
 
 }  // namespace gko
 
-#endif  // GKO_CORE_BASE_ALLOCATOR_HPP_
\ No newline at end of file
+#endif  // GKO_CORE_BASE_ALLOCATOR_HPP_
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
index be44f709872..024e55dbcbb 100644
--- a/core/device_hooks/cuda_hooks.cpp
+++ b/core/device_hooks/cuda_hooks.cpp
@@ -67,9 +67,8 @@ std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<MemorySpace> mem_space,
     std::shared_ptr<Executor> master, bool device_reset)
 {
-    return std::shared_ptr<CudaExecutor>(
-        new CudaExecutor(device_id, mem_space, std::move(master)),
-        device_reset);
+    return std::shared_ptr<CudaExecutor>(new CudaExecutor(
+        device_id, mem_space, std::move(master), device_reset));
 }
 
 
@@ -150,6 +149,12 @@ void CudaUVMSpace::raw_copy_to(const HostMemorySpace *dest_mem_space,
 void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda);
 
 
+void CudaMemorySpace::synchronize() const GKO_NOT_COMPILED(cuda);
+
+
+void CudaUVMSpace::synchronize() const GKO_NOT_COMPILED(cuda);
+
+
 void CudaExecutor::run(const Operation &op) const
 {
     op.run(
@@ -178,6 +183,12 @@ std::string CusparseError::get_error(int64)
 int CudaExecutor::get_num_devices() { return 0; }
 
 
+int CudaMemorySpace::get_num_devices() { return 0; }
+
+
+int CudaUVMSpace::get_num_devices() { return 0; }
+
+
 void CudaExecutor::set_gpu_property() {}
 
 
diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
index 8e658299816..f0178ab7df5 100644
--- a/core/device_hooks/hip_hooks.cpp
+++ b/core/device_hooks/hip_hooks.cpp
@@ -64,9 +64,8 @@ std::shared_ptr<HipExecutor> HipExecutor::create(
     int device_id, std::shared_ptr<MemorySpace> memory_space,
     std::shared_ptr<Executor> master, bool device_reset)
 {
-    return std::shared_ptr<HipExecutor>(
-        new HipExecutor(device_id, memory_space, std::move(master)),
-        device_reset);
+    return std::shared_ptr<HipExecutor>(new HipExecutor(
+        device_id, memory_space, std::move(master), device_reset));
 }
 
 void HostMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes,
@@ -109,6 +108,9 @@ void HipMemorySpace::raw_copy_to(const HipMemorySpace *, size_type num_bytes,
 void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip);
 
 
+void HipMemorySpace::synchronize() const GKO_NOT_COMPILED(hip);
+
+
 void HipExecutor::run(const Operation &op) const
 {
     op.run(
@@ -137,6 +139,9 @@ std::string HipsparseError::get_error(int64)
 int HipExecutor::get_num_devices() { return 0; }
 
 
+int HipMemorySpace::get_num_devices() { return 0; }
+
+
 void HipExecutor::set_gpu_property() {}
 
 
diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp
index b1e04dd2f39..251016c661c 100644
--- a/core/test/base/array.cpp
+++ b/core/test/base/array.cpp
@@ -110,8 +110,9 @@ TYPED_TEST(Array, CanBeCreatedFromExistingData)
 
 TYPED_TEST(Array, CanBeCreatedFromDataOnExecutor)
 {
-    gko::Array<TypeParam> a{this->exec, 3,
-                            this->exec->template alloc<TypeParam>(3)};
+    gko::Array<TypeParam> a{
+        this->exec, 3,
+        this->exec->get_mem_space()->template alloc<TypeParam>(3)};
 
     EXPECT_EQ(a.get_num_elems(), 3);
 }
diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp
index 7f0df5c27f6..798e091452a 100644
--- a/include/ginkgo/core/base/array.hpp
+++ b/include/ginkgo/core/base/array.hpp
@@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/exception.hpp>
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory_space.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/utils.hpp>
 
@@ -91,7 +92,7 @@ class Array {
     /**
      * The default deleter type used by Array.
      */
-    using default_deleter = executor_deleter<value_type[]>;
+    using default_deleter = memory_space_deleter<value_type[]>;
 
     /**
      * The deleter type used for views.
@@ -124,7 +125,9 @@ class Array {
      */
     Array(std::shared_ptr<const Executor> exec) noexcept
         : num_elems_(0),
-          data_(nullptr, default_deleter{exec}),
+          data_(nullptr,
+                default_deleter{exec == nullptr ? nullptr
+                                                : exec->get_mem_space()}),
           exec_(std::move(exec))
     {}
 
@@ -137,11 +140,11 @@ class Array {
      */
     Array(std::shared_ptr<const Executor> exec, size_type num_elems)
         : num_elems_(num_elems),
-          data_(nullptr, default_deleter{exec}),
+          data_(nullptr, default_deleter{exec->get_mem_space()}),
           exec_(std::move(exec))
     {
         if (num_elems > 0) {
-            data_.reset(exec_->alloc<value_type>(num_elems));
+            data_.reset(exec_->get_mem_space()->alloc<value_type>(num_elems));
         }
     }
 
@@ -181,7 +184,7 @@ class Array {
      */
     Array(std::shared_ptr<const Executor> exec, size_type num_elems,
           value_type *data)
-        : Array(exec, num_elems, data, default_deleter{exec})
+        : Array(exec, num_elems, data, default_deleter{exec->get_mem_space()})
     {}
 
     /**
@@ -324,8 +327,9 @@ class Array {
             GKO_ENSURE_COMPATIBLE_BOUNDS(other.get_num_elems(),
                                          this->num_elems_);
         }
-        exec_->copy_from(other.get_executor().get(), other.get_num_elems(),
-                         other.get_const_data(), this->get_data());
+        exec_->get_mem_space()->copy_from(
+            other.get_executor()->get_mem_space().get(), other.get_num_elems(),
+            other.get_const_data(), this->get_data());
         return *this;
     }
 
@@ -371,7 +375,7 @@ class Array {
             this->clear();
             return *this;
         }
-        if (exec_ == other.get_executor()) {
+        if ((exec_->get_mem_space() == other.get_executor()->get_mem_space())) {
             // same device, only move the pointer
             using std::swap;
             swap(data_, other.data_);
@@ -407,7 +411,8 @@ class Array {
     {
         if (this->exec_ == nullptr) {
             this->exec_ = other.get_executor();
-            this->data_ = data_manager{nullptr, default_deleter{this->exec_}};
+            this->data_ = data_manager{
+                nullptr, default_deleter{this->exec_->get_mem_space()}};
         }
         if (other.get_executor() == nullptr) {
             this->clear();
@@ -423,7 +428,8 @@ class Array {
         Array<OtherValueType> tmp{this->exec_};
         const OtherValueType *source = other.get_const_data();
         // if we are on different executors: copy, then convert
-        if (this->exec_ != other.get_executor()) {
+        if (this->exec_->get_mem_space() !=
+            other.get_executor()->get_mem_space()) {
             tmp = other;
             source = tmp.get_const_data();
         }
@@ -473,7 +479,7 @@ class Array {
 
         if (num_elems > 0 && this->is_owning()) {
             num_elems_ = num_elems;
-            data_.reset(exec_->alloc<value_type>(num_elems));
+            data_.reset(exec_->get_mem_space()->alloc<value_type>(num_elems));
         } else {
             this->clear();
         }
@@ -522,7 +528,7 @@ class Array {
      */
     void set_executor(std::shared_ptr<const Executor> exec)
     {
-        if (exec == exec_) {
+        if (exec_ && exec->get_mem_space() == exec_->get_mem_space()) {
             // moving to the same executor, no-op
             return;
         }
diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp
index 378e892e724..477c9ec782d 100644
--- a/include/ginkgo/core/base/executor.hpp
+++ b/include/ginkgo/core/base/executor.hpp
@@ -490,7 +490,8 @@ class Executor : public log::EnableLogging<Executor> {
     template <typename T>
     void copy(size_type num_elems, const T *src_ptr, T *dest_ptr) const
     {
-        this->get_mem_space()->copy_from(this, num_elems, src_ptr, dest_ptr);
+        this->get_mem_space()->copy_from(this->get_mem_space().get(), num_elems,
+                                         src_ptr, dest_ptr);
     }
 
     /**
@@ -506,7 +507,8 @@ class Executor : public log::EnableLogging<Executor> {
     T copy_val_to_host(const T *ptr) const
     {
         T out{};
-        this->get_master()->get_mem_space()->copy_from(this, 1, ptr, &out);
+        this->get_master()->get_mem_space()->copy_from(this->get_mem_space().get(), 1,
+                                                       ptr, &out);
         return out;
     }
 

From ccdc83373464269137cfadca2dca0da28c5a0b95 Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Tue, 20 Oct 2020 14:10:43 +0200
Subject: [PATCH 7/8] Update CUDA and HIP to use memory space.

---
 cuda/base/executor.cpp                        | 93 +++----------------
 cuda/factorization/par_ilut_select_common.cu  |  3 +-
 cuda/solver/common_trs_kernels.cuh            |  6 +-
 cuda/test/base/cuda_executor.cu               |  4 +-
 hip/base/executor.hip.cpp                     | 93 +++----------------
 .../par_ilut_select_common.hip.cpp            |  3 +-
 hip/solver/common_trs_kernels.hip.hpp         |  4 +-
 hip/test/base/hip_executor.hip.cpp            |  4 +-
 8 files changed, 40 insertions(+), 170 deletions(-)

diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp
index 543e78131e0..d1dea06e99c 100644
--- a/cuda/base/executor.cpp
+++ b/cuda/base/executor.cpp
@@ -71,87 +71,20 @@ std::shared_ptr<CudaExecutor> CudaExecutor::create(
 }
 
 
-void OmpExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
-{
-    if (num_bytes > 0) {
-        cuda::device_guard g(dest->get_device_id());
-        GKO_ASSERT_NO_CUDA_ERRORS(
-            cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyHostToDevice));
-    }
-}
-
-
-void CudaExecutor::raw_free(void *ptr) const noexcept
-{
-    cuda::device_guard g(this->get_device_id());
-    auto error_code = cudaFree(ptr);
-    if (error_code != cudaSuccess) {
-#if GKO_VERBOSE_LEVEL >= 1
-        // Unfortunately, if memory free fails, there's not much we can do
-        std::cerr << "Unrecoverable CUDA error on device " << this->device_id_
-                  << " in " << __func__ << ": " << cudaGetErrorName(error_code)
-                  << ": " << cudaGetErrorString(error_code) << std::endl
-                  << "Exiting program" << std::endl;
-#endif  // GKO_VERBOSE_LEVEL >= 1
-        std::exit(error_code);
-    }
-}
-
-
-void *CudaExecutor::raw_alloc(size_type num_bytes) const
-{
-    void *dev_ptr = nullptr;
-    cuda::device_guard g(this->get_device_id());
-#ifdef NDEBUG
-    auto error_code = cudaMalloc(&dev_ptr, num_bytes);
-#else
-    auto error_code = cudaMallocManaged(&dev_ptr, num_bytes);
-#endif
-    if (error_code != cudaErrorMemoryAllocation) {
-        GKO_ASSERT_NO_CUDA_ERRORS(error_code);
-    }
-    GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes);
-    return dev_ptr;
-}
-
-
-void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
-                               const void *src_ptr, void *dest_ptr) const
-{
-    if (num_bytes > 0) {
-        cuda::device_guard g(this->get_device_id());
-        GKO_ASSERT_NO_CUDA_ERRORS(
-            cudaMemcpy(dest_ptr, src_ptr, num_bytes, cudaMemcpyDeviceToHost));
-    }
-}
-
-
-void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes,
-                               const void *src_ptr, void *dest_ptr) const
-{
-    if (num_bytes > 0) {
-        cuda::device_guard g(this->get_device_id());
-        GKO_ASSERT_NO_CUDA_ERRORS(
-            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
-                           this->get_device_id(), num_bytes));
-    }
-}
-
-
-void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes,
-                               const void *src_ptr, void *dest_ptr) const
+std::shared_ptr<CudaExecutor> CudaExecutor::create(
+    int device_id, std::shared_ptr<MemorySpace> mem_space,
+    std::shared_ptr<Executor> master, bool device_reset)
 {
-#if GINKGO_HIP_PLATFORM_NVCC == 1
-    if (num_bytes > 0) {
-        cuda::device_guard g(this->get_device_id());
-        GKO_ASSERT_NO_CUDA_ERRORS(
-            cudaMemcpyPeer(dest_ptr, dest->get_device_id(), src_ptr,
-                           this->get_device_id(), num_bytes));
-    }
-#else
-    GKO_NOT_SUPPORTED(this);
-#endif
+    return std::shared_ptr<CudaExecutor>(
+        new CudaExecutor(device_id, mem_space, std::move(master), device_reset),
+        [device_id](CudaExecutor *exec) {
+            delete exec;
+            if (!CudaExecutor::get_num_execs(device_id) &&
+                exec->get_device_reset()) {
+                cuda::device_guard g(device_id);
+                cudaDeviceReset();
+            }
+        });
 }
 
 
diff --git a/cuda/factorization/par_ilut_select_common.cu b/cuda/factorization/par_ilut_select_common.cu
index e4d9a4c1e93..93356d6f31d 100644
--- a/cuda/factorization/par_ilut_select_common.cu
+++ b/cuda/factorization/par_ilut_select_common.cu
@@ -100,7 +100,8 @@ sampleselect_bucket<IndexType> sampleselect_find_bucket(
 {
     kernel::find_bucket<<<1, config::warp_size>>>(prefix_sum, rank);
     IndexType values[3]{};
-    exec->get_master()->copy_from(exec.get(), 3, prefix_sum, values);
+    exec->get_master()->get_mem_space()->copy_from(exec->get_mem_space().get(),
+                                                   3, prefix_sum, values);
     return {values[0], values[1], values[2]};
 }
 
diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh
index f16be5ee0e1..3c57ff17b26 100644
--- a/cuda/solver/common_trs_kernels.cuh
+++ b/cuda/solver/common_trs_kernels.cuh
@@ -222,10 +222,12 @@ void generate_kernel(std::shared_ptr<const CudaExecutor> exec,
 
                 // allocate workspace
                 if (cuda_solve_struct->factor_work_vec != nullptr) {
-                    exec->free(cuda_solve_struct->factor_work_vec);
+                    exec->get_mem_space()->free(
+                        cuda_solve_struct->factor_work_vec);
                 }
                 cuda_solve_struct->factor_work_vec =
-                    exec->alloc<void *>(cuda_solve_struct->factor_work_size);
+                    exec->get_mem_space()->alloc<void *>(
+                        cuda_solve_struct->factor_work_size);
 
                 cusparse::csrsm2_analysis(
                     handle, cuda_solve_struct->algorithm,
diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu
index aaa1c9c99f2..8cb1de9ccd5 100644
--- a/cuda/test/base/cuda_executor.cu
+++ b/cuda/test/base/cuda_executor.cu
@@ -131,12 +131,12 @@ TEST_F(CudaExecutor, PreservesDeviceSettings)
 {
     auto previous_device = gko::CudaExecutor::get_num_devices() - 1;
     GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(previous_device));
-    auto orig = cuda->alloc<int>(2);
+    auto orig = cuda->get_mem_space()->alloc<int>(2);
     int current_device;
     GKO_ASSERT_NO_CUDA_ERRORS(cudaGetDevice(&current_device));
     ASSERT_EQ(current_device, previous_device);
 
-    cuda->free(orig);
+    cuda->get_mem_space()->free(orig);
     GKO_ASSERT_NO_CUDA_ERRORS(cudaGetDevice(&current_device));
     ASSERT_EQ(current_device, previous_device);
 }
diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp
index f41fb69f46c..7bcab762951 100644
--- a/hip/base/executor.hip.cpp
+++ b/hip/base/executor.hip.cpp
@@ -71,87 +71,20 @@ std::shared_ptr<HipExecutor> HipExecutor::create(
 }
 
 
-void OmpExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
-{
-    if (num_bytes > 0) {
-        hip::device_guard g(dest->get_device_id());
-        GKO_ASSERT_NO_HIP_ERRORS(
-            hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyHostToDevice));
-    }
-}
-
-
-void HipExecutor::raw_free(void *ptr) const noexcept
-{
-    hip::device_guard g(this->get_device_id());
-    auto error_code = hipFree(ptr);
-    if (error_code != hipSuccess) {
-#if GKO_VERBOSE_LEVEL >= 1
-        // Unfortunately, if memory free fails, there's not much we can do
-        std::cerr << "Unrecoverable HIP error on device " << this->device_id_
-                  << " in " << __func__ << ": " << hipGetErrorName(error_code)
-                  << ": " << hipGetErrorString(error_code) << std::endl
-                  << "Exiting program" << std::endl;
-#endif  // GKO_VERBOSE_LEVEL >= 1
-        std::exit(error_code);
-    }
-}
-
-
-void *HipExecutor::raw_alloc(size_type num_bytes) const
-{
-    void *dev_ptr = nullptr;
-    hip::device_guard g(this->get_device_id());
-#if defined(NDEBUG) || (GINKGO_HIP_PLATFORM_HCC == 1)
-    auto error_code = hipMalloc(&dev_ptr, num_bytes);
-#else
-    auto error_code = hipMallocManaged(&dev_ptr, num_bytes);
-#endif
-    if (error_code != hipErrorMemoryAllocation) {
-        GKO_ASSERT_NO_HIP_ERRORS(error_code);
-    }
-    GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes);
-    return dev_ptr;
-}
-
-
-void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
-{
-    if (num_bytes > 0) {
-        hip::device_guard g(this->get_device_id());
-        GKO_ASSERT_NO_HIP_ERRORS(
-            hipMemcpy(dest_ptr, src_ptr, num_bytes, hipMemcpyDeviceToHost));
-    }
-}
-
-
-void HipExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
-{
-#if GINKGO_HIP_PLATFORM_NVCC == 1
-    if (num_bytes > 0) {
-        hip::device_guard g(this->get_device_id());
-        GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
-                                               src_ptr, this->get_device_id(),
-                                               num_bytes));
-    }
-#else
-    GKO_NOT_SUPPORTED(this);
-#endif
-}
-
-
-void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes,
-                              const void *src_ptr, void *dest_ptr) const
+std::shared_ptr<HipExecutor> HipExecutor::create(
+    int device_id, std::shared_ptr<MemorySpace> mem_space,
+    std::shared_ptr<Executor> master, bool device_reset)
 {
-    if (num_bytes > 0) {
-        hip::device_guard g(this->get_device_id());
-        GKO_ASSERT_NO_HIP_ERRORS(hipMemcpyPeer(dest_ptr, dest->get_device_id(),
-                                               src_ptr, this->get_device_id(),
-                                               num_bytes));
-    }
+    return std::shared_ptr<HipExecutor>(
+        new HipExecutor(device_id, mem_space, std::move(master), device_reset),
+        [device_id](HipExecutor *exec) {
+            delete exec;
+            if (!HipExecutor::get_num_execs(device_id) &&
+                exec->get_device_reset()) {
+                hip::device_guard g(device_id);
+                hipDeviceReset();
+            }
+        });
 }
 
 
diff --git a/hip/factorization/par_ilut_select_common.hip.cpp b/hip/factorization/par_ilut_select_common.hip.cpp
index 92431c6b0a3..9a0f75833a5 100644
--- a/hip/factorization/par_ilut_select_common.hip.cpp
+++ b/hip/factorization/par_ilut_select_common.hip.cpp
@@ -109,7 +109,8 @@ sampleselect_bucket<IndexType> sampleselect_find_bucket(
     hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::find_bucket), dim3(1),
                        dim3(config::warp_size), 0, 0, prefix_sum, rank);
     IndexType values[3]{};
-    exec->get_master()->copy_from(exec.get(), 3, prefix_sum, values);
+    exec->get_master()->get_mem_space()->copy_from(exec->get_mem_space().get(),
+                                                   3, prefix_sum, values);
     return {values[0], values[1], values[2]};
 }
 
diff --git a/hip/solver/common_trs_kernels.hip.hpp b/hip/solver/common_trs_kernels.hip.hpp
index 3bf0e56c7fa..025e25e29ab 100644
--- a/hip/solver/common_trs_kernels.hip.hpp
+++ b/hip/solver/common_trs_kernels.hip.hpp
@@ -159,10 +159,10 @@ void generate_kernel(std::shared_ptr<const HipExecutor> exec,
 
                 // allocate workspace
                 if (hip_solve_struct->factor_work_vec != nullptr) {
-                    exec->free(hip_solve_struct->factor_work_vec);
+                    exec->get_mem_space()->free(hip_solve_struct->factor_work_vec);
                 }
                 hip_solve_struct->factor_work_vec =
-                    exec->alloc<void *>(hip_solve_struct->factor_work_size);
+                    exec->get_mem_space()->alloc<void *>(hip_solve_struct->factor_work_size);
 
                 hipsparse::csrsv2_analysis(
                     handle, HIPSPARSE_OPERATION_NON_TRANSPOSE,
diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp
index 7ff522bb62a..bb10c091feb 100644
--- a/hip/test/base/hip_executor.hip.cpp
+++ b/hip/test/base/hip_executor.hip.cpp
@@ -136,12 +136,12 @@ TEST_F(HipExecutor, PreservesDeviceSettings)
 {
     auto previous_device = gko::HipExecutor::get_num_devices() - 1;
     GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(previous_device));
-    auto orig = hip->alloc<int>(2);
+    auto orig = hip->get_mem_space()->alloc<int>(2);
     int current_device;
     GKO_ASSERT_NO_HIP_ERRORS(hipGetDevice(&current_device));
     ASSERT_EQ(current_device, previous_device);
 
-    hip->free(orig);
+    hip->get_mem_space()->free(orig);
     GKO_ASSERT_NO_HIP_ERRORS(hipGetDevice(&current_device));
     ASSERT_EQ(current_device, previous_device);
 }

From 48c9e618d1ba47c266de1474bbd7993b0c4a8bf6 Mon Sep 17 00:00:00 2001
From: Pratik Nayak <pratikvn@protonmail.com>
Date: Tue, 20 Oct 2020 14:11:06 +0200
Subject: [PATCH 8/8] Update test_install.

---
 core/base/allocator.hpp                |  2 +-
 examples/papi-logging/papi-logging.cpp |  8 ++++----
 test_install/test_install.cpp          | 22 ++++++++++++----------
 test_install/test_install_cuda.cu      | 18 ++++++++----------
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/core/base/allocator.hpp b/core/base/allocator.hpp
index 76ac31b1147..8b50a14cac7 100644
--- a/core/base/allocator.hpp
+++ b/core/base/allocator.hpp
@@ -101,7 +101,7 @@ class ExecutorAllocator {
      */
     T *allocate(std::size_t n) const
     {
-        return exec_->get_mem_space()->alloc<T>(n);
+        return exec_->get_mem_space()->template alloc<T>(n);
     }
 
     /**
diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp
index 2fd8cbbc8d4..0d2153d3a22 100644
--- a/examples/papi-logging/papi-logging.cpp
+++ b/examples/papi-logging/papi-logging.cpp
@@ -140,8 +140,7 @@ int main(int argc, char *argv[])
     std::cout << gko::version_info::get() << std::endl;
 
     if (argc == 2 && (std::string(argv[1]) == "--help")) {
-        std::cerr << "Usage: " << argv[0] << " [executor]"
-                  << std::endl;
+        std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
         std::exit(-1);
     }
 
@@ -192,8 +191,9 @@ int main(int argc, char *argv[])
 
     // Create a PAPI logger and add it to relevant LinOps
     auto logger = gko::log::Papi<ValueType>::create(
-        exec, gko::log::Logger::linop_apply_completed_mask |
-                  gko::log::Logger::linop_advanced_apply_completed_mask);
+        exec, exec->get_mem_space(),
+        gko::log::Logger::linop_apply_completed_mask |
+            gko::log::Logger::linop_advanced_apply_completed_mask);
     solver->add_logger(logger);
     A->add_logger(logger);
 
diff --git a/test_install/test_install.cpp b/test_install/test_install.cpp
index 5ea59794440..559a05abeeb 100644
--- a/test_install/test_install.cpp
+++ b/test_install/test_install.cpp
@@ -183,23 +183,25 @@ int main(int, char **)
 
     // core/log/convergence.hpp
     {
-        auto test = gko::log::Convergence<>::create(refExec);
+        auto test =
+            gko::log::Convergence<>::create(refExec, refExec->get_mem_space());
     }
 
     // core/log/record.hpp
     {
-        auto test = gko::log::executor_data{};
+        auto test = gko::log::memory_space_data{};
     }
 
     // core/log/stream.hpp
     {
-        auto test = gko::log::Stream<>::create(refExec);
+        auto test =
+            gko::log::Stream<>::create(refExec, refExec->get_mem_space());
     }
 
 #if GKO_HAVE_PAPI_SDE
     // core/log/papi.hpp
     {
-        auto test = gko::log::Papi<>::create(refExec);
+        auto test = gko::log::Papi<>::create(refExec, refExec->get_mem_space());
     }
 #endif  // GKO_HAVE_PAPI_SDE
 
@@ -357,13 +359,13 @@ int main(int, char **)
                            .with_reduction_factor(1e-10)
                            .on(refExec);
 
-        auto rel_res = gko::stop::RelativeResidualNorm<>::build()
-                           .with_tolerance(1e-10)
-                           .on(refExec);
+        auto rel_res =
+            gko::stop::RelativeResidualNorm<>::build().with_tolerance(1e-10).on(
+                refExec);
 
-        auto abs_res = gko::stop::AbsoluteResidualNorm<>::build()
-                           .with_tolerance(1e-10)
-                           .on(refExec);
+        auto abs_res =
+            gko::stop::AbsoluteResidualNorm<>::build().with_tolerance(1e-10).on(
+                refExec);
 
         // stopping_status.hpp
         auto stop_status = gko::stopping_status{};
diff --git a/test_install/test_install_cuda.cu b/test_install/test_install_cuda.cu
index ed2e18c307d..c3d3386cad4 100644
--- a/test_install/test_install_cuda.cu
+++ b/test_install/test_install_cuda.cu
@@ -187,23 +187,23 @@ int main(int, char **)
 
     // core/log/convergence.hpp
     {
-        gko::log::Convergence<>::create(cudaExec);
+        gko::log::Convergence<>::create(cudaExec, cudaExec->get_mem_space());
     }
 
     // core/log/record.hpp
     {
-        gko::log::executor_data{};
+        gko::log::memory_space_data{};
     }
 
     // core/log/stream.hpp
     {
-        gko::log::Stream<>::create(cudaExec);
+        gko::log::Stream<>::create(cudaExec, cudaExec->get_mem_space());
     }
 
 #if GKO_HAVE_PAPI_SDE
     // core/log/papi.hpp
     {
-        gko::log::Papi<>::create(cudaExec);
+        gko::log::Papi<>::create(cudaExec, cudaExec->get_mem_space());
     }
 #endif  // GKO_HAVE_PAPI_SDE
 
@@ -349,13 +349,11 @@ int main(int, char **)
             .with_reduction_factor(1e-10)
             .on(cudaExec);
 
-        gko::stop::RelativeResidualNorm<>::build()
-            .with_tolerance(1e-10)
-            .on(cudaExec);
+        gko::stop::RelativeResidualNorm<>::build().with_tolerance(1e-10).on(
+            cudaExec);
 
-        gko::stop::AbsoluteResidualNorm<>::build()
-            .with_tolerance(1e-10)
-            .on(cudaExec);
+        gko::stop::AbsoluteResidualNorm<>::build().with_tolerance(1e-10).on(
+            cudaExec);
 
         // stopping_status.hpp
         gko::stopping_status{};