From 3a43510e071cd2c7a6b81d3afe5de5e02a6f3cf5 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 31 Aug 2022 21:09:49 -0400 Subject: [PATCH 1/6] add the property selection on dpcpp executor --- dpcpp/base/executor.dp.cpp | 23 +++++++++++++--- include/ginkgo/core/base/executor.hpp | 39 +++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index bd6f1eeeadd..a520e62932a 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -86,10 +86,11 @@ bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const std::shared_ptr DpcppExecutor::create( - int device_id, std::shared_ptr master, std::string device_type) + int device_id, std::shared_ptr master, std::string device_type, + dpcpp_queue_property property) { return std::shared_ptr( - new DpcppExecutor(device_id, std::move(master), device_type)); + new DpcppExecutor(device_id, std::move(master), device_type, property)); } @@ -234,10 +235,24 @@ void delete_queue(sycl::queue* queue) } +::cl::sycl::property_list get_property_list(dpcpp_queue_property property) +{ + if (property == dpcpp_queue_property::in_order) { + return {sycl::property::queue::in_order{}}; + } else if (property == dpcpp_queue_property::enable_profiling) { + GKO_NOT_SUPPORTED(property); + return {sycl::property::queue::enable_profiling{}}; + } else { + return {sycl::property::queue::in_order{}, + sycl::property::queue::enable_profiling{}}; + } +} + + } // namespace detail -void DpcppExecutor::set_device_property() +void DpcppExecutor::set_device_property(dpcpp_queue_property property) { assert(this->get_exec_info().device_id < DpcppExecutor::get_num_devices(this->get_exec_info().device_type)); @@ -277,7 +292,7 @@ void DpcppExecutor::set_device_property() // `wait()` would be needed after every call to a DPC++ function or kernel. // For example, without `in_order`, doing a copy, a kernel, and a copy, will // not necessarily happen in that order by default, which we need to avoid. - auto* queue = new sycl::queue{device, sycl::property::queue::in_order{}}; + auto* queue = new sycl::queue{device, detail::get_property_list(property)}; queue_ = std::move(queue_manager{queue, detail::delete_queue}); } diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index ca53434520a..4aaf8e87f43 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -104,12 +104,37 @@ constexpr allocation_mode default_hip_alloc_mode = inline namespace cl { namespace sycl { + class queue; + } // namespace sycl } // namespace cl +/** + * The enum class for the dpcpp queue property. + */ +enum class dpcpp_queue_property { + /** + * queue executed in order + */ + in_order = 1, + + /** + * queue enables the profiling + */ + enable_profiling = 2 +}; + +GKO_ATTRIBUTES GKO_INLINE dpcpp_queue_property operator|(dpcpp_queue_property a, + dpcpp_queue_property b) +{ + return static_cast(static_cast(a) | + static_cast(b)); +} + + struct cublasContext; struct cusparseContext; @@ -1770,7 +1795,8 @@ class DpcppExecutor : public detail::ExecutorBase, */ static std::shared_ptr create( int device_id, std::shared_ptr master, - std::string device_type = "all"); + std::string device_type = "all", + dpcpp_queue_property property = dpcpp_queue_property::in_order); std::shared_ptr get_master() noexcept override; @@ -1862,17 +1888,20 @@ class DpcppExecutor : public detail::ExecutorBase, } protected: - void set_device_property(); + void set_device_property( + dpcpp_queue_property property = dpcpp_queue_property::in_order); - DpcppExecutor(int device_id, std::shared_ptr master, - std::string device_type = "all") + DpcppExecutor( + int device_id, std::shared_ptr master, + std::string device_type = "all", + dpcpp_queue_property property = dpcpp_queue_property::in_order) : master_(master) { std::for_each(device_type.begin(), device_type.end(), [](char& c) { c = std::tolower(c); }); this->get_exec_info().device_type = std::string(device_type); this->get_exec_info().device_id = device_id; - this->set_device_property(); + this->set_device_property(property); } void populate_exec_info(const machine_topology* mach_topo) override; From d46536c8dbd68aa0af8cb3c7338c6e2ff9cc6ffe Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 31 Aug 2022 21:11:19 -0400 Subject: [PATCH 2/6] add the dpcpp device timer --- benchmark/CMakeLists.txt | 11 +- benchmark/blas/blas.cpp | 3 +- benchmark/conversions/conversions.cpp | 2 +- benchmark/preconditioner/preconditioner.cpp | 2 +- benchmark/solver/solver.cpp | 9 +- benchmark/sparse_blas/sparse_blas.cpp | 2 +- benchmark/spmv/spmv.cpp | 2 +- benchmark/utils/dpcpp_timer.dp.cpp | 108 ++++++++++++++++++++ benchmark/utils/general.hpp | 25 +++-- benchmark/utils/timer.hpp | 15 ++- 10 files changed, 155 insertions(+), 24 deletions(-) create mode 100644 benchmark/utils/dpcpp_timer.dp.cpp diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 3d3b22fd5e5..bb0921db3d9 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -61,25 +61,28 @@ function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def ty add_executable("${name}" ${ARGN}) target_link_libraries("${name}" ginkgo gflags rapidjson) if (GINKGO_BUILD_CUDA) + target_compile_definitions("${name}" PRIVATE HAS_CUDA=1) target_link_libraries("${name}" cuda_timer) endif() if (GINKGO_BUILD_HIP) + target_compile_definitions("${name}" PRIVATE HAS_HIP=1) target_link_libraries("${name}" hip_timer) endif() + if (GINKGO_BUILD_DPCPP) + target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1) + target_link_libraries("${name}" dpcpp_timer) + endif() target_compile_definitions("${name}" PRIVATE "${macro_def}") target_compile_options("${name}" PRIVATE ${GINKGO_COMPILER_FLAGS}) ginkgo_benchmark_add_tuning_maybe("${name}") if("${use_lib_linops}") if (GINKGO_BUILD_CUDA) - target_compile_definitions("${name}" PRIVATE HAS_CUDA=1) target_link_libraries("${name}" cusparse_linops_${type}) endif() if (GINKGO_BUILD_HIP) - target_compile_definitions("${name}" PRIVATE HAS_HIP=1) target_link_libraries("${name}" hipsparse_linops_${type}) endif() if (GINKGO_BUILD_DPCPP) - target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1) target_link_libraries("${name}" onemkl_linops_${type}) endif() endif() @@ -131,6 +134,8 @@ if (GINKGO_BUILD_DPCPP) ginkgo_benchmark_onemkl_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) ginkgo_benchmark_onemkl_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) ginkgo_benchmark_onemkl_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) + add_library(dpcpp_timer utils/dpcpp_timer.dp.cpp) + target_link_libraries(dpcpp_timer ginkgo) endif() add_subdirectory(blas) diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index 6dfe2414a9f..703bdbc2853 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -506,8 +506,7 @@ int main(int argc, char* argv[]) std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); - - auto exec = executor_factory.at(FLAGS_executor)(); + auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); auto engine = get_engine(); auto operations = split(FLAGS_operations, ','); diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index 3036d1504f9..b130ad7632e 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -123,7 +123,7 @@ int main(int argc, char* argv[]) std::string() + "The formats are " + FLAGS_formats + "\n"; print_general_information(extra_information); - auto exec = executor_factory.at(FLAGS_executor)(); + auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); auto formats = split(FLAGS_formats, ','); rapidjson::IStreamWrapper jcin(std::cin); diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 3fe1a48420c..507a7676ba0 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -256,7 +256,7 @@ int main(int argc, char* argv[]) "Running with preconditioners: " + FLAGS_preconditioners + "\n"; print_general_information(extra_information); - auto exec = get_executor(); + auto exec = get_executor(FLAGS_gpu_timer); auto& engine = get_engine(); auto preconditioners = split(FLAGS_preconditioners, ','); diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 1aed2458973..c948438b997 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -443,9 +443,10 @@ void solve_system(const std::string& solver_name, add_or_set_member(solver_json, "preconditioner", rapidjson::Value(rapidjson::kObjectType), allocator); - write_precond_info(lend(clone(get_executor()->get_master(), - prec->get_preconditioner())), - solver_json["preconditioner"], allocator); + write_precond_info( + lend(clone(get_executor(FLAGS_gpu_timer)->get_master(), + prec->get_preconditioner())), + solver_json["preconditioner"], allocator); } auto apply_logger = @@ -571,7 +572,7 @@ int main(int argc, char* argv[]) std::to_string(FLAGS_nrhs) + "\n"; print_general_information(extra_information); - auto exec = get_executor(); + auto exec = get_executor(FLAGS_gpu_timer); auto solvers = split(FLAGS_solvers, ','); auto preconds = split(FLAGS_preconditioners, ','); std::vector precond_solvers; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index e084510c834..96f1b216285 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -459,7 +459,7 @@ int main(int argc, char* argv[]) " { \"filename\": \"my_file2.mtx\"}\n" + " ]\n\n"; initialize_argument_parsing(&argc, &argv, header, format); - auto exec = executor_factory.at(FLAGS_executor)(); + auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); rapidjson::IStreamWrapper jcin(std::cin); rapidjson::Document test_cases; diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index c8093611326..468fabc17e3 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -180,7 +180,7 @@ int main(int argc, char* argv[]) std::to_string(FLAGS_nrhs) + "\n"; print_general_information(extra_information); - auto exec = executor_factory.at(FLAGS_executor)(); + auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); auto engine = get_engine(); auto formats = split(FLAGS_formats, ','); diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp new file mode 100644 index 00000000000..2257cae4e1f --- /dev/null +++ b/benchmark/utils/dpcpp_timer.dp.cpp @@ -0,0 +1,108 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include +#include + + +#include "benchmark/utils/timer_impl.hpp" + + +/** + * DpcppTimer uses dpcpp executor and event to measure the timing. + */ +class DpcppTimer : public Timer { +public: + /** + * Create a DpcppTimer. + * + * @param exec Executor which should be a DpcppExecutor + */ + DpcppTimer(std::shared_ptr exec) + : DpcppTimer(std::dynamic_pointer_cast(exec)) + {} + + /** + * Create a DpcppTimer. + * + * @param exec DpcppExecutor associated to the timer + */ + DpcppTimer(std::shared_ptr exec) : Timer() + { + assert(exec != nullptr); + if (!exec->get_queue() + ->template has_property< + sycl::property::queue::enable_profiling>()) { + GKO_NOT_SUPPORTED(exec); + } + std::cout << "EEEE" << std::endl; + exec_ = exec; + } + +protected: + void tic_impl() override + { + exec_->synchronize(); + // Currently, gko::DpcppExecutor always use default stream. + start_ = exec_->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for(1, [=](sycl::id<1> id) {}); + }); + } + + double toc_impl() override + { + auto stop = exec_->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for(1, [=](sycl::id<1> id) {}); + }); + stop.wait_and_throw(); + // get the start time of stop + auto stop_time = stop.get_profiling_info< + sycl::info::event_profiling::command_start>(); + // get the end time of start + auto start_time = + start_ + .get_profiling_info(); + return (stop_time - start_time) / double{1.0e9}; + } + +private: + std::shared_ptr exec_; + sycl::event start_; + int id_; +}; + + +std::shared_ptr get_dpcpp_timer( + std::shared_ptr exec) +{ + return std::make_shared(exec); +} diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index b949a1d247f..9fff4d9a1ec 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -290,30 +290,35 @@ void backup_results(rapidjson::Document& results) // executor mapping -const std::map()>> +const std::map(bool)>> executor_factory{ - {"reference", [] { return gko::ReferenceExecutor::create(); }}, - {"omp", [] { return gko::OmpExecutor::create(); }}, + {"reference", [](bool) { return gko::ReferenceExecutor::create(); }}, + {"omp", [](bool) { return gko::OmpExecutor::create(); }}, {"cuda", - [] { + [](bool) { return gko::CudaExecutor::create(FLAGS_device_id, gko::OmpExecutor::create(), true); }}, {"hip", - [] { + [](bool) { return gko::HipExecutor::create(FLAGS_device_id, gko::OmpExecutor::create(), true); }}, - {"dpcpp", [] { - return gko::DpcppExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create()); + {"dpcpp", [](bool use_gpu_timer) { + auto property = dpcpp_queue_property::in_order; + if (use_gpu_timer) { + property = dpcpp_queue_property::in_order | + dpcpp_queue_property::enable_profiling; + } + return gko::DpcppExecutor::create( + FLAGS_device_id, gko::OmpExecutor::create(), "all", property); }}}; // returns the appropriate executor, as set by the executor flag -std::shared_ptr get_executor() +std::shared_ptr get_executor(bool use_gpu_timer) { - static auto exec = executor_factory.at(FLAGS_executor)(); + static auto exec = executor_factory.at(FLAGS_executor)(use_gpu_timer); return exec; } diff --git a/benchmark/utils/timer.hpp b/benchmark/utils/timer.hpp index 843b5e72724..d0ccefe7f37 100644 --- a/benchmark/utils/timer.hpp +++ b/benchmark/utils/timer.hpp @@ -64,6 +64,12 @@ std::shared_ptr get_hip_timer( #endif // HAS_HIP +#ifdef HAS_DPCPP +std::shared_ptr get_dpcpp_timer( + std::shared_ptr exec); +#endif // HAS_DPCPP + + /** * Get the timer. If the executor does not support gpu timer, still return the * cpu timer. @@ -88,8 +94,15 @@ std::shared_ptr get_timer(std::shared_ptr exec, return get_hip_timer(hip); } #endif // HAS_HIP + +#ifdef HAS_DPCPP + if (auto dpcpp = + std::dynamic_pointer_cast(exec)) { + return get_dpcpp_timer(dpcpp); + } +#endif // HAS_DPCPP } - // No cuda/hip executor available or no gpu_timer used + // No cuda/hip/dpcpp executor available or no gpu_timer used return std::make_shared(exec); } From 007e454072e24d3ea6ae13522433b0552b54fb25 Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 27 Sep 2022 15:45:57 +0800 Subject: [PATCH 3/6] remove unnecessary code Co-authored-by: Tobias Ribizel --- benchmark/utils/dpcpp_timer.dp.cpp | 1 - dpcpp/base/executor.dp.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp index 2257cae4e1f..6b66556cc17 100644 --- a/benchmark/utils/dpcpp_timer.dp.cpp +++ b/benchmark/utils/dpcpp_timer.dp.cpp @@ -64,7 +64,6 @@ class DpcppTimer : public Timer { sycl::property::queue::enable_profiling>()) { GKO_NOT_SUPPORTED(exec); } - std::cout << "EEEE" << std::endl; exec_ = exec; } diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index a520e62932a..969203da226 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -241,7 +241,6 @@ ::cl::sycl::property_list get_property_list(dpcpp_queue_property property) return {sycl::property::queue::in_order{}}; } else if (property == dpcpp_queue_property::enable_profiling) { GKO_NOT_SUPPORTED(property); - return {sycl::property::queue::enable_profiling{}}; } else { return {sycl::property::queue::in_order{}, sycl::property::queue::enable_profiling{}}; From 2577d680123383d72623f7cf3f84ed34ecfaec6f Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 27 Sep 2022 16:40:22 +0800 Subject: [PATCH 4/6] fix missing arg in dummy impl --- core/device_hooks/dpcpp_hooks.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 4ef8f15cbe4..9cd057fbeab 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -52,10 +52,11 @@ version version_info::get_dpcpp_version() noexcept std::shared_ptr DpcppExecutor::create( - int device_id, std::shared_ptr master, std::string device_type) + int device_id, std::shared_ptr master, std::string device_type, + dpcpp_queue_property property) { return std::shared_ptr( - new DpcppExecutor(device_id, std::move(master), device_type)); + new DpcppExecutor(device_id, std::move(master), device_type, property)); } @@ -123,7 +124,7 @@ void DpcppExecutor::run(const Operation& op) const int DpcppExecutor::get_num_devices(std::string) { return 0; } -void DpcppExecutor::set_device_property() {} +void DpcppExecutor::set_device_property(dpcpp_queue_property property) {} bool DpcppExecutor::verify_memory_to(const OmpExecutor* dest_exec) const From 794194775894a9a4d027ca741f804a409fd002eb Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Tue, 27 Sep 2022 17:50:19 +0800 Subject: [PATCH 5/6] separate timer flag and vendors' format flag --- benchmark/CMakeLists.txt | 10 +++++++--- benchmark/utils/timer.hpp | 24 ++++++++++++------------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index bb0921db3d9..5f8d59a8017 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -60,16 +60,17 @@ endfunction() function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def type) add_executable("${name}" ${ARGN}) target_link_libraries("${name}" ginkgo gflags rapidjson) + # always include the device timer if (GINKGO_BUILD_CUDA) - target_compile_definitions("${name}" PRIVATE HAS_CUDA=1) + target_compile_definitions("${name}" PRIVATE HAS_CUDA_TIMER=1) target_link_libraries("${name}" cuda_timer) endif() if (GINKGO_BUILD_HIP) - target_compile_definitions("${name}" PRIVATE HAS_HIP=1) + target_compile_definitions("${name}" PRIVATE HAS_HIP_TIMER=1) target_link_libraries("${name}" hip_timer) endif() if (GINKGO_BUILD_DPCPP) - target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1) + target_compile_definitions("${name}" PRIVATE HAS_DPCPP_TIMER=1) target_link_libraries("${name}" dpcpp_timer) endif() target_compile_definitions("${name}" PRIVATE "${macro_def}") @@ -77,12 +78,15 @@ function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def ty ginkgo_benchmark_add_tuning_maybe("${name}") if("${use_lib_linops}") if (GINKGO_BUILD_CUDA) + target_compile_definitions("${name}" PRIVATE HAS_CUDA=1) target_link_libraries("${name}" cusparse_linops_${type}) endif() if (GINKGO_BUILD_HIP) + target_compile_definitions("${name}" PRIVATE HAS_HIP=1) target_link_libraries("${name}" hipsparse_linops_${type}) endif() if (GINKGO_BUILD_DPCPP) + target_compile_definitions("${name}" PRIVATE HAS_DPCPP=1) target_link_libraries("${name}" onemkl_linops_${type}) endif() endif() diff --git a/benchmark/utils/timer.hpp b/benchmark/utils/timer.hpp index d0ccefe7f37..5cafa8ebac3 100644 --- a/benchmark/utils/timer.hpp +++ b/benchmark/utils/timer.hpp @@ -52,22 +52,22 @@ DEFINE_bool(gpu_timer, false, "executor is cuda or hip"); -#ifdef HAS_CUDA +#ifdef HAS_CUDA_TIMER std::shared_ptr get_cuda_timer( std::shared_ptr exec); -#endif // HAS_CUDA +#endif // HAS_CUDA_TIMER -#ifdef HAS_HIP +#ifdef HAS_HIP_TIMER std::shared_ptr get_hip_timer( std::shared_ptr exec); -#endif // HAS_HIP +#endif // HAS_HIP_TIMER -#ifdef HAS_DPCPP +#ifdef HAS_DPCPP_TIMER std::shared_ptr get_dpcpp_timer( std::shared_ptr exec); -#endif // HAS_DPCPP +#endif // HAS_DPCPP_TIMER /** @@ -81,26 +81,26 @@ std::shared_ptr get_timer(std::shared_ptr exec, bool use_gpu_timer) { if (use_gpu_timer) { -#ifdef HAS_CUDA +#ifdef HAS_CUDA_TIMER if (auto cuda = std::dynamic_pointer_cast(exec)) { return get_cuda_timer(cuda); } -#endif // HAS_CUDA +#endif // HAS_CUDA_TIMER -#ifdef HAS_HIP +#ifdef HAS_HIP_TIMER if (auto hip = std::dynamic_pointer_cast(exec)) { return get_hip_timer(hip); } -#endif // HAS_HIP +#endif // HAS_HIP_TIMER -#ifdef HAS_DPCPP +#ifdef HAS_DPCPP_TIMER if (auto dpcpp = std::dynamic_pointer_cast(exec)) { return get_dpcpp_timer(dpcpp); } -#endif // HAS_DPCPP +#endif // HAS_DPCPP_TIMER } // No cuda/hip/dpcpp executor available or no gpu_timer used return std::make_shared(exec); From 13432f71cbcddbc7ed76c679860216a24efbfe5e Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Wed, 12 Oct 2022 13:55:21 +0800 Subject: [PATCH 6/6] update the documentation and reorder the condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Grützmacher --- benchmark/utils/dpcpp_timer.dp.cpp | 4 +++- dpcpp/base/executor.dp.cpp | 7 ++++--- include/ginkgo/core/base/executor.hpp | 5 +++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp index 6b66556cc17..3cdc10e417c 100644 --- a/benchmark/utils/dpcpp_timer.dp.cpp +++ b/benchmark/utils/dpcpp_timer.dp.cpp @@ -30,10 +30,12 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include #include +#include + + #include "benchmark/utils/timer_impl.hpp" diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 969203da226..cf51b504e64 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -239,11 +239,12 @@ ::cl::sycl::property_list get_property_list(dpcpp_queue_property property) { if (property == dpcpp_queue_property::in_order) { return {sycl::property::queue::in_order{}}; - } else if (property == dpcpp_queue_property::enable_profiling) { - GKO_NOT_SUPPORTED(property); - } else { + } else if (property == (dpcpp_queue_property::in_order | + dpcpp_queue_property::enable_profiling)) { return {sycl::property::queue::in_order{}, sycl::property::queue::enable_profiling{}}; + } else { + GKO_NOT_SUPPORTED(property); } } diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 4aaf8e87f43..697e86f941c 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -113,11 +113,12 @@ class queue; /** - * The enum class for the dpcpp queue property. + * The enum class is for the dpcpp queue property. It's legal to use a binary + * or(|) operation to combine several properties. */ enum class dpcpp_queue_property { /** - * queue executed in order + * queue executes in order */ in_order = 1,