Merge DPC++ Executor

DPC++ executor with a few kernels and tests. Some major changes: + Upgrade GTEST to a more recent version which allows using `GTEST_SKIP()` to skip selected tests. + Add a new `Dpcpp` executor. Use it in all `gko::Operation` and wherever appropriate. + Add the general architecture with all kernels set as `GKO_NOT_IMPLEMENTED`. + Cope with unsupported copies between devices by going through the master executors. This allows copying from an AMD GPU to an NVIDIA GPU, or from Intel to AMD/NVIDIA. + Implement a few simple kernels in the components and `stop/criterion` to ensure kernel execution works properly. + Implement DPC++ specific executor tests. + Circumvent some issues with `abs` and `sqrt` by using the SYCL-specific functions when appropriate. Note that it is still necessary to namespace `abs` (and probably `sqrt` as well) calls inside DPC++ kernels, otherwise the default (std) version is still used. + Also add a new Intel CPU CI job. Related PR: #648
ginkgo-project · Nov 4, 2020 · 99c2004 · 99c2004
2 parents 9caff82 + a5cc5d2
commit 99c2004
Show file tree

Hide file tree

Showing 229 changed files with 6,367 additions and 298 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -52,6 +52,7 @@ stages:
       CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
       CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER});
       fi
+    - if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then export SYCL_DEVICE_TYPE; fi
     - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX}
         -GNinja
         -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER}
@@ -65,6 +66,7 @@ stages:
         -DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES}
         -DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG}
     - ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT}
+    - if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then unset SYCL_DEVICE_TYPE; fi
   dependencies: []
   except:
       - schedules
@@ -79,6 +81,7 @@ stages:
       CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
       CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER});
       fi
+    - if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then export SYCL_DEVICE_TYPE; fi
     - cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX}
         -GNinja
         -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER}
@@ -117,6 +120,7 @@ stages:
             ninja validate_all_examples
         fi
       fi
+    - if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then unset SYCL_DEVICE_TYPE; fi
   dependencies: []
   except:
       - schedules
@@ -660,6 +664,24 @@ build/nocuda/intel/omp/release/static:
     - cuda
     - cpu
 
+build/dpcpp/cpu/release/static:  # DPC++ CI job for the CPU device: Release build, static libraries
+  <<: *default_build_with_test  # inherits the shared `default_build_with_test` job template
+  image: localhost:5000/gko-oneapi
+  variables:
+    <<: *default_variables
+    C_COMPILER: "gcc"
+    CXX_COMPILER: "dpcpp"  # forwarded to CMake as CMAKE_CXX_COMPILER by the build script
+    BUILD_DPCPP: "ON"
+    BUILD_TYPE: "Release"
+    BUILD_SHARED_LIBS: "OFF"
+    SYCL_DEVICE_TYPE: "CPU"  # exported to the environment by the build script when set
+  only:
+    variables:
+      - $RUN_CI_TAG  # job is created only when this variable expression evaluates to true
+  tags:
+    - private_ci
+    - cuda
+    - cpu
 
 # Job with important warnings as error
 warnings:

diff --git a/BENCHMARKING.md b/BENCHMARKING.md
@@ -147,8 +147,8 @@ collection. For details, see the [available benchmark options](### 5: Available
 benchmark options). Here are the most important options:
 * `BENCHMARK={spmv, solver, preconditioner}` - allows to select the type of
     benchmark to be ran.
-* `EXECUTOR={reference,cuda,hip,omp}` - select the executor and platform the
-    benchmarks should be ran on.
+* `EXECUTOR={reference,cuda,hip,omp,dpcpp}` - select the executor and platform
+    the benchmarks should be run on.
 * `SYSTEM_NAME=<name>` - a name which will be used to designate this platform
     (e.g. V100, RadeonVII, ...).
 * `SEGMENTS=<N>` - Split the benchmarked matrix space into `<N>` segments. If
@@ -256,8 +256,8 @@ The supported environment variables are described in the following list:
                  benchmark.
     *   `preconditioner` - Runs the preconditioner benchmarks on artificially
                  generated block-diagonal matrices.
-* `EXECUTOR={reference,cuda,hip,omp}` - select the executor and platform the
-    benchmarks should be ran on. Default is `cuda`.
+* `EXECUTOR={reference,cuda,hip,omp,dpcpp}` - select the executor and platform
+    the benchmarks should be run on. Default is `cuda`.
 * `SYSTEM_NAME=<name>` - a name which will be used to designate this platform
     (e.g. V100, RadeonVII, ...) and not overwrite previous results. Default is
     `unknown`.

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -27,6 +27,8 @@ option(GINKGO_BUILD_EXAMPLES "Build Ginkgo's examples" ON)
 option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON)
 option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON)
 option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP})
+option(GINKGO_BUILD_DPCPP
+    "Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP})
 option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA})
 option(GINKGO_BUILD_HIP "Compile kernels for AMD or NVIDIA GPUs" ${GINKGO_HAS_HIP})
 option(GINKGO_BUILD_DOC "Generate documentation" OFF)
@@ -72,7 +74,8 @@ option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON)
 set(GINKGO_CIRCULAR_DEPS_FLAGS "-Wl,--no-undefined")
 
 if(BUILD_SHARED_LIBS AND (WIN32 OR CYGWIN) AND (GINKGO_BUILD_TESTS OR GINKGO_BUILD_EXAMPLES OR GINKGO_BUILD_BENCHMARKS))
-    # Change shared libraries output only if this build has executable program with shared libraries.
+    # Change shared libraries output only if this build has executable program
+    # with shared libraries.
     set(GINKGO_CHANGED_SHARED_LIBRARY TRUE)
     option(GINKGO_CHECK_PATH "Tell Ginkgo to check if the environment variable PATH is available for this build." ON)
     set(GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH "windows_shared_library" CACHE STRING
@@ -83,7 +86,7 @@ else()
     set(GINKGO_CHANGED_SHARED_LIBRARY FALSE)
 endif()
 
-if(GINKGO_BUILD_TESTS AND (GINKGO_BUILD_CUDA OR GINKGO_BUILD_OMP OR GINKGO_BUILD_HIP))
+if(GINKGO_BUILD_TESTS AND (GINKGO_BUILD_CUDA OR GINKGO_BUILD_OMP OR GINKGO_BUILD_HIP OR GINKGO_BUILD_DPCPP))
     message(STATUS "GINKGO_BUILD_TESTS is ON, enabling GINKGO_BUILD_REFERENCE")
     set(GINKGO_BUILD_REFERENCE ON CACHE BOOL "Compile reference CPU kernels" FORCE)
 endif()
@@ -127,8 +130,9 @@ endif()
 
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/")
 
-# Find important header files, store the definitions in include/ginkgo/config.h.in
-# For details, see https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks
+# Find important header files, store the definitions in
+# include/ginkgo/config.h.in. For details, see
+# https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks
 include(CheckIncludeFileCXX)
 check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H)
 
@@ -147,7 +151,8 @@ if(GINKGO_BUILD_HIP)
     if(DEFINED ENV{HIP_PLATFORM})
         set(GINKGO_HIP_PLATFORM "$ENV{HIP_PLATFORM}")
     elseif(GINKGO_HIPCONFIG_PATH)
-        execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH} --platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM)
+        execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH}
+            --platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM)
     else()
         message(FATAL_ERROR "No platform could be found for HIP. "
             "Set and export the environment variable HIP_PLATFORM.")
@@ -170,7 +175,8 @@ include(cmake/hip_helpers.cmake)
 include(cmake/install_helpers.cmake)
 include(cmake/windows_helpers.cmake)
 
-# This is modified from https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace
+# This is modified from
+# https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace
 if(MSVC)
     if(BUILD_SHARED_LIBS)
         ginkgo_switch_to_windows_dynamic("CXX")
@@ -204,6 +210,9 @@ endif()
 if (GINKGO_BUILD_OMP)
     add_subdirectory(omp)        # High-performance omp kernels
 endif()
+if (GINKGO_BUILD_DPCPP)
+    add_subdirectory(dpcpp)        # High-performance DPC++ kernels
+endif()
 # HIP needs to be last because it builds the GINKGO_RPATH_FOR_HIP variable
 # which needs to know the `ginkgo` target.
 if(GINKGO_BUILD_HIP)
@@ -271,7 +280,8 @@ endif()
 configure_file(${Ginkgo_SOURCE_DIR}/cmake/ginkgo.pc.in
     ${Ginkgo_BINARY_DIR}/ginkgo.pc @ONLY)
 
-# WINDOWS NVCC has " inside the string, add escape charater to avoid config problem.
+# WINDOWS NVCC has " inside the string, add escape character
+# to avoid config problem.
 ginkgo_modify_flags(CMAKE_CUDA_FLAGS)
 ginkgo_modify_flags(CMAKE_CUDA_FLAGS_DEBUG)
 ginkgo_modify_flags(CMAKE_CUDA_FLAGS_RELEASE)
@@ -296,7 +306,8 @@ add_custom_target(test_install
     -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
     -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}
     # `--config cfg` is ignored by single-configuration generator.
-    # `$<CONFIG>` is always be the same as `CMAKE_BUILD_TYPE` in single-configuration generator.
+    # `$<CONFIG>` is always the same as `CMAKE_BUILD_TYPE` in
+    # single-configuration generator.
     COMMAND ${CMAKE_COMMAND} --build ${Ginkgo_BINARY_DIR}/test_install --config $<CONFIG>
     COMMAND ${GINKGO_TEST_INSTALL_COMMAND}
     COMMENT "Running a test on the installed binaries. This requires running `(sudo) make install` first.")
@@ -309,7 +320,6 @@ set(CPACK_PACKAGE_CONTACT "ginkgo.library@gmail.com")
 include(CPack)
 
 # And finally, print the configuration to screen:
-#
 if(GINKGO_CONFIG_LOG_DETAILED)
     FILE(READ ${PROJECT_BINARY_DIR}/detailed.log GINKGO_LOG_SUMMARY)
 else()

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -68,8 +68,8 @@ look at our coding guidelines before proposing a pull request.
 
 Ginkgo is divided into a `core` module with common functionalities independent
 of the architecture, and several kernel modules (`reference`, `omp`, `cuda`,
-`hip`) which contain low-level computational routines for each supported
-architecture.
+`hip`, `dpcpp`) which contain low-level computational routines for each
+supported architecture.
 
 ### Extended header files
 
@@ -521,12 +521,12 @@ existing code has been broken.
   need to be performed with data that can be as small as possible. For example,
   matrices lesser than 5x5 are acceptable. This allows the reviewers to verify
   the results for exactness with tools such as MATLAB.
-* OpenMP, CUDA and HIP kernels have to be tested against the reference kernels.
-  Hence data for the tests of these kernels can be generated in the test files
-  using helper functions or by using external files to be read through the
-  standard input. In particular for CUDA and HIP, the data size should be at
-  least bigger than the architecture's warp size to ensure there is no corner
-  case in the kernels.
+* OpenMP, CUDA, HIP and DPC++ kernels have to be tested against the reference
+  kernels. Hence data for the tests of these kernels can be generated in the
+  test files using helper functions or by using external files to be read
+  through the standard input. In particular for CUDA, HIP and DPC++ the data
+  size should be at least bigger than the architecture's warp size to ensure
+  there is no corner case in the kernels.
 
 
 ## Documentation style

diff --git a/INSTALL.md b/INSTALL.md
@@ -32,6 +32,10 @@ Ginkgo adds the following additional switches to control what is being built:
 *   `-DGINKGO_BUILD_CUDA={ON, OFF}` builds optimized cuda versions of the kernels
     (requires CUDA), default is `ON` if a CUDA compiler could be detected,
     `OFF` otherwise.
+*   `-DGINKGO_BUILD_DPCPP={ON, OFF}` builds optimized DPC++ versions of the
+    kernels (requires `CMAKE_CXX_COMPILER` to be set to the `dpcpp` compiler).
+    The default is `ON` if `CMAKE_CXX_COMPILER` is a DPC++ compiler, `OFF`
+    otherwise.
 *   `-DGINKGO_BUILD_HIP={ON, OFF}` builds optimized HIP versions of the kernels
     (requires HIP), default is `ON` if an installation of HIP could be detected,
     `OFF` otherwise.

diff --git a/README.md b/README.md
@@ -118,9 +118,12 @@ mkdir build; cd build
 cmake -G "Unix Makefiles" .. && make
 ```
 
-By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run examples with this
-executor. You would need to explicitly compile with the OpenMP and CUDA modules enabled
-to run with these executors. Please refer to the [Installation page](./INSTALL.md).
+By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run
+examples with this executor. By default, Ginkgo tries to enable the relevant
+modules depending on your machine environment (presence of CUDA, ...). You can
+also explicitly compile with the OpenMP, CUDA, HIP or DPC++ modules enabled to
+run the examples with these executors. Please refer to the [Installation
+page](./INSTALL.md) for more details.
 
 After the installation, CMake can find ginkgo with `find_package(Ginkgo)`.
 An example can be found in the [`test_install`](test_install/CMakeLists.txt).

diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp
@@ -263,9 +263,14 @@ const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
              return gko::CudaExecutor::create(FLAGS_device_id,
                                               gko::OmpExecutor::create(), true);
          }},
-        {"hip", [] {
+        {"hip",
+         [] {
              return gko::HipExecutor::create(FLAGS_device_id,
                                              gko::OmpExecutor::create(), true);
+         }},
+        {"dpcpp", [] {
+             return gko::DpcppExecutor::create(FLAGS_device_id,
+                                               gko::OmpExecutor::create());
          }}};
 
 

diff --git a/benchmark/utils/overhead_linop.hpp b/benchmark/utils/overhead_linop.hpp
@@ -109,6 +109,15 @@ GKO_DECLARE_ALL;
 }  // namespace hip
 
 
+namespace dpcpp {
+namespace overhead {
+
+GKO_DECLARE_ALL;
+
+}  // namespace overhead
+}  // namespace dpcpp
+
+
 #undef GKO_DECLARE_ALL
 
 

diff --git a/cmake/GinkgoConfig.cmake.in b/cmake/GinkgoConfig.cmake.in
@@ -36,6 +36,7 @@ set(GINKGO_BUILD_REFERENCE @GINKGO_BUILD_REFERENCE@)
 set(GINKGO_BUILD_OMP @GINKGO_BUILD_OMP@)
 set(GINKGO_BUILD_CUDA @GINKGO_BUILD_CUDA@)
 set(GINKGO_BUILD_HIP @GINKGO_BUILD_HIP@)
+set(GINKGO_BUILD_DPCPP @GINKGO_BUILD_DPCPP@)
 
 set(GINKGO_DEVEL_TOOLS @GINKGO_DEVEL_TOOLS@)
 set(GINKGO_BUILD_TESTS @GINKGO_BUILD_TESTS@)

diff --git a/cmake/autodetect_executors.cmake b/cmake/autodetect_executors.cmake
@@ -1,9 +1,13 @@
 set(GINKGO_HAS_OMP OFF)
 set(GINKGO_HAS_CUDA OFF)
+set(GINKGO_HAS_DPCPP OFF)
 set(GINKGO_HAS_HIP OFF)
 find_package(OpenMP)
 include(CheckLanguage)
 check_language(CUDA)
+try_compile(GKO_CAN_COMPILE_DPCPP ${PROJECT_BINARY_DIR}/dpcpp
+    SOURCES ${PROJECT_SOURCE_DIR}/dpcpp/test_dpcpp.dp.cpp
+    CXX_STANDARD 17)
 
 if(OpenMP_CXX_FOUND)
     if(NOT DEFINED GINKGO_BUILD_OMP)
@@ -25,3 +29,10 @@ if(GINKGO_HIPCONFIG_PATH)
     endif()
     set(GINKGO_HAS_HIP ON)
 endif()
+
+if (GKO_CAN_COMPILE_DPCPP)
+    if(NOT DEFINED GINKGO_BUILD_DPCPP)
+        message(STATUS "Enabling DPCPP executor")
+    endif()
+    set(GINKGO_HAS_DPCPP ON)
+endif()
diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
@@ -17,6 +17,26 @@ function(ginkgo_create_test test_name)
     add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME})
 endfunction(ginkgo_create_test)
 
+# Registers the GoogleTest-based DPC++ test built from `<test_name>.dp.cpp`;
+# extra arguments (ARGN) are linked into the test as additional libraries.
+function(ginkgo_create_dpcpp_test test_name)
+    file(RELATIVE_PATH rel_dir ${PROJECT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+    # Flatten the relative path so it can serve as a single target name.
+    string(REPLACE "/" "_" test_target "${rel_dir}/${test_name}")
+    add_executable(${test_target} ${test_name}.dp.cpp)
+    set_target_properties(${test_target} PROPERTIES OUTPUT_NAME ${test_name})
+    target_compile_features("${test_target}" PUBLIC cxx_std_17)
+    target_compile_options("${test_target}" PRIVATE "${GINKGO_DPCPP_FLAGS}")
+    target_include_directories("${test_target}"
+        PRIVATE "$<BUILD_INTERFACE:${Ginkgo_BINARY_DIR}>")
+    if(GINKGO_CHECK_CIRCULAR_DEPS)
+        target_link_libraries(${test_target} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}")
+    endif()
+    target_link_libraries(${test_target} PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN})
+    # The CTest name keeps the path relative to the project binary directory.
+    add_test(NAME ${rel_dir}/${test_name} COMMAND ${test_target})
+endfunction(ginkgo_create_dpcpp_test)
+
 function(ginkgo_create_thread_test test_name)
     set(THREADS_PREFER_PTHREAD_FLAG ON)
     find_package(Threads REQUIRED)