Skip to content

Commit

Permalink
Merge DPC++ Executor
Browse files Browse the repository at this point in the history
DPC++ executor with a few kernels and tests

Some major changes:
+ Upgrade GTEST to a more recent version which allows using `GTEST_SKIP()`
   to skip selected tests.
+ Add a new `Dpcpp` executor. Use it in all `gko::Operation` and
  wherever appropriate.
+ Add the general architecture with all kernels set as `GKO_NOT_IMPLEMENTED`
+ Cope with unsupported copies between devices by going through the
  master executors. This allows to copy from an AMD GPU to a
  NVIDIA GPU, or from Intel to AMD/NVIDIA.
+ Implement a few simple kernels in the components and `stop/criterion`
  to ensure kernel execution works properly.
+ Implement DPC++ specific executor tests.
+ Circumvent some issues with `abs` and `sqrt` by using the sycl
  specific functions when appropriate. Note that it's still needed to
  namespace `abs` (and probably `sqrt` as well) calls inside DPC++
  kernels, otherwise the default (std) version is still used.
+ Also add a new Intel CPU CI job.

Related PR: #648
  • Loading branch information
tcojean authored Nov 4, 2020
2 parents 9caff82 + a5cc5d2 commit 99c2004
Show file tree
Hide file tree
Showing 229 changed files with 6,367 additions and 298 deletions.
22 changes: 22 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ stages:
CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER});
fi
- if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then export SYCL_DEVICE_TYPE; fi
- cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX}
-GNinja
-DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER}
Expand All @@ -65,6 +66,7 @@ stages:
-DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES}
-DGINKGO_CONFIG_LOG_DETAILED=${CONFIG_LOG}
- ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT}
- if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then unset SYCL_DEVICE_TYPE; fi
dependencies: []
except:
- schedules
Expand All @@ -79,6 +81,7 @@ stages:
CUDA_ARCH_STR=-DGINKGO_CUDA_ARCHITECTURES=${CUDA_ARCH};
CUDA_HOST_STR=-DCMAKE_CUDA_HOST_COMPILER=$(which ${CXX_COMPILER});
fi
- if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then export SYCL_DEVICE_TYPE; fi
- cmake ${CI_PROJECT_DIR}${CI_PROJECT_DIR_SUFFIX}
-GNinja
-DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${CXX_COMPILER}
Expand Down Expand Up @@ -117,6 +120,7 @@ stages:
ninja validate_all_examples
fi
fi
- if [ ! -z ${SYCL_DEVICE_TYPE+x} ]; then unset SYCL_DEVICE_TYPE; fi
dependencies: []
except:
- schedules
Expand Down Expand Up @@ -660,6 +664,24 @@ build/nocuda/intel/omp/release/static:
- cuda
- cpu

# DPC++ CI job: static Release build with tests (default_build_with_test
# template), compiled with the `dpcpp` C++ compiler inside the gko-oneapi
# image. SYCL_DEVICE_TYPE is set to CPU for this job. The job is only created
# when the RUN_CI_TAG variable is set.
# NOTE(review): the `cuda` runner tag is presumably required by the private
# runner setup rather than by this job itself - confirm.
build/dpcpp/cpu/release/static:
<<: *default_build_with_test
image: localhost:5000/gko-oneapi
variables:
<<: *default_variables
C_COMPILER: "gcc"
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Release"
BUILD_SHARED_LIBS: "OFF"
SYCL_DEVICE_TYPE: "CPU"
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- cpu

# Job with important warnings as error
warnings:
Expand Down
8 changes: 4 additions & 4 deletions BENCHMARKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ collection. For details, see the [available benchmark options](### 5: Available
benchmark options). Here are the most important options:
* `BENCHMARK={spmv, solver, preconditioner}` - selects the type of
benchmark to be run.
* `EXECUTOR={reference,cuda,hip,omp}` - select the executor and platform the
benchmarks should be ran on.
* `EXECUTOR={reference,cuda,hip,omp,dpcpp}` - select the executor and platform
the benchmarks should be run on.
* `SYSTEM_NAME=<name>` - a name which will be used to designate this platform
(e.g. V100, RadeonVII, ...).
* `SEGMENTS=<N>` - Split the benchmarked matrix space into `<N>` segments. If
Expand Down Expand Up @@ -256,8 +256,8 @@ The supported environment variables are described in the following list:
benchmark.
* `preconditioner` - Runs the preconditioner benchmarks on artificially
generated block-diagonal matrices.
* `EXECUTOR={reference,cuda,hip,omp}` - select the executor and platform the
benchmarks should be ran on. Default is `cuda`.
* `EXECUTOR={reference,cuda,hip,omp,dpcpp}` - select the executor and platform
the benchmarks should be run on. Default is `cuda`.
* `SYSTEM_NAME=<name>` - a name which will be used to designate this platform
(e.g. V100, RadeonVII, ...) and not overwrite previous results. Default is
`unknown`.
Expand Down
28 changes: 19 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ option(GINKGO_BUILD_EXAMPLES "Build Ginkgo's examples" ON)
option(GINKGO_BUILD_BENCHMARKS "Build Ginkgo's benchmarks" ON)
option(GINKGO_BUILD_REFERENCE "Compile reference CPU kernels" ON)
option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" ${GINKGO_HAS_OMP})
option(GINKGO_BUILD_DPCPP
"Compile DPC++ kernels for Intel GPUs or other DPC++ enabled hardware" ${GINKGO_HAS_DPCPP})
option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" ${GINKGO_HAS_CUDA})
option(GINKGO_BUILD_HIP "Compile kernels for AMD or NVIDIA GPUs" ${GINKGO_HAS_HIP})
option(GINKGO_BUILD_DOC "Generate documentation" OFF)
Expand Down Expand Up @@ -72,7 +74,8 @@ option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON)
set(GINKGO_CIRCULAR_DEPS_FLAGS "-Wl,--no-undefined")

if(BUILD_SHARED_LIBS AND (WIN32 OR CYGWIN) AND (GINKGO_BUILD_TESTS OR GINKGO_BUILD_EXAMPLES OR GINKGO_BUILD_BENCHMARKS))
# Change shared libraries output only if this build has executable program with shared libraries.
# Change shared libraries output only if this build has executable program
# with shared libraries.
set(GINKGO_CHANGED_SHARED_LIBRARY TRUE)
option(GINKGO_CHECK_PATH "Tell Ginkgo to check if the environment variable PATH is available for this build." ON)
set(GINKGO_WINDOWS_SHARED_LIBRARY_RELPATH "windows_shared_library" CACHE STRING
Expand All @@ -83,7 +86,7 @@ else()
set(GINKGO_CHANGED_SHARED_LIBRARY FALSE)
endif()

if(GINKGO_BUILD_TESTS AND (GINKGO_BUILD_CUDA OR GINKGO_BUILD_OMP OR GINKGO_BUILD_HIP))
if(GINKGO_BUILD_TESTS AND (GINKGO_BUILD_CUDA OR GINKGO_BUILD_OMP OR GINKGO_BUILD_HIP OR GINKGO_BUILD_DPCPP))
message(STATUS "GINKGO_BUILD_TESTS is ON, enabling GINKGO_BUILD_REFERENCE")
set(GINKGO_BUILD_REFERENCE ON CACHE BOOL "Compile reference CPU kernels" FORCE)
endif()
Expand Down Expand Up @@ -127,8 +130,9 @@ endif()

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/")

# Find important header files, store the definitions in include/ginkgo/config.h.in
# For details, see https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks
# Find important header files, store the definitions in
# include/ginkgo/config.h.in For details, see
# https://gitlab.kitware.com/cmake/community/wikis/doc/tutorials/How-To-Write-Platform-Checks
include(CheckIncludeFileCXX)
check_include_file_cxx(cxxabi.h GKO_HAVE_CXXABI_H)

Expand All @@ -147,7 +151,8 @@ if(GINKGO_BUILD_HIP)
if(DEFINED ENV{HIP_PLATFORM})
set(GINKGO_HIP_PLATFORM "$ENV{HIP_PLATFORM}")
elseif(GINKGO_HIPCONFIG_PATH)
execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH} --platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM)
execute_process(COMMAND ${GINKGO_HIPCONFIG_PATH}
--platform OUTPUT_VARIABLE GINKGO_HIP_PLATFORM)
else()
message(FATAL_ERROR "No platform could be found for HIP. "
"Set and export the environment variable HIP_PLATFORM.")
Expand All @@ -170,7 +175,8 @@ include(cmake/hip_helpers.cmake)
include(cmake/install_helpers.cmake)
include(cmake/windows_helpers.cmake)

# This is modified from https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace
# This is modified from
# https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace
if(MSVC)
if(BUILD_SHARED_LIBS)
ginkgo_switch_to_windows_dynamic("CXX")
Expand Down Expand Up @@ -204,6 +210,9 @@ endif()
if (GINKGO_BUILD_OMP)
add_subdirectory(omp) # High-performance omp kernels
endif()
if (GINKGO_BUILD_DPCPP)
add_subdirectory(dpcpp) # High-performance DPC++ kernels
endif()
# HIP needs to be last because it builds the GINKGO_RPATH_FOR_HIP variable
# which needs to know the `ginkgo` target.
if(GINKGO_BUILD_HIP)
Expand Down Expand Up @@ -271,7 +280,8 @@ endif()
configure_file(${Ginkgo_SOURCE_DIR}/cmake/ginkgo.pc.in
${Ginkgo_BINARY_DIR}/ginkgo.pc @ONLY)

# WINDOWS NVCC has " inside the string, add escape charater to avoid config problem.
# WINDOWS NVCC has " inside the string, add escape character
# to avoid config problem.
ginkgo_modify_flags(CMAKE_CUDA_FLAGS)
ginkgo_modify_flags(CMAKE_CUDA_FLAGS_DEBUG)
ginkgo_modify_flags(CMAKE_CUDA_FLAGS_RELEASE)
Expand All @@ -296,7 +306,8 @@ add_custom_target(test_install
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}
# `--config cfg` is ignored by single-configuration generator.
# `$<CONFIG>` is always be the same as `CMAKE_BUILD_TYPE` in single-configuration generator.
# `$<CONFIG>` is always the same as `CMAKE_BUILD_TYPE` in a
# single-configuration generator.
COMMAND ${CMAKE_COMMAND} --build ${Ginkgo_BINARY_DIR}/test_install --config $<CONFIG>
COMMAND ${GINKGO_TEST_INSTALL_COMMAND}
COMMENT "Running a test on the installed binaries. This requires running `(sudo) make install` first.")
Expand All @@ -309,7 +320,6 @@ set(CPACK_PACKAGE_CONTACT "ginkgo.library@gmail.com")
include(CPack)

# And finally, print the configuration to screen:
#
if(GINKGO_CONFIG_LOG_DETAILED)
FILE(READ ${PROJECT_BINARY_DIR}/detailed.log GINKGO_LOG_SUMMARY)
else()
Expand Down
16 changes: 8 additions & 8 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ look at our coding guidelines before proposing a pull request.

Ginkgo is divided into a `core` module with common functionalities independent
of the architecture, and several kernel modules (`reference`, `omp`, `cuda`,
`hip`) which contain low-level computational routines for each supported
architecture.
`hip`, `dpcpp`) which contain low-level computational routines for each
supported architecture.

### Extended header files

Expand Down Expand Up @@ -521,12 +521,12 @@ existing code has been broken.
need to be performed with data that can be as small as possible. For example,
matrices smaller than 5x5 are acceptable. This allows the reviewers to verify
the results for exactness with tools such as MATLAB.
* OpenMP, CUDA and HIP kernels have to be tested against the reference kernels.
Hence data for the tests of these kernels can be generated in the test files
using helper functions or by using external files to be read through the
standard input. In particular for CUDA and HIP, the data size should be at
least bigger than the architecture's warp size to ensure there is no corner
case in the kernels.
* OpenMP, CUDA, HIP and DPC++ kernels have to be tested against the reference
kernels. Hence data for the tests of these kernels can be generated in the
test files using helper functions or by using external files to be read
through the standard input. In particular for CUDA, HIP and DPC++ the data
size should be at least bigger than the architecture's warp size to ensure
there is no corner case in the kernels.


## Documentation style
Expand Down
4 changes: 4 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ Ginkgo adds the following additional switches to control what is being built:
* `-DGINKGO_BUILD_CUDA={ON, OFF}` builds optimized cuda versions of the kernels
(requires CUDA), default is `ON` if a CUDA compiler could be detected,
`OFF` otherwise.
* `-DGINKGO_BUILD_DPCPP={ON, OFF}` builds optimized DPC++ versions of the
kernels (requires `CMAKE_CXX_COMPILER` to be set to the `dpcpp` compiler).
The default is `ON` if `CMAKE_CXX_COMPILER` is a DPC++ compiler, `OFF`
otherwise.
* `-DGINKGO_BUILD_HIP={ON, OFF}` builds optimized HIP versions of the kernels
(requires HIP), default is `ON` if an installation of HIP could be detected,
`OFF` otherwise.
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,12 @@ mkdir build; cd build
cmake -G "Unix Makefiles" .. && make
```

By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run examples with this
executor. You would need to explicitly compile with the OpenMP and CUDA modules enabled
to run with these executors. Please refer to the [Installation page](./INSTALL.md).
By default, `GINKGO_BUILD_REFERENCE` is enabled. You should be able to run
examples with this executor. By default, Ginkgo tries to enable the relevant
modules depending on your machine environment (presence of CUDA, ...). You can
also explicitly compile with the OpenMP, CUDA, HIP or DPC++ modules enabled to
run the examples with these executors. Please refer to the [Installation
page](./INSTALL.md) for more details.

After the installation, CMake can find ginkgo with `find_package(Ginkgo)`.
An example can be found in the [`test_install`](test_install/CMakeLists.txt).
Expand Down
7 changes: 6 additions & 1 deletion benchmark/utils/general.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,14 @@ const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
return gko::CudaExecutor::create(FLAGS_device_id,
gko::OmpExecutor::create(), true);
}},
{"hip", [] {
{"hip",
[] {
return gko::HipExecutor::create(FLAGS_device_id,
gko::OmpExecutor::create(), true);
}},
{"dpcpp", [] {
return gko::DpcppExecutor::create(FLAGS_device_id,
gko::OmpExecutor::create());
}}};


Expand Down
9 changes: 9 additions & 0 deletions benchmark/utils/overhead_linop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ GKO_DECLARE_ALL;
} // namespace hip


// DPC++ variant of the `overhead` kernel namespace. Its contents come
// entirely from the GKO_DECLARE_ALL macro, which is #undef'd right after this
// block. NOTE(review): the macro definition is outside this hunk; presumably
// it expands to the overhead kernel declarations - confirm.
namespace dpcpp {
namespace overhead {

GKO_DECLARE_ALL;

} // namespace overhead
} // namespace dpcpp


#undef GKO_DECLARE_ALL


Expand Down
1 change: 1 addition & 0 deletions cmake/GinkgoConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ set(GINKGO_BUILD_REFERENCE @GINKGO_BUILD_REFERENCE@)
set(GINKGO_BUILD_OMP @GINKGO_BUILD_OMP@)
set(GINKGO_BUILD_CUDA @GINKGO_BUILD_CUDA@)
set(GINKGO_BUILD_HIP @GINKGO_BUILD_HIP@)
set(GINKGO_BUILD_DPCPP @GINKGO_BUILD_DPCPP@)

set(GINKGO_DEVEL_TOOLS @GINKGO_DEVEL_TOOLS@)
set(GINKGO_BUILD_TESTS @GINKGO_BUILD_TESTS@)
Expand Down
11 changes: 11 additions & 0 deletions cmake/autodetect_executors.cmake
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
include(CheckLanguage)

# Every GINKGO_HAS_<executor> flag starts OFF; the checks further down in this
# file switch a flag ON when the corresponding toolchain is detected.
set(GINKGO_HAS_OMP OFF)
set(GINKGO_HAS_CUDA OFF)
set(GINKGO_HAS_HIP OFF)
set(GINKGO_HAS_DPCPP OFF)

# Toolchain probes for OpenMP and CUDA.
find_package(OpenMP)
check_language(CUDA)

# DPC++ probe: try to build the small test source with the current C++
# compiler in C++17 mode; the result is stored in GKO_CAN_COMPILE_DPCPP.
try_compile(
    GKO_CAN_COMPILE_DPCPP
    ${PROJECT_BINARY_DIR}/dpcpp
    SOURCES ${PROJECT_SOURCE_DIR}/dpcpp/test_dpcpp.dp.cpp
    CXX_STANDARD 17
)

if(OpenMP_CXX_FOUND)
if(NOT DEFINED GINKGO_BUILD_OMP)
Expand All @@ -25,3 +29,10 @@ if(GINKGO_HIPCONFIG_PATH)
endif()
set(GINKGO_HAS_HIP ON)
endif()

# The DPC++ test source compiled: flag DPC++ as available. The status message
# is printed only when GINKGO_BUILD_DPCPP has not been defined yet.
if(GKO_CAN_COMPILE_DPCPP)
    set(GINKGO_HAS_DPCPP ON)
    if(NOT DEFINED GINKGO_BUILD_DPCPP)
        message(STATUS "Enabling DPCPP executor")
    endif()
endif()
20 changes: 20 additions & 0 deletions cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,26 @@ function(ginkgo_create_test test_name)
add_test(NAME ${REL_BINARY_DIR}/${test_name} COMMAND ${TEST_TARGET_NAME})
endfunction(ginkgo_create_test)

# Builds a DPC++ GoogleTest executable from `<test_name>.dp.cpp` and registers
# it as a CTest test.
#
# Arguments:
#   test_name - base name of the test source (without the `.dp.cpp` suffix);
#               also used as the executable's output name.
#   ARGN      - extra libraries to link, in addition to ginkgo and GTest.
function(ginkgo_create_dpcpp_test test_name)
    # Location of the current binary directory relative to the project's
    # binary directory; it prefixes both the target name and the test name.
    file(RELATIVE_PATH DPCPP_TEST_REL_DIR
        ${PROJECT_BINARY_DIR}
        ${CMAKE_CURRENT_BINARY_DIR})

    # The target name is the relative path with "/" replaced by "_".
    string(REPLACE "/" "_" DPCPP_TEST_TARGET "${DPCPP_TEST_REL_DIR}/${test_name}")

    add_executable(${DPCPP_TEST_TARGET} ${test_name}.dp.cpp)
    set_target_properties(${DPCPP_TEST_TARGET}
        PROPERTIES
            OUTPUT_NAME ${test_name})

    # Compile settings: C++17, the DPC++ flags and the Ginkgo binary directory
    # on the include path.
    target_compile_features(${DPCPP_TEST_TARGET} PUBLIC cxx_std_17)
    target_compile_options(${DPCPP_TEST_TARGET} PRIVATE "${GINKGO_DPCPP_FLAGS}")
    target_include_directories(${DPCPP_TEST_TARGET}
        PRIVATE "$<BUILD_INTERFACE:${Ginkgo_BINARY_DIR}>")

    # The circular-dependency flags, when enabled, are added before the
    # libraries so the resulting link line keeps its order.
    if(GINKGO_CHECK_CIRCULAR_DEPS)
        target_link_libraries(${DPCPP_TEST_TARGET}
            PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}")
    endif()
    target_link_libraries(${DPCPP_TEST_TARGET}
        PRIVATE ginkgo GTest::Main GTest::GTest ${ARGN})

    add_test(NAME ${DPCPP_TEST_REL_DIR}/${test_name}
        COMMAND ${DPCPP_TEST_TARGET})
endfunction(ginkgo_create_dpcpp_test)

function(ginkgo_create_thread_test test_name)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
Expand Down
Loading

0 comments on commit 99c2004

Please sign in to comment.