diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 9d22406f9ac..dcc452b293b 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -40,6 +40,7 @@ function(ginkgo_create_dpcpp_test test_name) add_executable(${test_target_name} ${test_name}.dp.cpp) target_compile_features(${test_target_name} PUBLIC cxx_std_17) target_compile_options(${test_target_name} PRIVATE "${GINKGO_DPCPP_FLAGS}") + target_compile_options(${test_target_name} PRIVATE "${GINKGO_COMPILER_FLAGS}") target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel) ginkgo_set_test_target_properties(${test_name} ${test_target_name}) # Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test. diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index fee9ec36396..dc37a35ebd3 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -66,8 +66,11 @@ target_sources(ginkgo_dpcpp ginkgo_compile_features(ginkgo_dpcpp) target_compile_definitions(ginkgo_dpcpp PRIVATE GKO_COMPILING_DPCPP) +# -sycl-std=2017 is a temporary workaround after 2021.4 to propagate the subgroup setting correctly +set(GINKGO_DPCPP_FLAGS "-sycl-std=2017") set(GINKGO_DPCPP_FLAGS ${GINKGO_DPCPP_FLAGS} PARENT_SCOPE) target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_DPCPP_FLAGS}") +target_compile_options(ginkgo_dpcpp PRIVATE "${GINKGO_COMPILER_FLAGS}") # Note: add MKL as PRIVATE not PUBLIC (MKL example shows) to avoid propagating # find_package(MKL) everywhere when linking ginkgo (see the MKL example # https://software.intel.com/content/www/us/en/develop/documentation/onemkl-windows-developer-guide/top/getting-started/cmake-config-for-onemkl.html) diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 0c89530d1d2..9a86ab9cd15 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -69,14 +69,14 @@ namespace dpcpp { namespace dense { +// Disable the 64 subgroup. 
CPU supports 64 now, but conj_transpose will +// lead to CL_OUT_OF_RESOURCES. TODO: investigate this issue. using KCFG_1D = ConfigSet<11, 7>; constexpr auto kcfg_1d_list = - syn::value_list(); -constexpr auto subgroup_list = - syn::value_list(); + syn::value_list(); +constexpr auto subgroup_list = syn::value_list(); constexpr auto kcfg_1d_array = syn::as_array(kcfg_1d_list); constexpr int default_block_size = 256;