Skip to content

Commit

Permalink
update latest ConfigSet and dense kernel/test
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai committed Jun 1, 2021
1 parent fee29ee commit 4fccdbd
Show file tree
Hide file tree
Showing 7 changed files with 616 additions and 238 deletions.
3 changes: 2 additions & 1 deletion dpcpp/base/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/types.hpp>


#include "core/base/types.hpp"
#include "dpcpp/base/dim3.dp.hpp"


Expand Down Expand Up @@ -142,7 +143,7 @@ bool validate(sycl::queue *queue, unsigned workgroup_size,


template <typename IterArr, typename Validate>
ConfigSetType get_first_cfg(IterArr &arr, Validate verify)
std::uint32_t get_first_cfg(IterArr &arr, Validate verify)
{
for (auto &cfg : arr) {
if (verify(cfg)) {
Expand Down
7 changes: 4 additions & 3 deletions dpcpp/components/prefix_sum.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/types.hpp>


#include "core/base/types.hpp"
#include "dpcpp/base/helper.hpp"
#include "dpcpp/components/prefix_sum.dp.hpp"

Expand All @@ -52,7 +53,7 @@ namespace components {
using BlockCfg = ConfigSet<11>;

constexpr auto block_cfg_list =
::gko::syn::value_list<ConfigSetType, BlockCfg::encode(512),
::gko::syn::value_list<std::uint32_t, BlockCfg::encode(512),
BlockCfg::encode(256), BlockCfg::encode(128)>();

GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(start_prefix_sum, start_prefix_sum)
Expand All @@ -73,8 +74,8 @@ void prefix_sum(std::shared_ptr<const DpcppExecutor> exec, IndexType *counts,
if (num_entries > 0) {
auto queue = exec->get_queue();
constexpr auto block_cfg_array = as_array(block_cfg_list);
const ConfigSetType cfg =
get_first_cfg(block_cfg_array, [&queue](ConfigSetType cfg) {
const std::uint32_t cfg =
get_first_cfg(block_cfg_array, [&queue](std::uint32_t cfg) {
return validate(queue, BlockCfg::decode<0>(cfg), 16);
});
const auto wg_size = BlockCfg::decode<0>(cfg);
Expand Down
9 changes: 5 additions & 4 deletions dpcpp/components/prefix_sum.dp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <CL/sycl.hpp>


#include "core/base/types.hpp"
#include "dpcpp/base/dim3.dp.hpp"
#include "dpcpp/base/dpct.hpp"
#include "dpcpp/components/cooperative_groups.dp.hpp"
Expand Down Expand Up @@ -125,7 +126,7 @@ __dpct_inline__ void subwarp_prefix_sum(ValueType element,
* @note To calculate the prefix sum over an array of size bigger than
* `block_size`, `finalize_prefix_sum` has to be used as well.
*/
template <ConfigSetType block_size, typename ValueType>
template <std::uint32_t block_size, typename ValueType>
void start_prefix_sum(size_type num_elements, ValueType *__restrict__ elements,
ValueType *__restrict__ block_sum,
sycl::nd_item<3> item_ct1,
Expand Down Expand Up @@ -178,7 +179,7 @@ void start_prefix_sum(size_type num_elements, ValueType *__restrict__ elements,
}
}

template <ConfigSetType block_size, typename ValueType>
template <std::uint32_t block_size, typename ValueType>
void start_prefix_sum(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type num_elements,
ValueType *elements, ValueType *block_sum)
Expand Down Expand Up @@ -214,7 +215,7 @@ void start_prefix_sum(dim3 grid, dim3 block, size_t dynamic_shared_memory,
*
* @note To calculate a prefix sum, first `start_prefix_sum` has to be called.
*/
template <ConfigSetType block_size, typename ValueType>
template <std::uint32_t block_size, typename ValueType>
void finalize_prefix_sum(size_type num_elements,
ValueType *__restrict__ elements,
const ValueType *__restrict__ block_sum,
Expand All @@ -231,7 +232,7 @@ void finalize_prefix_sum(size_type num_elements,
}
}

template <ConfigSetType block_size, typename ValueType>
template <std::uint32_t block_size, typename ValueType>
void finalize_prefix_sum(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type num_elements,
ValueType *elements, const ValueType *block_sum)
Expand Down
11 changes: 6 additions & 5 deletions dpcpp/components/reduction.dp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/synthesizer/containers.hpp>


#include "core/base/types.hpp"
#include "core/synthesizer/implementation_selection.hpp"
#include "dpcpp/base/config.hpp"
#include "dpcpp/base/dim3.dp.hpp"
Expand All @@ -63,7 +64,7 @@ namespace dpcpp {
constexpr int default_block_size = 256;
using KCFG_1D = ConfigSet<11, 7>;
constexpr auto kcfg_1d_list =
syn::value_list<ConfigSetType, KCFG_1D::encode(512, 64),
syn::value_list<std::uint32_t, KCFG_1D::encode(512, 64),
KCFG_1D::encode(512, 32), KCFG_1D::encode(512, 16),
KCFG_1D::encode(256, 32), KCFG_1D::encode(256, 16),
KCFG_1D::encode(256, 8)>();
Expand Down Expand Up @@ -201,7 +202,7 @@ void reduce_array(size_type size, const ValueType *__restrict__ source,
* `source` of any size. Has to be called a second time on `result` to reduce
* an array larger than `block_size`.
*/
template <ConfigSetType cfg, typename ValueType>
template <std::uint32_t cfg, typename ValueType>
void reduce_add_array(
size_type size, const ValueType *__restrict__ source,
ValueType *__restrict__ result, sycl::nd_item<3> item_ct1,
Expand All @@ -216,7 +217,7 @@ void reduce_add_array(
}
}

template <ConfigSetType cfg = KCFG_1D::encode(256, 32), typename ValueType>
template <std::uint32_t cfg = KCFG_1D::encode(256, 32), typename ValueType>
void reduce_add_array(dim3 grid, dim3 block, size_t dynamic_shared_memory,
sycl::queue *stream, size_type size,
const ValueType *source, ValueType *result)
Expand Down Expand Up @@ -263,8 +264,8 @@ ValueType reduce_add_array(std::shared_ptr<const DpcppExecutor> exec,
ValueType answer = zero<ValueType>();
auto queue = exec->get_queue();
constexpr auto kcfg_1d_array = as_array(kcfg_1d_list);
const ConfigSetType cfg =
get_first_cfg(kcfg_1d_array, [&queue](ConfigSetType cfg) {
const std::uint32_t cfg =
get_first_cfg(kcfg_1d_array, [&queue](std::uint32_t cfg) {
return validate(queue, KCFG_1D::decode<0>(cfg),
KCFG_1D::decode<1>(cfg));
});
Expand Down
Loading

0 comments on commit 4fccdbd

Please sign in to comment.