diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 9c39d074ca6b..41788149c253 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -29,6 +29,8 @@ ADD_SUBDIRECTORY("event/")
 ADD_SUBDIRECTORY("idx/")
 ADD_SUBDIRECTORY("kernel/")
 ADD_SUBDIRECTORY("math/sincos/")
+ADD_SUBDIRECTORY("math/unaryOps/")
+ADD_SUBDIRECTORY("math/binaryOps/")
 ADD_SUBDIRECTORY("mem/buf/")
 ADD_SUBDIRECTORY("mem/view/")
 ADD_SUBDIRECTORY("mem/p2p/")
diff --git a/test/unit/math/binaryOps/CMakeLists.txt b/test/unit/math/binaryOps/CMakeLists.txt
new file mode 100644
index 000000000000..d71ece931e1d
--- /dev/null
+++ b/test/unit/math/binaryOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Builds the "binaryOps" test executable from the sources collected in
+# _FILES_SOURCE, links it against the "common" target and registers it
+# as a test via ADD_TEST.
+SET(_TARGET_NAME "binaryOps")
+
+append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE)
+
+ALPAKA_ADD_EXECUTABLE(
+    ${_TARGET_NAME}
+    ${_FILES_SOURCE})
+TARGET_INCLUDE_DIRECTORIES(
+    ${_TARGET_NAME}
+    PRIVATE ${Boost_INCLUDE_DIRS})
+TARGET_LINK_LIBRARIES(
+    ${_TARGET_NAME}
+    PRIVATE common)
+
+SET_TARGET_PROPERTIES(${_TARGET_NAME} PROPERTIES FOLDER "test/unit")
+
+ADD_TEST(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_ALPAKA_TEST_OPTIONS})
diff --git a/test/unit/math/binaryOps/src/binaryOps.cpp b/test/unit/math/binaryOps/src/binaryOps.cpp
new file mode 100644
index 000000000000..ef7a40dfd0e8
--- /dev/null
+++ b/test/unit/math/binaryOps/src/binaryOps.cpp
@@ -0,0 +1,607 @@
+/** Copyright 2019 Axel Huebl, Benjamin Worpitz
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include "../../dataGen.hpp"
+
+/** List of all operators
+ * operator  | in std | definition range
+ * atan2     | Y      | R^2\{(0,0)}
+ * fmod      | Y      | R^2\{(x,0)|x in R}
+ * max       | Y      | R^2
+ * min       | Y      | R^2
+ * pow       | Y      | tested below with Range::POSITIVE_ONLY
+ * remainder | Y      | R^2\{(x,0)|x in R}
+ *
+ * sincos is tested separately,
+ * because it manipulates two inputs and doesn't return a value.
+ *
+ * If you want to add a new operation simply add it to the array.
+ * 1. Specify the std:: implementation.
+ * 2. If the std function takes and returns const references (like std::min),
+ *    pass it as the second (const) callback and set the first one to nullptr.
+ * 3. Specify the alpaka implementation.
+ * 4. Define the range in which the operator should be tested.
+ *
+ * If you need to add a new range you have to add it to the switch case
+ * - in the kernel class
+ * - in the TestTemplate
+ *
+ * If the wrong range is used,
+ * the behaviour depends on the individual implementation.
+ */
+
+//! @enum Range
+//! @brief Possible definition ranges.
+//!
+//! The argument buffers hold +0.0 and -0.0 at the two middle indices
+//! (sizeArgs / 2 - 1 and sizeArgs / 2); the ranges differ in how the kernel
+//! and the host check treat those and the remaining indices.
+enum class Range
+{
+    // Only the indices below sizeArgs / 2 - 1 are used.
+    POSITIVE_ONLY,
+    // The two middle (zero) indices are skipped.
+    // NOTE(review): spelled with the digit 0, not the letter O; kept as-is
+    // because the kernel and the test refer to this exact identifier.
+    NOT_ZER0,
+    // At the two middle indices x is replaced by the first x value.
+    X_NOT_ZERO,
+    // At the two middle indices y is replaced by the first y value.
+    Y_NOT_ZERO,
+    // Every index is used unchanged.
+    UNRESTRICTED
+};
+
+// C-Style Callbacks for std::math and alpaka::math.
+// alpaka callback: takes the accelerator and both arguments by const reference.
+template<
+    typename TAcc,
+    typename T>
+using alpaka_func_ptr = T (*) (TAcc const & , T const &, T const &);
+
+// std callback taking both arguments by value.
+template<
+    typename T>
+using std_func_ptr = T (*) (T, T);
+
+// used for all operators that need const references (like min())
+template<
+    typename T>
+using std_func_ptr_const = T const & (*) (T const & , T const &);
+
+/*!
+ * @struct TestStruct
+ * @tparam TAcc The type of the used accelerator, important for alpaka callback.
+ * @tparam T The data type (float || double).
+ * @var stdOp
+ * @brief Callback the std implementation.
+ * @var stdAlternative
+ * @brief If the callback signature uses const references.
+ * @var alpakaOp
+ * @brief Callback to alpaka implementation.
+ * @note One, and only one, of stdOp or stdAlternative should be a nullptr.
+ * @fn getStdOpRes
+ * @brief Checks which std-callback is used and returns the result.
+ * @throws std::runtime_error If both are nullptr.
+ */
+
+template<
+    typename TAcc,
+    typename T>
+struct TestStruct
+{
+    std_func_ptr< T > stdOp;
+    std_func_ptr_const< T > stdAlternative;
+    alpaka_func_ptr<
+        TAcc,
+        T
+    > alpakaOp;
+    Range range;
+
+    // const-qualified: it only reads the callbacks, so it can be used
+    // through a const reference to the struct.
+    T getStdOpRes(
+        T argX,
+        T argY
+    ) const
+    {
+        if( stdOp != nullptr )
+            return stdOp( argX, argY );
+        if( stdAlternative != nullptr )
+            return stdAlternative( argX, argY );
+        throw std::runtime_error(
+            "At least one std implementation should be given" );
+    }
+};
+
+//! Picks the argument pair that belongs to test index i for the given range.
+//!
+//! The kernel and the host-side check both call this function, so the set of
+//! evaluated indices and the substituted arguments cannot drift apart.
+//! fillWithRndArgs stores +0.0 / -0.0 at the indices sizeArgs / 2 - 1 and
+//! sizeArgs / 2; those are the "zero indices" handled below.
+//!
+//! @param range The definition range of the tested operator.
+//! @param i The index into the argument buffers, i < sizeArgs.
+//! @param sizeArgs The number of elements in argsX and argsY.
+//! @param argsX The x arguments.
+//! @param argsY The y arguments.
+//! @param argX Receives the x argument to use.
+//! @param argY Receives the y argument to use.
+//! @return false if index i has to be skipped for this range.
+ALPAKA_NO_HOST_ACC_WARNING
+template<
+    typename TData,
+    typename TIdx
+>
+ALPAKA_FN_HOST_ACC auto selectArgs(
+    Range const range,
+    TIdx const i,
+    TIdx const sizeArgs,
+    TData const * const argsX,
+    TData const * const argsY,
+    TData & argX,
+    TData & argY
+) -> bool
+{
+    // Written as i + 1 == ... so that nothing is subtracted from an
+    // unsigned value.
+    bool const isZeroIdx =
+        ( i + 1 == sizeArgs / 2 ) || ( i == sizeArgs / 2 );
+
+    argX = argsX[i];
+    argY = argsY[i];
+
+    switch( range )
+    {
+        case Range::POSITIVE_ONLY:
+            // Only the leading block of non-negative random values.
+            return i + 1 < sizeArgs / 2;
+
+        case Range::NOT_ZER0:
+            return !isZeroIdx;
+
+        case Range::X_NOT_ZERO:
+            if( isZeroIdx )
+                argX = argsX[0];
+            return true;
+
+        case Range::Y_NOT_ZERO:
+            if( isZeroIdx )
+                argY = argsY[0];
+            return true;
+
+        case Range::UNRESTRICTED:
+            return true;
+    }
+    // Unknown range: evaluate nothing.
+    return false;
+}
+
+//! Kernel that evaluates one operator per thread.
+//!
+//! Thread t (t < numOps) applies structs[t].alpakaOp to every argument pair
+//! selected by selectArgs and writes the result for index i to
+//! results[i + t * sizeArgs]. Skipped indices are left untouched.
+//!
+//! NOTE(review): the callbacks inside structs are addresses taken in the
+//! host code of TestTemplate; whether they can be called on non-CPU
+//! accelerators is not visible here - confirm.
+class BinaryOpsKernel
+{
+public:
+    ALPAKA_NO_HOST_ACC_WARNING
+    template<
+        typename TAcc,
+        typename TData,
+        typename TIdx
+    >
+    ALPAKA_FN_ACC auto operator()(
+        TAcc const & acc,
+        TIdx const & numOps,
+        TIdx const & sizeArgs,
+        TData const * const argsX,
+        TData const * const argsY,
+        TestStruct<
+            TAcc,
+            TData
+        > const * const structs,
+        TData * results
+    ) const -> void
+    {
+        auto const
+            gridThreadIdx
+            (
+                alpaka::idx::getIdx<
+                    alpaka::Grid,
+                    alpaka::Threads
+                >( acc )[0u]
+            );
+        if( gridThreadIdx < numOps )
+        {
+            auto const & op = structs[gridThreadIdx];
+            for( TIdx i( 0 ); i < sizeArgs; ++i )
+            {
+                TData argX;
+                TData argY;
+                if( selectArgs( op.range, i, sizeArgs, argsX, argsY, argX, argY ) )
+                {
+                    results[i + gridThreadIdx * sizeArgs] =
+                        op.alpakaOp( acc, argX, argY );
+                }
+            }
+        }
+    }
+};
+
+//! Runs every listed operator on one accelerator type and compares the
+//! results with the std:: implementation.
+//! @tparam Data The floating point type of arguments and results.
+template < typename Data >
+struct TestTemplate
+{
+    template<
+        typename TAcc>
+    void operator()( )
+    {
+        using Dim = alpaka::dim::Dim< TAcc >;
+        using Idx = alpaka::idx::Idx< TAcc >;
+        using DevAcc = alpaka::dev::Dev< TAcc >;
+        using PltfAcc = alpaka::pltf::Pltf< DevAcc >;
+        using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >;
+        using PltfHost = alpaka::pltf::PltfCpu;
+        using Op = TestStruct< TAcc, Data >;
+
+        // the functions that will be tested
+        Op arr [] =
+        {/* normal callback, const callback, alpaka callback, definition range*/
+            { &std::atan2, nullptr, &alpaka::math::atan2, Range::NOT_ZER0 },
+            { &std::fmod, nullptr, &alpaka::math::fmod, Range::Y_NOT_ZERO },
+            // max is defined on all of R^2, exactly like min.
+            { nullptr, &std::max, &alpaka::math::max, Range::UNRESTRICTED },
+            { nullptr, &std::min, &alpaka::math::min, Range::UNRESTRICTED },
+            { &std::pow, nullptr, &alpaka::math::pow, Range::POSITIVE_ONLY },
+            // NOTE(review): the operator list at the top of the file gives
+            // R^2 without y == 0 for remainder, which would be Y_NOT_ZERO.
+            // Left unchanged - confirm the intended range.
+            { &std::remainder, nullptr, &alpaka::math::remainder, Range::POSITIVE_ONLY }
+        };
+
+        Idx const numOps = sizeof( arr ) / sizeof( Op );
+        Idx const elementsPerThread( 1u );
+        Idx const sizeArgs( 100u );
+        Idx const sizeRes = sizeArgs * numOps;
+        constexpr size_t randomRange = 100u;
+
+        // Create the kernel function object.
+        BinaryOpsKernel kernel;
+
+        // Get the host device.
+        auto const devHost( alpaka::pltf::getDevByIdx< PltfHost >( 0u ) );
+
+        // Select a device to execute on.
+        auto const devAcc( alpaka::pltf::getDevByIdx< PltfAcc >( 0u ) );
+
+        // Get a queue on this device.
+        QueueAcc queue( devAcc );
+
+        // One thread per tested operator.
+        alpaka::vec::Vec< Dim, Idx > const extent( numOps );
+
+        // Let alpaka calculate good block and grid sizes given our full problem extent.
+        alpaka::workdiv::WorkDivMembers< Dim, Idx > const workDiv(
+            alpaka::workdiv::getValidWorkDiv< TAcc >(
+                devAcc,
+                extent,
+                elementsPerThread,
+                false,
+                alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted
+            )
+        );
+
+        // Allocate host memory buffers.
+        auto memBufHostArgsX( alpaka::mem::buf::alloc< Data, Idx >( devHost, sizeArgs ) );
+        auto memBufHostArgsY( alpaka::mem::buf::alloc< Data, Idx >( devHost, sizeArgs ) );
+        auto memBufHostRes( alpaka::mem::buf::alloc< Data, Idx >( devHost, sizeRes ) );
+        auto memBufHostStructs( alpaka::mem::buf::alloc< Op, Idx >( devHost, extent ) );
+
+        Data * const pBufHostArgsX = alpaka::mem::view::getPtrNative( memBufHostArgsX );
+        Data * const pBufHostArgsY = alpaka::mem::view::getPtrNative( memBufHostArgsY );
+        Data * const pBufHostRes = alpaka::mem::view::getPtrNative( memBufHostRes );
+        Op * const pBufHostStructs = alpaka::mem::view::getPtrNative( memBufHostStructs );
+
+        // This is just for a better understanding which results are unchanged.
+        for( Idx k( 0 ); k < sizeRes; ++k )
+        {
+            pBufHostRes[k] = -1;
+        }
+
+        for( Idx i( 0 ); i < numOps; ++i )
+        {
+            pBufHostStructs[i] = arr[i];
+        }
+        unsigned long seed =
+            test::fillWithRndArgs< Data >( pBufHostArgsX, sizeArgs, randomRange );
+        std::cout << "Using seed: " << seed <<" for x-values\n";
+        seed =
+            test::fillWithRndArgs< Data >( pBufHostArgsY, sizeArgs, randomRange );
+        std::cout << "Using seed: " << seed <<" for y-values\n";
+
+        // Allocate the buffer on the accelerator.
+        auto memBufAccArgsX( alpaka::mem::buf::alloc< Data, Idx >( devAcc, sizeArgs ) );
+        auto memBufAccArgsY( alpaka::mem::buf::alloc< Data, Idx >( devAcc, sizeArgs ) );
+        auto memBufAccRes( alpaka::mem::buf::alloc< Data, Idx >( devAcc, sizeRes ) );
+        auto memBufAccStructs( alpaka::mem::buf::alloc< Op, Idx >( devAcc, numOps ) );
+
+        // Copy Host -> Acc.
+        alpaka::mem::view::copy( queue, memBufAccArgsX, memBufHostArgsX, sizeArgs );
+        alpaka::mem::view::copy( queue, memBufAccArgsY, memBufHostArgsY, sizeArgs );
+        alpaka::mem::view::copy( queue, memBufAccRes, memBufHostRes, sizeRes );
+        alpaka::mem::view::copy( queue, memBufAccStructs, memBufHostStructs, numOps );
+
+        auto pMemBufAccArgsX = alpaka::mem::view::getPtrNative( memBufAccArgsX );
+        auto pMemBufAccArgsY = alpaka::mem::view::getPtrNative( memBufAccArgsY );
+        auto pMemBufAccRes = alpaka::mem::view::getPtrNative( memBufAccRes );
+        auto pMemBufAccStructs = alpaka::mem::view::getPtrNative( memBufAccStructs );
+
+        // Create the kernel execution task.
+        auto const taskKernel(
+            alpaka::kernel::createTaskKernel< TAcc >(
+                workDiv,
+                kernel,
+                numOps,
+                sizeArgs,
+                pMemBufAccArgsX,
+                pMemBufAccArgsY,
+                pMemBufAccStructs,
+                pMemBufAccRes
+            )
+        );
+
+        // Enqueue the kernel execution task.
+        alpaka::queue::enqueue( queue, taskKernel );
+
+        // Copy back the result.
+        alpaka::mem::view::copy( queue, memBufHostRes, memBufAccRes, sizeRes );
+
+        // Wait for the queue to finish the memory operation.
+        alpaka::wait::wait( queue );
+
+        // Check device result against host result.
+        // selectArgs yields exactly the indices and arguments the kernel used.
+        for( Idx j( 0 ); j < numOps; ++j )
+        {
+            Op const & op = arr[j];
+            for( Idx i( 0 ); i < sizeArgs; ++i )
+            {
+                Data argX;
+                Data argY;
+                if( !selectArgs( op.range, i, sizeArgs, pBufHostArgsX, pBufHostArgsY, argX, argY ) )
+                    continue;
+                Data const stdRes = op.getStdOpRes( argX, argY );
+                REQUIRE( stdRes ==
+                    Approx( pBufHostRes[i + sizeArgs * j] ) );
+            }
+        }
+    }
+};
+
+TEST_CASE("binaryOps", "[binaryOps]")
+{
+    using TestAccs = alpaka::test::acc::EnabledAccs<
+        alpaka::dim::DimInt<1u>,
+        std::size_t>;
+
+    // Both data types named in the TestStruct documentation are covered.
+    alpaka::meta::forEachType< TestAccs >( TestTemplate< float >() );
+    alpaka::meta::forEachType< TestAccs >( TestTemplate< double >() );
+}
diff --git a/test/unit/math/dataGen.hpp b/test/unit/math/dataGen.hpp
new file mode 100644
index 000000000000..fbeb0c7d9549
--- /dev/null
+++ b/test/unit/math/dataGen.hpp
@@ -0,0 +1,81 @@
+/** Copyright 2019 Axel Huebl, Benjamin Worpitz
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <random>
+
+/**
+ * @namespace test
+ * @brief Only contains fillWithRndArgs.
+ * @fn fillWithRndArgs
+ * @brief Fills a buffer with random test arguments and two signed zeros.
+ * @tparam Data The used Buffer-type.
+ * @param buffer The buffer that should be filled.
+ * @param size The size of the used buffer.
+ * @param range The Range, around Zero, for the data.
+ * @return The seed that was used for the random engine.
+ *
+ * Layout for size >= 2:
+ *   [0, size/2 - 1)  random values in [0, range)
+ *   size/2 - 1       +0.0
+ *   size/2           -0.0
+ *   (size/2, size)   random values in [-range, 0)
+ * A buffer of size 1 only receives -0.0; a buffer of size 0 is not touched.
+ */
+
+namespace test
+{
+    template< typename Data >
+    auto fillWithRndArgs(
+        Data * buffer,
+        size_t size,
+        size_t range
+    ) -> unsigned long
+    {
+        using Engine = std::default_random_engine;
+
+        std::random_device rd {};
+        // for every accelerator type one seed will be generated
+        unsigned long const seed = rd( );
+        // The cast avoids a narrowing error where result_type is narrower
+        // than unsigned long.
+        Engine eng { static_cast< Engine::result_type >( seed ) };
+        // these pseudo-random numbers are implementation specific
+        // and are not portable
+        Data const limit = static_cast< Data >( range );
+        std::uniform_real_distribution< Data > dist( Data( 0 ), limit );
+
+        // Nothing to fill; also keeps the index arithmetic below from wrapping.
+        if( size == 0 )
+            return seed;
+
+        size_t const mid = size / 2;
+
+        // Initiate the arguments.
+        for( size_t i( 0 ); i + 1 < mid; ++i )
+        {
+            buffer[i] = dist( eng );
+        }
+
+        // Define the middle of the args-buffer as zeros
+        if( mid > 0 )
+            buffer[mid - 1] = Data( 0.0 );
+        buffer[mid] = Data( -0.0 );
+
+        // Change the Range for the random arguments to [-range, 0)
+        for( size_t i( mid + 1 ); i < size; ++i )
+        {
+            buffer[i] = dist( eng ) - limit;
+        }
+
+        return seed;
+    }
+}
diff --git a/test/unit/math/unaryOps/CMakeLists.txt b/test/unit/math/unaryOps/CMakeLists.txt
new file mode 100644
index 000000000000..db4614ee4b70
--- /dev/null
+++ b/test/unit/math/unaryOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+SET(_TARGET_NAME "unaryOps")
+
+append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE)
+
+ALPAKA_ADD_EXECUTABLE(
+    ${_TARGET_NAME}
+    ${_FILES_SOURCE})
+TARGET_INCLUDE_DIRECTORIES(
+    ${_TARGET_NAME}
+    PRIVATE ${Boost_INCLUDE_DIRS})
+TARGET_LINK_LIBRARIES(
+    ${_TARGET_NAME}
+    PRIVATE common)
+
+SET_TARGET_PROPERTIES(${_TARGET_NAME} PROPERTIES FOLDER "test/unit")
+
+ADD_TEST(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_ALPAKA_TEST_OPTIONS})
diff --git a/test/unit/math/unaryOps/src/unaryOps.cpp b/test/unit/math/unaryOps/src/unaryOps.cpp
new file mode 100644
index 000000000000..4a3146745fdb
--- /dev/null
+++ b/test/unit/math/unaryOps/src/unaryOps.cpp
@@ -0,0 +1,513 @@
+/** Copyright 2019 Axel Huebl, Benjamin Worpitz
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "../../dataGen.hpp"
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/** List of all operators
+ * operator | in std | definition range                  | notes
+ * abs      | Y      | R
+ * acos     | Y      | [-1, 1]
+ * asin     | Y      | [-1, 1]
+ * atan     | Y      | R
+ * cbrt     | Y      | R                                 | third root of arg
+ * ceil     | Y      | R
+ * cos      | Y      | R
+ * erf      | Y      | R                                 | error function for arg
+ * exp      | Y      | R                                 | e^arg
+ * floor    | Y      | R
+ * log      | Y      | x > 0
+ * round    | Y      | R
+ * rsqrt    | N      | x > 0                             | inverse square root, see rsqrt() below
+ * sin      | Y      | R
+ * sqrt     | Y      | x >= 0
+ * tan      | Y      | [x | x \= pi/2 + k*pi, k in Z]
+ * trunc    | Y      | R                                 | round towards zero
+ *
+ * If you want to add a new operation simply add it to the array.
+ * 1. Specify the std:: implementation.
+ * 2. Specify the alpaka implementation.
+ * 3. Define the range in which the operator should be tested.
+ *
+ * If you need to add a new range you have to add it to the switch case
+ * - in the kernel class
+ * - in the TestTemplate
+ *
+ * If the wrong range is used,
+ * the behaviour depends on the individual implementation.
+ */
+
+// Custom functions.
+//! Host reference for the inverse square root, which has no std:: counterpart.
+//! @tparam T The floating point type.
+//! @param t The argument.
+//! @return 1 / sqrt(t).
+template< typename T >
+T rsqrt(T t){
+    return 1 / std::sqrt(t);
+}
+
+//! @enum Range
+//! @brief Possible definition ranges.
+enum class Range
+{
+    POSITIVE_ONLY,      // the leading block of arguments, in front of the zeros
+    POSITIVE_AND_ZERO,  // as POSITIVE_ONLY plus the +0.0 entry
+    ONE_NEIGHBOURHOOD,  // [-1, 1]
+    UNRESTRICTED        // every argument in the buffer
+};
+
+// C-Style Callbacks for std::math and alpaka::math.
+// alpaka callback: takes the accelerator and the argument by const reference.
+template<
+    typename TAcc,
+    typename T>
+using alpaka_func_ptr = T (*) (TAcc const & , T const &);
+
+// std callback taking the argument by value.
+template<
+    typename T>
+using std_func_ptr = T (*) (T);
+
+/*!
+ * @struct TestStruct
+ * @tparam TAcc The type of the used accelerator, important for alpaka callback.
+ * @tparam T The data type (float || double).
+ * @var stdOp
+ * @brief Callback the std implementation.
+ * @var alpakaOp
+ * @brief Callback to alpaka implementation.
+ * @note Behaviour for nullptr is undefined.
+ */
+template<
+    typename TAcc,
+    typename T>
+struct TestStruct
+{
+    std_func_ptr< T > stdOp;
+    alpaka_func_ptr<
+        TAcc,
+        T
+    > alpakaOp;
+    Range range;
+};
+
+//! Picks the argument that belongs to test index i for the given range.
+//!
+//! The kernel and the host-side check both call this function, so the set of
+//! evaluated indices and the argument values cannot drift apart.
+//!
+//! @param range The definition range of the tested operator.
+//! @param i The index of the test value, i < sizeArgs.
+//! @param sizeArgs The number of elements in args.
+//! @param args The argument buffer as filled by test::fillWithRndArgs.
+//! @param arg Receives the argument to use.
+//! @return false if index i has to be skipped for this range.
+ALPAKA_NO_HOST_ACC_WARNING
+template<
+    typename TData,
+    typename TIdx
+>
+ALPAKA_FN_HOST_ACC auto selectArg(
+    Range const range,
+    TIdx const i,
+    TIdx const sizeArgs,
+    TData const * const args,
+    TData & arg
+) -> bool
+{
+    switch( range )
+    {
+        case Range::POSITIVE_ONLY:
+            // Stops in front of the +0.0 entry at sizeArgs / 2 - 1.
+            // Written as i + 1 < ... so that nothing is subtracted from an
+            // unsigned value.
+            arg = args[i];
+            return i + 1 < sizeArgs / 2;
+
+        case Range::POSITIVE_AND_ZERO:
+            // Includes the +0.0 entry, stops in front of -0.0.
+            arg = args[i];
+            return i < sizeArgs / 2;
+
+        case Range::UNRESTRICTED:
+            arg = args[i];
+            return true;
+
+        case Range::ONE_NEIGHBOURHOOD:
+            // This option is hardcoded because it is too specific:
+            // the five values 1, 0.5, 0, -0.5, -1 at the indices 0..4.
+            // Five result slots are needed, hence sizeArgs >= 5.
+            arg = TData( 1 ) - static_cast< TData >( i ) * TData( 0.5 );
+            return sizeArgs >= 5 && i < 5;
+    }
+    // Unknown range: evaluate nothing.
+    return false;
+}
+
+//! Kernel that evaluates one operator per thread.
+//!
+//! Thread t (t < numOps) applies structs[t].alpakaOp to every argument
+//! selected by selectArg and writes the result for index i to
+//! results[i + t * sizeArgs]. Skipped indices are left untouched.
+//!
+//! NOTE(review): the callbacks inside structs are addresses taken in the
+//! host code of TestTemplate; whether they can be called on non-CPU
+//! accelerators is not visible here - confirm.
+class UnaryOpsKernel
+{
+public:
+    ALPAKA_NO_HOST_ACC_WARNING
+    template<
+        typename TAcc,
+        typename TData,
+        typename TIdx
+    >
+    ALPAKA_FN_ACC auto operator()(
+        TAcc const & acc,
+        TIdx const & numOps,
+        TIdx const & sizeArgs,
+        TData const * const args,
+        TestStruct< TAcc, TData > const * const structs,
+        TData * results
+    ) const
+    -> void
+    {
+        auto const gridThreadIdx(
+            alpaka::idx::getIdx< alpaka::Grid, alpaka::Threads >( acc )[0u] );
+        if( gridThreadIdx < numOps )
+        {
+            auto const & op = structs[gridThreadIdx];
+            for( TIdx row( 0 ); row < sizeArgs; ++row )
+            {
+                TData arg;
+                if( selectArg( op.range, row, sizeArgs, args, arg ) )
+                {
+                    results[row + gridThreadIdx * sizeArgs] =
+                        op.alpakaOp( acc, arg );
+                }
+            }
+        }
+    }
+};
+
+//! Runs every listed operator on one accelerator type and compares the
+//! results with the std:: implementation.
+//! @tparam Data The floating point type of arguments and results.
+template< typename Data >
+struct TestTemplate
+{
+public:
+    template< typename TAcc >
+    void operator()( )
+    {
+        using Dim = alpaka::dim::Dim< TAcc >;
+        using Idx = alpaka::idx::Idx< TAcc >;
+        using DevAcc = alpaka::dev::Dev< TAcc >;
+        using PltfAcc = alpaka::pltf::Pltf< DevAcc >;
+        using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >;
+        using PltfHost = alpaka::pltf::PltfCpu;
+        using Op = TestStruct< TAcc, Data >;
+
+        // the functions that will be tested
+        Op arr [] =
+        {
+            { &std::abs, &alpaka::math::abs, Range::UNRESTRICTED },
+            { &std::acos, &alpaka::math::acos, Range::ONE_NEIGHBOURHOOD },
+            { &std::asin, &alpaka::math::asin, Range::ONE_NEIGHBOURHOOD },
+            { &std::atan, &alpaka::math::atan, Range::UNRESTRICTED },
+            { &std::cbrt, &alpaka::math::cbrt, Range::UNRESTRICTED },
+            { &std::ceil, &alpaka::math::ceil, Range::UNRESTRICTED },
+            { &std::cos, &alpaka::math::cos, Range::UNRESTRICTED },
+            { &std::erf, &alpaka::math::erf, Range::UNRESTRICTED },
+            { &std::exp, &alpaka::math::exp, Range::UNRESTRICTED },
+            { &std::floor, &alpaka::math::floor, Range::UNRESTRICTED },
+            { &std::log, &alpaka::math::log, Range::POSITIVE_ONLY },
+            { &std::round, &alpaka::math::round, Range::UNRESTRICTED },
+            { &rsqrt, &alpaka::math::rsqrt, Range::POSITIVE_ONLY },
+            { &std::sin, &alpaka::math::sin, Range::UNRESTRICTED },
+            { &std::sqrt, &alpaka::math::sqrt, Range::POSITIVE_AND_ZERO },
+            { &std::tan, &alpaka::math::tan, Range::UNRESTRICTED },
+            { &std::trunc, &alpaka::math::trunc, Range::UNRESTRICTED }
+        };
+
+        Idx const numOps = sizeof( arr ) / sizeof( Op );
+        Idx const elementsPerThread( 1u );
+        // sizeArgs defines how many test arguments are used.
+        // It should be an even number greater than 4.
+        Idx const sizeArgs( 100u );
+        Idx const sizeRes = sizeArgs * numOps;
+        constexpr size_t randomRange = 100u;
+
+        // Create the kernel function object.
+        UnaryOpsKernel kernel;
+
+        // Get the host device.
+        auto const devHost( alpaka::pltf::getDevByIdx< PltfHost >( 0u ) );
+
+        // Select a device to execute on.
+        auto const devAcc( alpaka::pltf::getDevByIdx< PltfAcc >( 0u ) );
+
+        // Get a queue on this device.
+        QueueAcc queue( devAcc );
+
+        // One thread per tested operator.
+        alpaka::vec::Vec< Dim, Idx > const extent( numOps );
+
+        // Let alpaka calculate good block and grid sizes given our full problem extent.
+        alpaka::workdiv::WorkDivMembers< Dim, Idx > const workDiv(
+            alpaka::workdiv::getValidWorkDiv< TAcc >(
+                devAcc,
+                extent,
+                elementsPerThread,
+                false,
+                alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted
+            )
+        );
+
+        // Allocate host memory buffers.
+        auto memBufHostArgs( alpaka::mem::buf::alloc< Data, Idx >( devHost, sizeArgs ) );
+        auto memBufHostRes( alpaka::mem::buf::alloc< Data, Idx >( devHost, sizeRes ) );
+        auto memBufHostStructs( alpaka::mem::buf::alloc< Op, Idx >( devHost, extent ) );
+
+        Data * const pBufHostArgs = alpaka::mem::view::getPtrNative( memBufHostArgs );
+        Data * const pBufHostRes = alpaka::mem::view::getPtrNative( memBufHostRes );
+        Op * const pBufHostStructs = alpaka::mem::view::getPtrNative( memBufHostStructs );
+
+        // This is just for a better understanding which results are unchanged.
+        for( Idx k( 0 ); k < sizeRes; ++k )
+        {
+            pBufHostRes[k] = -1;
+        }
+
+        unsigned long seed =
+            test::fillWithRndArgs< Data >(pBufHostArgs, sizeArgs, randomRange);
+        std::cout << "Using seed: " << seed << "\n";
+
+        // Hand the operator table to the buffer that is copied to the accelerator.
+        for( Idx i( 0 ); i < numOps; ++i )
+        {
+            pBufHostStructs[i] = arr[i];
+        }
+
+        // Allocate the buffer on the accelerator.
+        auto memBufAccArgs( alpaka::mem::buf::alloc< Data, Idx >( devAcc, sizeArgs ) );
+        auto memBufAccRes( alpaka::mem::buf::alloc< Data, Idx >( devAcc, sizeRes ) );
+        auto memBufAccStructs( alpaka::mem::buf::alloc< Op, Idx >( devAcc, numOps ) );
+
+        // Copy Host -> Acc.
+        alpaka::mem::view::copy( queue, memBufAccArgs, memBufHostArgs, sizeArgs );
+        alpaka::mem::view::copy( queue, memBufAccRes, memBufHostRes, sizeRes );
+        alpaka::mem::view::copy( queue, memBufAccStructs, memBufHostStructs, numOps );
+
+        auto pMemBufAccArgs = alpaka::mem::view::getPtrNative( memBufAccArgs );
+        auto pMemBufAccRes = alpaka::mem::view::getPtrNative( memBufAccRes );
+        auto pMemBufAccStructs = alpaka::mem::view::getPtrNative( memBufAccStructs );
+
+        // Create the kernel execution task.
+        auto const taskKernel(
+            alpaka::kernel::createTaskKernel< TAcc >(
+                workDiv,
+                kernel,
+                numOps,
+                sizeArgs,
+                pMemBufAccArgs,
+                pMemBufAccStructs,
+                pMemBufAccRes
+            )
+        );
+
+        // Enqueue the kernel execution task.
+        alpaka::queue::enqueue( queue, taskKernel );
+
+        // Copy back the result.
+        // The arguments are read-only in the kernel, so only the results
+        // have to be transferred.
+        alpaka::mem::view::copy( queue, memBufHostRes, memBufAccRes, sizeRes );
+
+        // Wait for the queue to finish the memory operation.
+        alpaka::wait::wait( queue );
+
+        // Check device result against host result.
+        // selectArg yields exactly the indices and arguments the kernel used,
+        // so zero and negative arguments are verified as well.
+        for( Idx i( 0u ); i < numOps; ++i )
+        {
+            Op const & op = arr[i];
+            for( Idx j( 0 ); j < sizeArgs; ++j )
+            {
+                Data arg;
+                if( !selectArg( op.range, j, sizeArgs, pBufHostArgs, arg ) )
+                    continue;
+                Data const stdRes = op.stdOp( arg );
+                REQUIRE( stdRes ==
+                    Approx( pBufHostRes[j + i * sizeArgs] ) );
+            }
+        }
+    }
+};
+
+TEST_CASE("unaryOps", "[unaryOps]")
+{
+    using TestAccs = alpaka::test::acc::EnabledAccs<
+        alpaka::dim::DimInt< 1u >,
+        std::size_t
+    >;
+    // Both data types named in the TestStruct documentation are covered.
+    alpaka::meta::forEachType< TestAccs >( TestTemplate< float >( ) );
+    alpaka::meta::forEachType< TestAccs >( TestTemplate< double >( ) );
+}
+