From 2f54d6e586333bd9a15914a106c627bc5450b131 Mon Sep 17 00:00:00 2001 From: Jakob Date: Fri, 23 Aug 2019 12:08:39 +0200 Subject: [PATCH 1/3] Added tests for existing math operators --- test/unit/CMakeLists.txt | 4 +- test/unit/math/binaryOps/CMakeLists.txt | 17 + test/unit/math/binaryOps/src/binaryOps.cpp | 432 +++++++++++++++++++++ test/unit/math/unaryOps/CMakeLists.txt | 17 + test/unit/math/unaryOps/src/unaryOps.cpp | 398 +++++++++++++++++++ 5 files changed, 867 insertions(+), 1 deletion(-) create mode 100644 test/unit/math/binaryOps/CMakeLists.txt create mode 100644 test/unit/math/binaryOps/src/binaryOps.cpp create mode 100644 test/unit/math/unaryOps/CMakeLists.txt create mode 100644 test/unit/math/unaryOps/src/unaryOps.cpp diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 9c39d074ca6b..960a68ecee76 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -28,8 +28,10 @@ ADD_SUBDIRECTORY("core/") ADD_SUBDIRECTORY("event/") ADD_SUBDIRECTORY("idx/") ADD_SUBDIRECTORY("kernel/") -ADD_SUBDIRECTORY("math/sincos/") ADD_SUBDIRECTORY("mem/buf/") +ADD_SUBDIRECTORY("math/sincos/") +ADD_SUBDIRECTORY("math/unaryOps/") +ADD_SUBDIRECTORY("math/binaryOps/") ADD_SUBDIRECTORY("mem/view/") ADD_SUBDIRECTORY("mem/p2p/") ADD_SUBDIRECTORY("meta/") diff --git a/test/unit/math/binaryOps/CMakeLists.txt b/test/unit/math/binaryOps/CMakeLists.txt new file mode 100644 index 000000000000..d71ece931e1d --- /dev/null +++ b/test/unit/math/binaryOps/CMakeLists.txt @@ -0,0 +1,17 @@ +SET(_TARGET_NAME "binaryOps") + +append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE) + +ALPAKA_ADD_EXECUTABLE( + ${_TARGET_NAME} + ${_FILES_SOURCE}) +TARGET_INCLUDE_DIRECTORIES( + ${_TARGET_NAME} + PRIVATE ${Boost_INCLUDE_DIRS}) +TARGET_LINK_LIBRARIES( + ${_TARGET_NAME} + PRIVATE common) + +SET_TARGET_PROPERTIES(${_TARGET_NAME} PROPERTIES FOLDER "test/unit") + +ADD_TEST(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_ALPAKA_TEST_OPTIONS}) diff --git 
a/test/unit/math/binaryOps/src/binaryOps.cpp b/test/unit/math/binaryOps/src/binaryOps.cpp new file mode 100644 index 000000000000..ccb3064b628f --- /dev/null +++ b/test/unit/math/binaryOps/src/binaryOps.cpp @@ -0,0 +1,432 @@ +// +// Created by jakob on 19.08.19. +// + +#include + +#include +#include +#include +#include + +#include +#include +#include + +/* + * atan2 | Y | R^2\{(0,0)} + * fmod | Y | R^2\{(x,0)|x in R} + * max | Y | R^2 + * min | Y | R^2 + * remainder | Y | R^2\{(x,0)|x in R} + * sincos | Y | R^2 -> return 2 R +*/ + +/* + * if you want to add a new operation simply add it to the array. + * 1. Specify the std:: implementation. + * 2. If the std function has a const reference signature use this + * 3. Specify the alpaka implementation. + * 4. Define the range in which the operator should be testes against. + */ + +/* + * if you need to add a new range you have to add it to the switch case + * - in the kernel class + * - in the TestTemplate + */ + + + + +// possible definition ranges +enum class Range +{ + POSITIVE_ONLY, + NOT_ZER0, + X_NOT_ZERO, + Y_NOT_ZERO, + UNRESTRICTED +}; + +// function pointer templates for std::math and alpaka::math +template< + typename TAcc, + typename T> +using alpaka_func_ptr = T (*) (TAcc const & , T const &, T const &); + +template< + typename T> +using std_func_ptr = T (*) (T, T); + +// used for all operators that need const references (like min()) +template< + typename T> +using std_func_ptr_const = T const & (*) (T const & , T const &); + + +template< + typename TAcc, + typename T> +struct TestStruct +{ + std_func_ptr stdOp ; + std_func_ptr_const stdAlternative; + alpaka_func_ptr alpakaOp; + Range range; +}; + + +class BinaryOpsKernel{ +public: + ALPAKA_NO_HOST_ACC_WARNING + template< + typename TAcc, + typename TData, + typename TIdx + > + ALPAKA_FN_ACC auto operator()( + TAcc const & acc, + TIdx const & numOps, + TIdx const & sizeArgs, + TData const * const argsX, + TData const * const argsY, + TestStruct const * 
const structs, + TData * results + ) const + -> void + { + auto const gridThreadIdx(alpaka::idx::getIdx(acc)[0u]); + TData argX; + TData argY; + if(gridThreadIdx < numOps) + { + // sizeRes = numOps * sizeArgs + switch (structs[gridThreadIdx].range) + { + case Range::POSITIVE_ONLY: + for(TIdx i(0); i < sizeArgs/2 -1; ++i) + { + argX = argsX[i]; + argY = argsY[i]; + results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + } + break; + case Range::NOT_ZER0: + for(TIdx i(0); i < sizeArgs; ++i) + { + if(i == sizeArgs/2 -1 || i == sizeArgs/2) + continue; + argX = argsX[i]; + argY = argsY[i]; + results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + } + break; + + case Range::X_NOT_ZERO: + for(TIdx i(0); i < sizeArgs/2 -1; ++i) + { + if(i == sizeArgs/2 -1 || i == sizeArgs/2) + argX = argsX[0]; + else + argX = argsX[i]; + argY = argsY[i]; + results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + } + break; + + case Range::Y_NOT_ZERO: + for(TIdx i(0); i < sizeArgs/2 -1; ++i) + { + if(i == sizeArgs/2 -1 || i == sizeArgs/2) + argY = argsY[0]; + else + argY = argsY[i]; + argX = argsX[i]; + + results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + } + break; + + case Range::UNRESTRICTED: + for(TIdx i(0); i < sizeArgs; ++i) + { + argX = argsX[i]; + argY= argsY[i]; + results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + } + break; + + default: + break; + } + } + } +}; + + +struct TestTemplate { + template + void operator()() { + using Dim = alpaka::dim::Dim; + using Idx = alpaka::idx::Idx; + using Data = double; + using DevAcc = alpaka::dev::Dev; + using PltfAcc = alpaka::pltf::Pltf; + using QueueAcc = alpaka::test::queue::DefaultQueue; + using PltfHost = alpaka::pltf::PltfCpu; + + + std::cout << "\nTesting next AccType \n\n"; + // the functions that will be tested + + + TestStruct arr[] = + {/* normal 
callback, const callback, alpaka callback, definition range*/ + { &std::atan2, nullptr, &alpaka::math::atan2, Range::NOT_ZER0}, + { &std::fmod, nullptr, &alpaka::math::fmod, Range::Y_NOT_ZERO}, + { nullptr, &std::max, &alpaka::math::max, Range::Y_NOT_ZERO}, + { nullptr, &std::min, &alpaka::math::min, Range::UNRESTRICTED}, + { &std::pow, nullptr, &alpaka::math::pow, Range::POSITIVE_ONLY}, + { &std::remainder, nullptr, &alpaka::math::remainder, Range::POSITIVE_ONLY} + }; + + Idx const numOps = sizeof(arr) / sizeof(TestStruct); + Idx const elementsPerThread(1u); + Idx const sizeArgs(10u); + Idx const sizeRes = sizeArgs * numOps; + + // Create the kernel function object. + BinaryOpsKernel kernel; + + // Get the host device. + auto const devHost( + alpaka::pltf::getDevByIdx(0u)); + + // Select a device to execute on. + auto const devAcc( + alpaka::pltf::getDevByIdx(0u)); + + // Get a queue on this device. + QueueAcc queue(devAcc); + + alpaka::vec::Vec const extent(numOps); + + // Let alpaka calculate good block and grid sizes given our full problem extent. + alpaka::workdiv::WorkDivMembers const workDiv( + alpaka::workdiv::getValidWorkDiv( + devAcc, + extent, + elementsPerThread, + false, + alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted)); + + + // Allocate host memory buffers. + auto memBufHostArgsX(alpaka::mem::buf::alloc(devHost, sizeArgs)); + auto memBufHostArgsY(alpaka::mem::buf::alloc(devHost, sizeArgs)); + auto memBufHostRes(alpaka::mem::buf::alloc(devHost, sizeRes)); + auto memBufHostStructs(alpaka::mem::buf::alloc, Idx>(devHost, extent)); + + Data *const pBufHostArgsX = alpaka::mem::view::getPtrNative(memBufHostArgsX); + Data *const pBufHostArgsY = alpaka::mem::view::getPtrNative(memBufHostArgsY); + Data *const pBufHostRes = alpaka::mem::view::getPtrNative(memBufHostRes); + TestStruct *const pBufHostStructs = alpaka::mem::view::getPtrNative(memBufHostStructs); + + // This is just for a better understanding which results are unchanged. 
+ for (Idx i(0); i < numOps; ++i) + { + for (Idx j(0); j < sizeArgs; ++j) { + pBufHostRes[j + i * sizeArgs] = -1; + } + } + + // C++11 random generator for uniformly distributed numbers in [-100, 100]. + std::random_device rd{}; + std::default_random_engine eng{rd()}; + std::uniform_real_distribution dist(0, 100); + + // Initiate the arguments. + for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + { + pBufHostArgsX[i] = dist(eng); + pBufHostArgsY[i] = dist(eng); + + } + pBufHostArgsX[sizeArgs / 2 - 1] = 0.0; + pBufHostArgsY[sizeArgs / 2 - 1] = 0.0; + + pBufHostArgsX[sizeArgs / 2] = -0.0; + pBufHostArgsY[sizeArgs / 2] = -0.0; + + for (Idx i(sizeArgs / 2 + 1); i < sizeArgs; ++i) + { + pBufHostArgsX[i] = dist(eng) - 100; + pBufHostArgsY[i] = dist(eng) - 100; + } + + + // Initiate the structs. + for (Idx i(0u); i < numOps; ++i) + { + pBufHostStructs[i] = arr[i]; + } + + + // Allocate the buffer on the accelerator. + auto memBufAccArgsX(alpaka::mem::buf::alloc(devAcc, sizeArgs)); + auto memBufAccArgsY(alpaka::mem::buf::alloc(devAcc, sizeArgs)); + auto memBufAccRes(alpaka::mem::buf::alloc(devAcc, sizeRes)); + auto memBufAccStructs(alpaka::mem::buf::alloc, Idx>(devAcc, numOps)); + + + // Copy Host -> Acc. + alpaka::mem::view::copy(queue, memBufAccArgsX, memBufHostArgsX, sizeArgs); + alpaka::mem::view::copy(queue, memBufAccArgsY, memBufHostArgsY, sizeArgs); + alpaka::mem::view::copy(queue, memBufAccRes, memBufHostRes, sizeRes); + alpaka::mem::view::copy(queue, memBufAccStructs, memBufHostStructs, numOps); + + for (Idx i(0u); i < sizeArgs; ++i) { + std::cout << "bufferArgs x: " << pBufHostArgsX[i] << " y: " << pBufHostArgsY[i] << std::endl; + } + + auto pMemBufAccArgsX = alpaka::mem::view::getPtrNative(memBufAccArgsX); + auto pMemBufAccArgsY = alpaka::mem::view::getPtrNative(memBufAccArgsY); + auto pMemBufAccRes = alpaka::mem::view::getPtrNative(memBufAccRes); + auto pMemBufAccStructs = alpaka::mem::view::getPtrNative(memBufAccStructs); + + + + // Create the kernel execution task. 
+ auto const taskKernel(alpaka::kernel::createTaskKernel( + workDiv, + kernel, + numOps, + sizeArgs, + pMemBufAccArgsX, + pMemBufAccArgsY, + pMemBufAccStructs, + pMemBufAccRes + )); + + // Enqueue the kernel execution task. + alpaka::queue::enqueue(queue, taskKernel); + + // Copy back the result. + alpaka::mem::view::copy(queue, memBufHostRes, memBufAccRes, sizeRes); + + + // Wait for the queue to finish the memory operation. + alpaka::wait::wait(queue); + + + // Print out all results. + for (Idx i(0); i < numOps; ++i) + { + std::cout << "\nResults " << i + 1 << ". function:\n"; + + for (Idx j(0); j < sizeArgs; ++j) + { + Data const &res(pBufHostRes[j + i * sizeArgs]); + std::cout << "bufferResults: " << res << "\n"; + } + } + + // Check device result against host result. + + Data argX; + Data argY; + Data stdRes; + TestStruct t; + for (Idx j(0); j < numOps; ++j) + { + t = arr[j]; + switch (t.range) + { + case Range::POSITIVE_ONLY: + for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + { + argX = pBufHostArgsX[i]; + argY = pBufHostArgsY[i]; + if(t.stdOp != nullptr) + stdRes = t.stdOp(argX, argY); + else + stdRes = t.stdAlternative(argX, argY); + REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + } + break; + case Range::NOT_ZER0: + for (Idx i(0); i < sizeArgs; ++i) + { + if (i == sizeArgs / 2 - 1 || i == sizeArgs / 2) + continue; + argX = pBufHostArgsX[i]; + argY = pBufHostArgsY[i]; + if(t.stdOp != nullptr) + stdRes = t.stdOp(argX, argY); + else + stdRes = t.stdAlternative(argX, argY); + REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + } + break; + + case Range::X_NOT_ZERO: + for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + { + if (i == sizeArgs / 2 - 1 || i == sizeArgs / 2) + argX = pBufHostArgsX[0]; + else + argX = pBufHostArgsX[i]; + argY = pBufHostArgsY[i]; + if(t.stdOp != nullptr) + stdRes = t.stdOp(argX, argY); + else + stdRes = t.stdAlternative(argX, argY); + REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + } + break; + + case 
Range::Y_NOT_ZERO: + for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + { + if (i == sizeArgs / 2 - 1 || i == sizeArgs / 2) + argY = pBufHostArgsY[0]; + else + argY = pBufHostArgsY[i]; + argX = pBufHostArgsX[i]; + if(t.stdOp != nullptr) + stdRes = t.stdOp(argX, argY); + else + stdRes = t.stdAlternative(argX, argY); + REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + } + break; + + case Range::UNRESTRICTED: + for (Idx i(0); i < sizeArgs; ++i) + { + argX = pBufHostArgsX[i]; + argY = pBufHostArgsY[i]; + if(t.stdOp != nullptr) + stdRes = t.stdOp(argX, argY); + else + stdRes = t.stdAlternative(argX, argY); + REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + } + break; + + default: + break; + } + } + } +}; + +TEST_CASE("binaryOps", "[binaryOps]") +{ + using TestAccs = alpaka::test::acc::EnabledAccs< + alpaka::dim::DimInt<1u>, + std::size_t>; + + alpaka::meta::forEachType< TestAccs >( TestTemplate() ); +} \ No newline at end of file diff --git a/test/unit/math/unaryOps/CMakeLists.txt b/test/unit/math/unaryOps/CMakeLists.txt new file mode 100644 index 000000000000..db4614ee4b70 --- /dev/null +++ b/test/unit/math/unaryOps/CMakeLists.txt @@ -0,0 +1,17 @@ +SET(_TARGET_NAME "unaryOps") + +append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE) + +ALPAKA_ADD_EXECUTABLE( + ${_TARGET_NAME} + ${_FILES_SOURCE}) +TARGET_INCLUDE_DIRECTORIES( + ${_TARGET_NAME} + PRIVATE ${Boost_INCLUDE_DIRS}) +TARGET_LINK_LIBRARIES( + ${_TARGET_NAME} + PRIVATE common) + +SET_TARGET_PROPERTIES(${_TARGET_NAME} PROPERTIES FOLDER "test/unit") + +ADD_TEST(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_ALPAKA_TEST_OPTIONS}) diff --git a/test/unit/math/unaryOps/src/unaryOps.cpp b/test/unit/math/unaryOps/src/unaryOps.cpp new file mode 100644 index 000000000000..cec77b3b5ac7 --- /dev/null +++ b/test/unit/math/unaryOps/src/unaryOps.cpp @@ -0,0 +1,398 @@ +// +// Created by jakob on 08.08.19. 
+// +#include + +#include +#include +#include +#include + +#include +#include +#include + +/* list of all operators + * operator | in std | definition | range | notes + * abs | Y | R + * acos | Y | [-1, 1] + * asin | Y | [-1, 1] + * atan | Y | R + * cbrt | Y | R | third root of arg + * ceil | Y | R + * cos | Y | R + * erf | Y | R | error function for arg + * exp | Y | R | e^arg + * floor | Y | R + * log | Y | N\{0} + * round | Y | R + * rsqrt | X | N\{0} | inverse square root + * sin | Y | R + * sqrt | Y | N + * tan | Y | [x | x \= pi/2 + k*pi, k in Z] + * trunc | Y | R | round towards zero + */ + +/* + * if you want to add a new operation simply add it to the array. + * 1. Specify the std:: implementation. + * 2. Specify the alpaka implementation. + * 3. Define the range in which the operator should be testes against. + */ + +/* + * if you need to add a new range you have to add it to the switch case + * - in the kernel class + * - in the TestTemplate + */ + + +// Custom functions. +template +T rsqrt(T t){ + return 1 / std::sqrt(t); +} + +// Possible definition ranges. +enum class Range +{ + POSITIVE_ONLY, + POSITIVE_AND_ZERO, + ONE_NEIGHBOURHOOD, // [-1, 1] + UNRESTRICTED +}; + +// C-Style Callbacks for std::math and alpaka::math. +template< + typename TAcc, + typename T> +using alpaka_func_ptr = T (*) (TAcc const & , T const &); + +template< + typename T> +using std_func_ptr = T (*) (T); + +// Data-Structure for all operators. 
+template< + typename TAcc, + typename T> +struct TestStruct +{ + std_func_ptr stdOp; + alpaka_func_ptr alpakaOp; + Range range; +}; + +class UnaryOpsKernel{ +public: + ALPAKA_NO_HOST_ACC_WARNING + template< + typename TAcc, + typename TData, + typename TIdx + > + ALPAKA_FN_ACC auto operator()( + TAcc const & acc, + TIdx const & numOps, + TIdx const & sizeArgs, + TData const * const args, + TestStruct const * const structs, + TData * results + ) const + -> void + { + //results[0] = 0.42f; + auto const gridThreadIdx(alpaka::idx::getIdx(acc)[0u]); + TData arg; + if(gridThreadIdx < numOps) + { + // sizeRes = numOps * sizeArgs + switch (structs[gridThreadIdx].range) + { + case Range::POSITIVE_ONLY: + for(TIdx row(0); row < sizeArgs/2 -1; ++row) + { + arg = args[row]; + results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + } + break; + case Range::POSITIVE_AND_ZERO: + for(TIdx row(0); row < sizeArgs/2; ++row) + { + arg = args[row]; + results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + } + break; + case Range::UNRESTRICTED: + for(TIdx row(0); row < sizeArgs; ++row) + { + arg = args[row]; + results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + } + break; + case Range::ONE_NEIGHBOURHOOD: + if(sizeArgs < 4) + break; + arg = 1; + results[0 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + arg = 0.5; + results[1 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + arg = 0; + results[2 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + arg = -0.5; + results[3 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + arg = -1; + results[4 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); + break; + + default: + break; + } + + } + } +}; + + +struct TestTemplate +{ + template + void operator()() + { + using Dim = alpaka::dim::Dim; + using Idx = alpaka::idx::Idx; + using Data = 
double; + using DevAcc = alpaka::dev::Dev; + using PltfAcc = alpaka::pltf::Pltf; + using QueueAcc = alpaka::test::queue::DefaultQueue; + using PltfHost = alpaka::pltf::PltfCpu; + + + std::cout << "\nTesting next AccType \n\n"; + // the functions that will be tested + + + TestStruct arr [] = + { + { &std::abs, &alpaka::math::abs, Range::UNRESTRICTED }, + { &std::acos, &alpaka::math::acos, Range::ONE_NEIGHBOURHOOD }, + { &std::asin, &alpaka::math::asin, Range::ONE_NEIGHBOURHOOD }, + { &std::atan, &alpaka::math::atan, Range::UNRESTRICTED }, + { &std::cbrt, &alpaka::math::cbrt, Range::UNRESTRICTED }, + { &std::ceil, &alpaka::math::ceil, Range::UNRESTRICTED }, + { &std::cos, &alpaka::math::cos, Range::UNRESTRICTED }, + { &std::erf, &alpaka::math::erf, Range::UNRESTRICTED }, + { &std::exp, &alpaka::math::exp, Range::UNRESTRICTED }, + { &std::floor, &alpaka::math::floor, Range::UNRESTRICTED }, + { &std::log, &alpaka::math::log, Range::POSITIVE_ONLY }, + { &std::round, &alpaka::math::round, Range::UNRESTRICTED }, + { &rsqrt, &alpaka::math::rsqrt, Range::POSITIVE_ONLY }, + { &std::sin, &alpaka::math::sin, Range::UNRESTRICTED }, + { &std::sqrt, &alpaka::math::sqrt, Range::POSITIVE_AND_ZERO }, + { &std::tan, &alpaka::math::tan, Range::UNRESTRICTED }, + { &std::trunc, &alpaka::math::trunc, Range::UNRESTRICTED } + + }; + + Idx const numOps = sizeof(arr)/sizeof(TestStruct); + Idx const elementsPerThread(1u); + Idx const sizeArgs(10u); + Idx const sizeRes= sizeArgs * numOps; + + // Create the kernel function object. + UnaryOpsKernel kernel; + + // Get the host device. + auto const devHost( + alpaka::pltf::getDevByIdx(0u)); + + // Select a device to execute on. + auto const devAcc( + alpaka::pltf::getDevByIdx(0u)); + + // Get a queue on this device. + QueueAcc queue(devAcc); + + alpaka::vec::Vec const extent(numOps); + + // Let alpaka calculate good block and grid sizes given our full problem extent. 
+ alpaka::workdiv::WorkDivMembers const workDiv( + alpaka::workdiv::getValidWorkDiv( + devAcc, + extent, + elementsPerThread, + false, + alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted)); + + + // Allocate host memory buffers. + auto memBufHostArgs(alpaka::mem::buf::alloc(devHost, sizeArgs)); + auto memBufHostRes(alpaka::mem::buf::alloc(devHost, sizeRes)); + auto memBufHostStructs(alpaka::mem::buf::alloc, Idx>(devHost, extent)); + + Data * const pBufHostArgs = alpaka::mem::view::getPtrNative(memBufHostArgs); + Data * const pBufHostRes = alpaka::mem::view::getPtrNative(memBufHostRes); + TestStruct * const pBufHostStructs = alpaka::mem::view::getPtrNative(memBufHostStructs); + + // This is just for a better understanding which results are unchanged. + for(Idx i(0);i < numOps;++i) + { + for(Idx j(0);j < sizeArgs;++j) + { + pBufHostRes[j+i*sizeArgs] = -1; + } + } + + // C++11 random generator for uniformly distributed numbers in {-100,..,100} + std::random_device rd{}; + std::default_random_engine eng{ rd() }; + std::uniform_real_distribution dist(0, 100); + + + + // Initiate the arguments. + for(Idx i(0); i < sizeArgs/2-1; ++i) + { + pBufHostArgs[i] = dist(eng); + } + pBufHostArgs[sizeArgs/2 -1] = 0.0; + pBufHostArgs[sizeArgs/2] = -0.0; + + for(Idx i(sizeArgs/2 + 1); i < sizeArgs; ++i) + { + pBufHostArgs[i] = dist(eng)-100; + } + + // Initiate the structs. + for(Idx i(0u); i < numOps; ++i) + { + pBufHostStructs[i] = arr[i]; + } + + // Allocate the buffer on the accelerator. + auto memBufAccArgs(alpaka::mem::buf::alloc(devAcc, sizeArgs)); + auto memBufAccRes(alpaka::mem::buf::alloc(devAcc, sizeRes)); + auto memBufAccStructs(alpaka::mem::buf::alloc, Idx>(devAcc, numOps)); + + + // Copy Host -> Acc. 
+ alpaka::mem::view::copy(queue, memBufAccArgs, memBufHostArgs, sizeArgs); + alpaka::mem::view::copy(queue, memBufAccRes, memBufHostRes, sizeRes); + alpaka::mem::view::copy(queue, memBufAccStructs, memBufHostStructs, numOps); + + for(Idx i(0u); i < sizeArgs; ++i){ + std::cout<<"bufferArgs: " << pBufHostArgs[i] <<"\n"; + } + + auto pMemBufAccArgs = alpaka::mem::view::getPtrNative(memBufAccArgs); + auto pMemBufAccRes = alpaka::mem::view::getPtrNative(memBufAccRes); + auto pMemBufAccStructs = alpaka::mem::view::getPtrNative(memBufAccStructs); + + + // Create the kernel execution task. + auto const taskKernel(alpaka::kernel::createTaskKernel( + workDiv, + kernel, + numOps, + sizeArgs, + pMemBufAccArgs, + pMemBufAccStructs, + pMemBufAccRes + )); + + // Enqueue the kernel execution task. + alpaka::queue::enqueue(queue, taskKernel); + + // Copy back the result. + alpaka::mem::view::copy(queue, memBufHostArgs, memBufAccArgs, sizeArgs); + alpaka::mem::view::copy(queue, memBufHostRes, memBufAccRes, sizeRes); + + + // Wait for the queue to finish the memory operation. + alpaka::wait::wait(queue); + + // Print out all results. + for(Idx i(0u); i < numOps; ++i) + { + std::cout <<"\nResults "<< i +1 <<". function:\n"; + + for(Idx j(0u); j < sizeArgs; ++j) + { + Data const & res(pBufHostRes[j + i * sizeArgs]); + std::cout<<"bufferResults: " << res << "\n"; + } + } + + + // Check device result against host result. 
+ Data arg; + Data stdRes; + TestStruct t; + for(Idx i(0u); i < numOps; ++i) + { + t = arr[i]; + switch (t.range) + { + case Range::POSITIVE_ONLY: + for (Idx j(0); j < sizeArgs / 2 - 2; ++j) + { + arg = pBufHostArgs[j]; + stdRes = t.stdOp(arg); + REQUIRE( stdRes == Approx(pBufHostRes[j + i * sizeArgs])); + } + break; + + case Range::POSITIVE_AND_ZERO: + for (Idx j(0); j < sizeArgs / 2 - 2; ++j) + { + arg = pBufHostArgs[j]; + stdRes = t.stdOp(arg); + REQUIRE(stdRes == Approx(pBufHostRes[j + i * sizeArgs])); + } + break; + + case Range::UNRESTRICTED: + for (Idx j(0); j < sizeArgs / 2 - 2; ++j) + { + arg = pBufHostArgs[j]; + stdRes = t.stdOp(arg); + REQUIRE( stdRes == Approx(pBufHostRes[j + i * sizeArgs])); + } + break; + + case Range::ONE_NEIGHBOURHOOD: + if(sizeArgs < 4) + break; + arg = 1; + stdRes = t.stdOp(arg); + REQUIRE(stdRes == Approx(pBufHostRes[0 + i * sizeArgs])); + arg = 0.5; + stdRes = t.stdOp(arg); + REQUIRE(stdRes == Approx(pBufHostRes[1 + i * sizeArgs])); + arg = 0; + stdRes = t.stdOp(arg); + REQUIRE(stdRes == Approx(pBufHostRes[2 + i * sizeArgs])); + arg = -0.5; + stdRes = t.stdOp(arg); + REQUIRE(stdRes == Approx(pBufHostRes[3 + i * sizeArgs])); + arg = -1; + stdRes = t.stdOp(arg); + REQUIRE(stdRes == Approx(pBufHostRes[4 + i * sizeArgs])); + break; + + default: + break; + } + } + } +}; + +TEST_CASE("unaryOps", "[unaryOps]") +{ + using TestAccs = alpaka::test::acc::EnabledAccs< + alpaka::dim::DimInt<1u>, + std::size_t>; + + alpaka::meta::forEachType< TestAccs >( TestTemplate() ); +} + From b90e740474570f5786ab97cdcb1fe915ff42f404 Mon Sep 17 00:00:00 2001 From: Jakob Date: Mon, 26 Aug 2019 10:17:53 +0200 Subject: [PATCH 2/3] Resolved Pr-requested changes, reformatted code --- test/unit/CMakeLists.txt | 2 +- test/unit/math/binaryOps/src/binaryOps.cpp | 577 ++++++++++++++------- test/unit/math/dataGen.hpp | 53 ++ test/unit/math/unaryOps/src/unaryOps.cpp | 386 +++++++++----- 4 files changed, 696 insertions(+), 322 deletions(-) create mode 100644 
test/unit/math/dataGen.hpp diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 960a68ecee76..41788149c253 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -28,10 +28,10 @@ ADD_SUBDIRECTORY("core/") ADD_SUBDIRECTORY("event/") ADD_SUBDIRECTORY("idx/") ADD_SUBDIRECTORY("kernel/") -ADD_SUBDIRECTORY("mem/buf/") ADD_SUBDIRECTORY("math/sincos/") ADD_SUBDIRECTORY("math/unaryOps/") ADD_SUBDIRECTORY("math/binaryOps/") +ADD_SUBDIRECTORY("mem/buf/") ADD_SUBDIRECTORY("mem/view/") ADD_SUBDIRECTORY("mem/p2p/") ADD_SUBDIRECTORY("meta/") diff --git a/test/unit/math/binaryOps/src/binaryOps.cpp b/test/unit/math/binaryOps/src/binaryOps.cpp index ccb3064b628f..af4f51a28826 100644 --- a/test/unit/math/binaryOps/src/binaryOps.cpp +++ b/test/unit/math/binaryOps/src/binaryOps.cpp @@ -1,45 +1,49 @@ -// -// Created by jakob on 19.08.19. -// - -#include +/** Copyright 2019 Axel Huebl, Benjamin Worpitz + * + * This file is part of Alpaka. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ #include #include -#include +#include #include -#include -#include +#include #include +#include +#include "../../dataGen.hpp" -/* +/** List of all operators + * operator | in std | definition | range * atan2 | Y | R^2\{(0,0)} * fmod | Y | R^2\{(x,0)|x in R} * max | Y | R^2 * min | Y | R^2 * remainder | Y | R^2\{(x,0)|x in R} - * sincos | Y | R^2 -> return 2 R -*/ - -/* - * if you want to add a new operation simply add it to the array. + * + * sincos is testes separately, + * because it manipulates two inputs and doesnt return a value + * + * If you want to add a new operation simply add it to the array. * 1. Specify the std:: implementation. * 2. If the std function has a const reference signature use this * 3. Specify the alpaka implementation. * 4. 
Define the range in which the operator should be testes against. - */ - -/* - * if you need to add a new range you have to add it to the switch case + * + * If you need to add a new range you have to add it to the switch case * - in the kernel class * - in the TestTemplate + * + * If the wrong range is used, + * the bahaviour depends on the individual implementation. */ - - - -// possible definition ranges +//! @enum Range +//! @brief Possible definition ranges. enum class Range { POSITIVE_ONLY, @@ -49,7 +53,7 @@ enum class Range UNRESTRICTED }; -// function pointer templates for std::math and alpaka::math +// C-Style Callbacks for std::math and alpaka::math. template< typename TAcc, typename T> @@ -64,96 +68,167 @@ template< typename T> using std_func_ptr_const = T const & (*) (T const & , T const &); +/*! + * @struct TestStruct + * @tparam TAcc The type of the used accelerator, important for alpaka callback. + * @tparam T The data type (float || double). + * @var stdOps + * @brief Callback the std implementation. + * @var stdAlternative + * @brief If the callback signature uses const references. + * @var alpakaOp + * @brief Callback to alpaka implementation. + * @note One, and only one, of stdOps or stdAlternative should be a nullptr. + * @fn getStdOpRes + * @brief Checks which std-callback is used and returns the result. + * @throws runtime_exception If both are nullptr. 
+ */ template< typename TAcc, typename T> struct TestStruct { - std_func_ptr stdOp ; - std_func_ptr_const stdAlternative; - alpaka_func_ptr alpakaOp; + std_func_ptr< T > stdOp; + std_func_ptr_const< T > stdAlternative; + alpaka_func_ptr< + TAcc, + T + > alpakaOp; Range range; -}; + T getStdOpRes( + T argX, + T argY + ) + { + if( stdOp != nullptr ) + return stdOp( + argX, + argY + ); + else if( stdAlternative != nullptr ) + return stdAlternative( + argX, + argY + ); + else + throw std::runtime_error( + "At least one std implementation should be given" ); + } +}; -class BinaryOpsKernel{ +class BinaryOpsKernel +{ public: ALPAKA_NO_HOST_ACC_WARNING template< - typename TAcc, - typename TData, - typename TIdx + typename TAcc, + typename TData, + typename TIdx > ALPAKA_FN_ACC auto operator()( - TAcc const & acc, - TIdx const & numOps, - TIdx const & sizeArgs, - TData const * const argsX, - TData const * const argsY, - TestStruct const * const structs, - TData * results - ) const - -> void + TAcc const & acc, + TIdx const & numOps, + TIdx const & sizeArgs, + TData const * const argsX, + TData const * const argsY, + TestStruct< + TAcc, + TData + > const * const structs, + TData * results + ) const -> void { - auto const gridThreadIdx(alpaka::idx::getIdx(acc)[0u]); + auto const + gridThreadIdx + ( + alpaka::idx::getIdx< + alpaka::Grid, + alpaka::Threads + >( acc )[0u] + ); TData argX; TData argY; - if(gridThreadIdx < numOps) + if( gridThreadIdx < numOps ) { - // sizeRes = numOps * sizeArgs - switch (structs[gridThreadIdx].range) + switch( structs[gridThreadIdx].range ) { case Range::POSITIVE_ONLY: - for(TIdx i(0); i < sizeArgs/2 -1; ++i) + for( TIdx i( 0 ); i < sizeArgs / 2 - 1; ++i ) { argX = argsX[i]; argY = argsY[i]; - results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + results[i + gridThreadIdx * sizeArgs] = + structs[gridThreadIdx].alpakaOp( + acc, + argX, + argY + ); } break; case Range::NOT_ZER0: - for(TIdx i(0); i < sizeArgs; 
++i) + for( TIdx i( 0 ); i < sizeArgs; ++i ) { - if(i == sizeArgs/2 -1 || i == sizeArgs/2) + if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 ) continue; argX = argsX[i]; argY = argsY[i]; - results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + results[i + gridThreadIdx * sizeArgs] = + structs[gridThreadIdx].alpakaOp( + acc, + argX, + argY + ); } break; case Range::X_NOT_ZERO: - for(TIdx i(0); i < sizeArgs/2 -1; ++i) + for( TIdx i( 0 ); i < sizeArgs; ++i ) { - if(i == sizeArgs/2 -1 || i == sizeArgs/2) + if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 ) argX = argsX[0]; else argX = argsX[i]; argY = argsY[i]; - results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + results[i + gridThreadIdx * sizeArgs] = + structs[gridThreadIdx].alpakaOp( + acc, + argX, + argY + ); } break; case Range::Y_NOT_ZERO: - for(TIdx i(0); i < sizeArgs/2 -1; ++i) + for( TIdx i( 0 ); i < sizeArgs; ++i ) { - if(i == sizeArgs/2 -1 || i == sizeArgs/2) + if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 ) argY = argsY[0]; else argY = argsY[i]; argX = argsX[i]; - results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + results[i + gridThreadIdx * sizeArgs] = + structs[gridThreadIdx].alpakaOp( + acc, + argX, + argY + ); } break; case Range::UNRESTRICTED: - for(TIdx i(0); i < sizeArgs; ++i) + for( TIdx i( 0 ); i < sizeArgs; ++i ) { argX = argsX[i]; - argY= argsY[i]; - results[i + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, argX, argY); + argY = argsY[i]; + results[i + gridThreadIdx * sizeArgs] = + structs[gridThreadIdx].alpakaOp( + acc, + argX, + argY + ); } break; @@ -165,140 +240,256 @@ class BinaryOpsKernel{ }; -struct TestTemplate { - template - void operator()() { - using Dim = alpaka::dim::Dim; - using Idx = alpaka::idx::Idx; - using Data = double; - using DevAcc = alpaka::dev::Dev; - using PltfAcc = alpaka::pltf::Pltf; - using QueueAcc = alpaka::test::queue::DefaultQueue; 
+struct TestTemplate +{ + template< + typename TAcc> + void operator()( ) + { + using Data = double; // this can be changed to float without problems + using Dim = alpaka::dim::Dim< TAcc >; + using Idx = alpaka::idx::Idx< TAcc >; + using DevAcc = alpaka::dev::Dev< TAcc >; + using PltfAcc = alpaka::pltf::Pltf< DevAcc >; + using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >; using PltfHost = alpaka::pltf::PltfCpu; - std::cout << "\nTesting next AccType \n\n"; // the functions that will be tested - - - TestStruct arr[] = - {/* normal callback, const callback, alpaka callback, definition range*/ - { &std::atan2, nullptr, &alpaka::math::atan2, Range::NOT_ZER0}, - { &std::fmod, nullptr, &alpaka::math::fmod, Range::Y_NOT_ZERO}, - { nullptr, &std::max, &alpaka::math::max, Range::Y_NOT_ZERO}, - { nullptr, &std::min, &alpaka::math::min, Range::UNRESTRICTED}, - { &std::pow, nullptr, &alpaka::math::pow, Range::POSITIVE_ONLY}, - { &std::remainder, nullptr, &alpaka::math::remainder, Range::POSITIVE_ONLY} - }; - - Idx const numOps = sizeof(arr) / sizeof(TestStruct); - Idx const elementsPerThread(1u); - Idx const sizeArgs(10u); + TestStruct arr [] = + {/* normal callback, const callback, alpaka callback, definition range*/ + { &std::atan2, nullptr, &alpaka::math::atan2, Range::NOT_ZER0}, + { &std::fmod, nullptr, &alpaka::math::fmod, Range::Y_NOT_ZERO}, + { nullptr, &std::max, &alpaka::math::max, Range::Y_NOT_ZERO}, + { nullptr, &std::min, &alpaka::math::min, Range::UNRESTRICTED}, + { &std::pow, nullptr, &alpaka::math::pow, Range::POSITIVE_ONLY}, + { &std::remainder, nullptr, &alpaka::math::remainder, Range::POSITIVE_ONLY} + }; + + Idx const + numOps = + sizeof( arr ) / sizeof( TestStruct< TAcc, Data > ); + Idx const elementsPerThread( 1u ); + Idx const sizeArgs( 10u ); Idx const sizeRes = sizeArgs * numOps; + constexpr size_t randomRange = 100u; // Create the kernel function object. BinaryOpsKernel kernel; // Get the host device. 
auto const devHost( - alpaka::pltf::getDevByIdx(0u)); + alpaka::pltf::getDevByIdx< PltfHost >( 0u ) + ); // Select a device to execute on. auto const devAcc( - alpaka::pltf::getDevByIdx(0u)); + alpaka::pltf::getDevByIdx< PltfAcc >( 0u ) + ); // Get a queue on this device. - QueueAcc queue(devAcc); + QueueAcc queue( devAcc ); - alpaka::vec::Vec const extent(numOps); + alpaka::vec::Vec< + Dim, + Idx + > const extent( numOps ); // Let alpaka calculate good block and grid sizes given our full problem extent. - alpaka::workdiv::WorkDivMembers const workDiv( - alpaka::workdiv::getValidWorkDiv( - devAcc, - extent, - elementsPerThread, - false, - alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted)); + alpaka::workdiv::WorkDivMembers< + Dim, + Idx + > const workDiv( + alpaka::workdiv::getValidWorkDiv< TAcc >( + devAcc, + extent, + elementsPerThread, + false, + alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted + ) + ); // Allocate host memory buffers. - auto memBufHostArgsX(alpaka::mem::buf::alloc(devHost, sizeArgs)); - auto memBufHostArgsY(alpaka::mem::buf::alloc(devHost, sizeArgs)); - auto memBufHostRes(alpaka::mem::buf::alloc(devHost, sizeRes)); - auto memBufHostStructs(alpaka::mem::buf::alloc, Idx>(devHost, extent)); - - Data *const pBufHostArgsX = alpaka::mem::view::getPtrNative(memBufHostArgsX); - Data *const pBufHostArgsY = alpaka::mem::view::getPtrNative(memBufHostArgsY); - Data *const pBufHostRes = alpaka::mem::view::getPtrNative(memBufHostRes); - TestStruct *const pBufHostStructs = alpaka::mem::view::getPtrNative(memBufHostStructs); + auto + memBufHostArgsX + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devHost, + sizeArgs + ) + ); + auto + memBufHostArgsY + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devHost, + sizeArgs + ) + ); + auto + memBufHostRes + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devHost, + sizeRes + ) + ); + auto + memBufHostStructs + ( + alpaka::mem::buf::alloc< + TestStruct< + TAcc, + Data + >, + 
Idx + >( + devHost, + extent + ) + ); + + Data + * const pBufHostArgsX = + alpaka::mem::view::getPtrNative( memBufHostArgsX ); + Data + * const pBufHostArgsY = + alpaka::mem::view::getPtrNative( memBufHostArgsY ); + Data + * const pBufHostRes = + alpaka::mem::view::getPtrNative( memBufHostRes ); + TestStruct< + TAcc, + Data + > + * const pBufHostStructs = + alpaka::mem::view::getPtrNative( memBufHostStructs ); // This is just for a better understanding which results are unchanged. - for (Idx i(0); i < numOps; ++i) + for( Idx i( 0 ); i < numOps; ++i ) { - for (Idx j(0); j < sizeArgs; ++j) { + for( Idx j( 0 ); j < sizeArgs; ++j ) + { pBufHostRes[j + i * sizeArgs] = -1; } } - // C++11 random generator for uniformly distributed numbers in [-100, 100]. - std::random_device rd{}; - std::default_random_engine eng{rd()}; - std::uniform_real_distribution dist(0, 100); - - // Initiate the arguments. - for (Idx i(0); i < sizeArgs / 2 - 1; ++i) - { - pBufHostArgsX[i] = dist(eng); - pBufHostArgsY[i] = dist(eng); - - } - pBufHostArgsX[sizeArgs / 2 - 1] = 0.0; - pBufHostArgsY[sizeArgs / 2 - 1] = 0.0; - - pBufHostArgsX[sizeArgs / 2] = -0.0; - pBufHostArgsY[sizeArgs / 2] = -0.0; - - for (Idx i(sizeArgs / 2 + 1); i < sizeArgs; ++i) + for( Idx i( 0 ); i < numOps; ++i ) { - pBufHostArgsX[i] = dist(eng) - 100; - pBufHostArgsY[i] = dist(eng) - 100; + pBufHostStructs[i] = arr[i]; } + test::fillWithRndArgs< Data >( pBufHostArgsX, sizeArgs, randomRange ); + test::fillWithRndArgs< Data >( pBufHostArgsY, sizeArgs, randomRange ); - // Initiate the structs. - for (Idx i(0u); i < numOps; ++i) - { - pBufHostStructs[i] = arr[i]; - } // Allocate the buffer on the accelerator. 
- auto memBufAccArgsX(alpaka::mem::buf::alloc(devAcc, sizeArgs)); - auto memBufAccArgsY(alpaka::mem::buf::alloc(devAcc, sizeArgs)); - auto memBufAccRes(alpaka::mem::buf::alloc(devAcc, sizeRes)); - auto memBufAccStructs(alpaka::mem::buf::alloc, Idx>(devAcc, numOps)); + auto + memBufAccArgsX + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devAcc, + sizeArgs + ) + ); + auto + memBufAccArgsY + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devAcc, + sizeArgs + ) + ); + auto + memBufAccRes + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devAcc, + sizeRes + ) + ); + auto + memBufAccStructs + ( + alpaka::mem::buf::alloc< + TestStruct< + TAcc, + Data + >, + Idx + >( + devAcc, + numOps + ) + ); // Copy Host -> Acc. - alpaka::mem::view::copy(queue, memBufAccArgsX, memBufHostArgsX, sizeArgs); - alpaka::mem::view::copy(queue, memBufAccArgsY, memBufHostArgsY, sizeArgs); - alpaka::mem::view::copy(queue, memBufAccRes, memBufHostRes, sizeRes); - alpaka::mem::view::copy(queue, memBufAccStructs, memBufHostStructs, numOps); - - for (Idx i(0u); i < sizeArgs; ++i) { - std::cout << "bufferArgs x: " << pBufHostArgsX[i] << " y: " << pBufHostArgsY[i] << std::endl; + alpaka::mem::view::copy( + queue, + memBufAccArgsX, + memBufHostArgsX, + sizeArgs + ); + alpaka::mem::view::copy( + queue, + memBufAccArgsY, + memBufHostArgsY, + sizeArgs + ); + alpaka::mem::view::copy( + queue, + memBufAccRes, + memBufHostRes, + sizeRes + ); + alpaka::mem::view::copy( + queue, + memBufAccStructs, + memBufHostStructs, + numOps + ); + + for( Idx i( 0u ); i < sizeArgs; ++i ) + { + std::cout << "bufferArgs x: " << pBufHostArgsX[i] << " y: " + << pBufHostArgsY[i] << std::endl; } - auto pMemBufAccArgsX = alpaka::mem::view::getPtrNative(memBufAccArgsX); - auto pMemBufAccArgsY = alpaka::mem::view::getPtrNative(memBufAccArgsY); - auto pMemBufAccRes = alpaka::mem::view::getPtrNative(memBufAccRes); - auto pMemBufAccStructs = alpaka::mem::view::getPtrNative(memBufAccStructs); + auto + pMemBufAccArgsX = 
alpaka::mem::view::getPtrNative( memBufAccArgsX ); + auto + pMemBufAccArgsY = alpaka::mem::view::getPtrNative( memBufAccArgsY ); + auto pMemBufAccRes = alpaka::mem::view::getPtrNative( memBufAccRes ); + auto + pMemBufAccStructs = + alpaka::mem::view::getPtrNative( memBufAccStructs ); // Create the kernel execution task. - auto const taskKernel(alpaka::kernel::createTaskKernel( + auto const taskKernel( + alpaka::kernel::createTaskKernel< TAcc >( workDiv, kernel, numOps, @@ -307,27 +498,36 @@ struct TestTemplate { pMemBufAccArgsY, pMemBufAccStructs, pMemBufAccRes - )); + ) + ); // Enqueue the kernel execution task. - alpaka::queue::enqueue(queue, taskKernel); + alpaka::queue::enqueue( + queue, + taskKernel + ); // Copy back the result. - alpaka::mem::view::copy(queue, memBufHostRes, memBufAccRes, sizeRes); + alpaka::mem::view::copy( + queue, + memBufHostRes, + memBufAccRes, + sizeRes + ); // Wait for the queue to finish the memory operation. - alpaka::wait::wait(queue); + alpaka::wait::wait( queue ); // Print out all results. - for (Idx i(0); i < numOps; ++i) + for( Idx i( 0 ); i < numOps; ++i ) { std::cout << "\nResults " << i + 1 << ". 
function:\n"; - for (Idx j(0); j < sizeArgs; ++j) + for( Idx j( 0 ); j < sizeArgs; ++j ) { - Data const &res(pBufHostRes[j + i * sizeArgs]); + Data const & res( pBufHostRes[j + i * sizeArgs] ); std::cout << "bufferResults: " << res << "\n"; } } @@ -337,81 +537,76 @@ struct TestTemplate { Data argX; Data argY; Data stdRes; - TestStruct t; - for (Idx j(0); j < numOps; ++j) + TestStruct< + TAcc, + Data + > t; + for( Idx j( 0 ); j < numOps; ++j ) { t = arr[j]; - switch (t.range) + switch( t.range ) { case Range::POSITIVE_ONLY: - for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + for( Idx i( 0 ); i < sizeArgs / 2 - 1; ++i ) { argX = pBufHostArgsX[i]; argY = pBufHostArgsY[i]; - if(t.stdOp != nullptr) - stdRes = t.stdOp(argX, argY); - else - stdRes = t.stdAlternative(argX, argY); - REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + stdRes = t.getStdOpRes(argX, argY); + REQUIRE( stdRes == + Approx( pBufHostRes[i + sizeArgs * j] ) ); } break; case Range::NOT_ZER0: - for (Idx i(0); i < sizeArgs; ++i) + for( Idx i( 0 ); i < sizeArgs; ++i ) { - if (i == sizeArgs / 2 - 1 || i == sizeArgs / 2) + if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 ) continue; argX = pBufHostArgsX[i]; argY = pBufHostArgsY[i]; - if(t.stdOp != nullptr) - stdRes = t.stdOp(argX, argY); - else - stdRes = t.stdAlternative(argX, argY); - REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + stdRes = t.getStdOpRes(argX, argY); + REQUIRE( stdRes == + Approx( pBufHostRes[i + sizeArgs * j] ) ); } break; case Range::X_NOT_ZERO: - for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + for( Idx i( 0 ); i < sizeArgs; ++i ) { - if (i == sizeArgs / 2 - 1 || i == sizeArgs / 2) + if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 ) argX = pBufHostArgsX[0]; else argX = pBufHostArgsX[i]; + argY = pBufHostArgsY[i]; - if(t.stdOp != nullptr) - stdRes = t.stdOp(argX, argY); - else - stdRes = t.stdAlternative(argX, argY); - REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + stdRes = t.getStdOpRes(argX, argY); + REQUIRE( 
stdRes == + Approx( pBufHostRes[i + sizeArgs * j] ) ); } break; case Range::Y_NOT_ZERO: - for (Idx i(0); i < sizeArgs / 2 - 1; ++i) + for( Idx i( 0 ); i < sizeArgs; ++i ) { - if (i == sizeArgs / 2 - 1 || i == sizeArgs / 2) + if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 ) argY = pBufHostArgsY[0]; else argY = pBufHostArgsY[i]; + argX = pBufHostArgsX[i]; - if(t.stdOp != nullptr) - stdRes = t.stdOp(argX, argY); - else - stdRes = t.stdAlternative(argX, argY); - REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + stdRes = t.getStdOpRes(argX, argY); + REQUIRE( stdRes == + Approx( pBufHostRes[i + sizeArgs * j] ) ); } break; case Range::UNRESTRICTED: - for (Idx i(0); i < sizeArgs; ++i) + for( Idx i( 0 ); i < sizeArgs; ++i ) { argX = pBufHostArgsX[i]; argY = pBufHostArgsY[i]; - if(t.stdOp != nullptr) - stdRes = t.stdOp(argX, argY); - else - stdRes = t.stdAlternative(argX, argY); - REQUIRE(stdRes == Approx(pBufHostRes[i + sizeArgs * j])); + stdRes = t.getStdOpRes(argX, argY); + REQUIRE( stdRes == + Approx( pBufHostRes[i + sizeArgs * j] ) ); } break; @@ -429,4 +624,4 @@ TEST_CASE("binaryOps", "[binaryOps]") std::size_t>; alpaka::meta::forEachType< TestAccs >( TestTemplate() ); -} \ No newline at end of file +} diff --git a/test/unit/math/dataGen.hpp b/test/unit/math/dataGen.hpp new file mode 100644 index 000000000000..890a8efc8b60 --- /dev/null +++ b/test/unit/math/dataGen.hpp @@ -0,0 +1,53 @@ +/** Copyright 2019 Axel Huebl, Benjamin Worpitz + * + * This file is part of Alpaka. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +/** + * @namespace test + * @brief Only contains fillWithRndArgs. + * @fn fillWithRndArgs + * @tparam Data The used Buffer-type. + * @param buffer The buffer that should be filled. + * @param size The size of the used buffer. + * @param range The Range, around Zero, for the data. 
+ */ + +namespace test +{ + template< typename Data > + auto fillWithRndArgs( + Data * buffer, + size_t size, + size_t range + ) -> void + { + std::random_device rd {}; + std::default_random_engine eng { rd( ) }; + std::uniform_real_distribution< Data > dist( + 0, + range + ); + + // Initiate the arguments. + for( size_t i( 0 ); i < size / 2 - 1; ++i ) + { + buffer[i] = dist( eng ); + + } + // Define the middle of the args-buffer as zeros + buffer[size / 2 - 1] = 0.0; + + buffer[size / 2] = -0.0; + + // Change the Range for the random arguments to [-randomRange, 0] + for( size_t i( size / 2 + 1 ); i < size; ++i ) + { + buffer[i] = dist( eng ) - range; + } + } +} \ No newline at end of file diff --git a/test/unit/math/unaryOps/src/unaryOps.cpp b/test/unit/math/unaryOps/src/unaryOps.cpp index cec77b3b5ac7..0e580d1656cd 100644 --- a/test/unit/math/unaryOps/src/unaryOps.cpp +++ b/test/unit/math/unaryOps/src/unaryOps.cpp @@ -1,18 +1,23 @@ -// -// Created by jakob on 08.08.19. -// -#include +/** Copyright 2019 Axel Huebl, Benjamin Worpitz + * + * This file is part of Alpaka. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ +#include "../../dataGen.hpp" #include #include -#include +#include #include -#include -#include +#include #include +#include -/* list of all operators +/** List of all operators * operator | in std | definition | range | notes * abs | Y | R * acos | Y | [-1, 1] @@ -31,29 +36,28 @@ * sqrt | Y | N * tan | Y | [x | x \= pi/2 + k*pi, k in Z] * trunc | Y | R | round towards zero - */ - -/* - * if you want to add a new operation simply add it to the array. + * + * If you want to add a new operation simply add it to the array. * 1. Specify the std:: implementation. * 2. Specify the alpaka implementation. * 3. Define the range in which the operator should be testes against. 
- */ - -/* - * if you need to add a new range you have to add it to the switch case + * + * If you need to add a new range you have to add it to the switch case * - in the kernel class * - in the TestTemplate + * + * If the wrong range is used, + * the behaviour depends on the individual implementation. */ - // Custom functions. template T rsqrt(T t){ return 1 / std::sqrt(t); } -// Possible definition ranges. +//! @enum Range +//! @brief Possible definition ranges. enum class Range { POSITIVE_ONLY, @@ -72,7 +76,16 @@ template< typename T> using std_func_ptr = T (*) (T); -// Data-Structure for all operators. +/*! + * @struct TestStruct + * @tparam TAcc The type of the used accelerator, important for alpaka callback. + * @tparam T The data type (float || double). + * @var stdOp + * @brief Callback to the std implementation. + * @var alpakaOp + * @brief Callback to alpaka implementation. + * @note Behaviour for nullptr is undefined. + */ template< typename TAcc, typename T> @@ -101,12 +114,10 @@ class UnaryOpsKernel{ ) const -> void { - //results[0] = 0.42f; auto const gridThreadIdx(alpaka::idx::getIdx(acc)[0u]); TData arg; if(gridThreadIdx < numOps) { - // sizeRes = numOps * sizeArgs switch (structs[gridThreadIdx].range) { case Range::POSITIVE_ONLY: @@ -131,6 +142,7 @@ class UnaryOpsKernel{ } break; case Range::ONE_NEIGHBOURHOOD: + // this option is hardcoded because it is too specific if(sizeArgs < 4) break; arg = 1; @@ -153,25 +165,22 @@ class UnaryOpsKernel{ } }; - struct TestTemplate { - template - void operator()() + template< typename TAcc > + void operator()( ) { - using Dim = alpaka::dim::Dim; - using Idx = alpaka::idx::Idx; - using Data = double; - using DevAcc = alpaka::dev::Dev; - using PltfAcc = alpaka::pltf::Pltf; - using QueueAcc = alpaka::test::queue::DefaultQueue; + using Data = double; // this can be changed to float without problems + using Dim = alpaka::dim::Dim< TAcc >; + using Idx = alpaka::idx::Idx< TAcc >; + using DevAcc = alpaka::dev::Dev< TAcc
>; + using PltfAcc = alpaka::pltf::Pltf< DevAcc >; + using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >; using PltfHost = alpaka::pltf::PltfCpu; - std::cout << "\nTesting next AccType \n\n"; // the functions that will be tested - TestStruct arr [] = { { &std::abs, &alpaka::math::abs, Range::UNRESTRICTED }, @@ -191,135 +200,240 @@ struct TestTemplate { &std::sqrt, &alpaka::math::sqrt, Range::POSITIVE_AND_ZERO }, { &std::tan, &alpaka::math::tan, Range::UNRESTRICTED }, { &std::trunc, &alpaka::math::trunc, Range::UNRESTRICTED } - }; - Idx const numOps = sizeof(arr)/sizeof(TestStruct); - Idx const elementsPerThread(1u); - Idx const sizeArgs(10u); - Idx const sizeRes= sizeArgs * numOps; + Idx const + numOps = + sizeof( arr ) / sizeof( TestStruct< TAcc, Data > ); + Idx const elementsPerThread( 1u ); + // sizeArgs defines how many test arguments are used. + // It should be an even number greater than 4. + Idx const sizeArgs( 100u ); + Idx const sizeRes = sizeArgs * numOps; + constexpr size_t randomRange = 100u; + // Create the kernel function object. UnaryOpsKernel kernel; // Get the host device. auto const devHost( - alpaka::pltf::getDevByIdx(0u)); + alpaka::pltf::getDevByIdx< PltfHost >( 0u ) + ); // Select a device to execute on. auto const devAcc( - alpaka::pltf::getDevByIdx(0u)); + alpaka::pltf::getDevByIdx< PltfAcc >( 0u ) + ); // Get a queue on this device. - QueueAcc queue(devAcc); + QueueAcc queue( devAcc ); - alpaka::vec::Vec const extent(numOps); + alpaka::vec::Vec< + Dim, + Idx + > const extent( numOps ); // Let alpaka calculate good block and grid sizes given our full problem extent. 
- alpaka::workdiv::WorkDivMembers const workDiv( - alpaka::workdiv::getValidWorkDiv( + alpaka::workdiv::WorkDivMembers< + Dim, + Idx + > const workDiv( + alpaka::workdiv::getValidWorkDiv< TAcc >( devAcc, extent, elementsPerThread, false, - alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted)); + alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted + ) + ); // Allocate host memory buffers. - auto memBufHostArgs(alpaka::mem::buf::alloc(devHost, sizeArgs)); - auto memBufHostRes(alpaka::mem::buf::alloc(devHost, sizeRes)); - auto memBufHostStructs(alpaka::mem::buf::alloc, Idx>(devHost, extent)); - - Data * const pBufHostArgs = alpaka::mem::view::getPtrNative(memBufHostArgs); - Data * const pBufHostRes = alpaka::mem::view::getPtrNative(memBufHostRes); - TestStruct * const pBufHostStructs = alpaka::mem::view::getPtrNative(memBufHostStructs); + auto + memBufHostArgs + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devHost, + sizeArgs + ) + ); + auto + memBufHostRes + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devHost, + sizeRes + ) + ); + auto + memBufHostStructs + ( + alpaka::mem::buf::alloc< + TestStruct< + TAcc, + Data + >, + Idx + >( + devHost, + extent + ) + ); + + Data + * const pBufHostArgs = + alpaka::mem::view::getPtrNative( memBufHostArgs ); + Data + * const pBufHostRes = + alpaka::mem::view::getPtrNative( memBufHostRes ); + TestStruct< + TAcc, + Data + > + * const pBufHostStructs = + alpaka::mem::view::getPtrNative( memBufHostStructs ); // This is just for a better understanding which results are unchanged. 
- for(Idx i(0);i < numOps;++i) + for( Idx i( 0 ); i < numOps; ++i ) { - for(Idx j(0);j < sizeArgs;++j) + for( Idx j( 0 ); j < sizeArgs; ++j ) { - pBufHostRes[j+i*sizeArgs] = -1; + pBufHostRes[j + i * sizeArgs] = -1; } } - // C++11 random generator for uniformly distributed numbers in {-100,..,100} - std::random_device rd{}; - std::default_random_engine eng{ rd() }; - std::uniform_real_distribution dist(0, 100); - - - - // Initiate the arguments. - for(Idx i(0); i < sizeArgs/2-1; ++i) - { - pBufHostArgs[i] = dist(eng); - } - pBufHostArgs[sizeArgs/2 -1] = 0.0; - pBufHostArgs[sizeArgs/2] = -0.0; - - for(Idx i(sizeArgs/2 + 1); i < sizeArgs; ++i) - { - pBufHostArgs[i] = dist(eng)-100; - } + test::fillWithRndArgs< Data >(pBufHostArgs, sizeArgs, randomRange); // Initiate the structs. - for(Idx i(0u); i < numOps; ++i) + for( Idx i( 0u ); i < numOps; ++i ) { pBufHostStructs[i] = arr[i]; } // Allocate the buffer on the accelerator. - auto memBufAccArgs(alpaka::mem::buf::alloc(devAcc, sizeArgs)); - auto memBufAccRes(alpaka::mem::buf::alloc(devAcc, sizeRes)); - auto memBufAccStructs(alpaka::mem::buf::alloc, Idx>(devAcc, numOps)); + auto + memBufAccArgs + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devAcc, + sizeArgs + ) + ); + auto + memBufAccRes + ( + alpaka::mem::buf::alloc< + Data, + Idx + >( + devAcc, + sizeRes + ) + ); + auto + memBufAccStructs + ( + alpaka::mem::buf::alloc< + TestStruct< + TAcc, + Data + >, + Idx + >( + devAcc, + numOps + ) + ); // Copy Host -> Acc. 
- alpaka::mem::view::copy(queue, memBufAccArgs, memBufHostArgs, sizeArgs); - alpaka::mem::view::copy(queue, memBufAccRes, memBufHostRes, sizeRes); - alpaka::mem::view::copy(queue, memBufAccStructs, memBufHostStructs, numOps); - - for(Idx i(0u); i < sizeArgs; ++i){ - std::cout<<"bufferArgs: " << pBufHostArgs[i] <<"\n"; + alpaka::mem::view::copy( + queue, + memBufAccArgs, + memBufHostArgs, + sizeArgs + ); + alpaka::mem::view::copy( + queue, + memBufAccRes, + memBufHostRes, + sizeRes + ); + alpaka::mem::view::copy( + queue, + memBufAccStructs, + memBufHostStructs, + numOps + ); + + for( Idx i( 0u ); i < sizeArgs; ++i ) + { + std::cout << "bufferArgs: " << pBufHostArgs[i] << "\n"; } - auto pMemBufAccArgs = alpaka::mem::view::getPtrNative(memBufAccArgs); - auto pMemBufAccRes = alpaka::mem::view::getPtrNative(memBufAccRes); - auto pMemBufAccStructs = alpaka::mem::view::getPtrNative(memBufAccStructs); + auto pMemBufAccArgs = alpaka::mem::view::getPtrNative( memBufAccArgs ); + auto pMemBufAccRes = alpaka::mem::view::getPtrNative( memBufAccRes ); + auto + pMemBufAccStructs = + alpaka::mem::view::getPtrNative( memBufAccStructs ); // Create the kernel execution task. - auto const taskKernel(alpaka::kernel::createTaskKernel( - workDiv, - kernel, - numOps, - sizeArgs, - pMemBufAccArgs, - pMemBufAccStructs, - pMemBufAccRes - )); + auto const taskKernel( + alpaka::kernel::createTaskKernel< TAcc >( + workDiv, + kernel, + numOps, + sizeArgs, + pMemBufAccArgs, + pMemBufAccStructs, + pMemBufAccRes + ) + ); // Enqueue the kernel execution task. - alpaka::queue::enqueue(queue, taskKernel); + alpaka::queue::enqueue( + queue, + taskKernel + ); // Copy back the result. 
- alpaka::mem::view::copy(queue, memBufHostArgs, memBufAccArgs, sizeArgs); - alpaka::mem::view::copy(queue, memBufHostRes, memBufAccRes, sizeRes); + alpaka::mem::view::copy( + queue, + memBufHostArgs, + memBufAccArgs, + sizeArgs + ); + alpaka::mem::view::copy( + queue, + memBufHostRes, + memBufAccRes, + sizeRes + ); // Wait for the queue to finish the memory operation. - alpaka::wait::wait(queue); + alpaka::wait::wait( queue ); // Print out all results. - for(Idx i(0u); i < numOps; ++i) + for( Idx i( 0u ); i < numOps; ++i ) { - std::cout <<"\nResults "<< i +1 <<". function:\n"; + std::cout << "\nResults " << i + 1 << ". function:\n"; - for(Idx j(0u); j < sizeArgs; ++j) + for( Idx j( 0u ); j < sizeArgs; ++j ) { - Data const & res(pBufHostRes[j + i * sizeArgs]); - std::cout<<"bufferResults: " << res << "\n"; + Data const & res( pBufHostRes[j + i * sizeArgs] ); + std::cout << "bufferResults: " << res << "\n"; } } @@ -327,57 +441,68 @@ struct TestTemplate // Check device result against host result. 
Data arg; Data stdRes; - TestStruct t; - for(Idx i(0u); i < numOps; ++i) + TestStruct< + TAcc, + Data + > t; + for( Idx i( 0u ); i < numOps; ++i ) { t = arr[i]; - switch (t.range) + switch( t.range ) { case Range::POSITIVE_ONLY: - for (Idx j(0); j < sizeArgs / 2 - 2; ++j) + for( Idx j( 0 ); j < sizeArgs / 2 - 2; ++j ) { arg = pBufHostArgs[j]; - stdRes = t.stdOp(arg); - REQUIRE( stdRes == Approx(pBufHostRes[j + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[j + i * sizeArgs] ) ); } break; case Range::POSITIVE_AND_ZERO: - for (Idx j(0); j < sizeArgs / 2 - 2; ++j) + for( Idx j( 0 ); j < sizeArgs / 2 - 2; ++j ) { arg = pBufHostArgs[j]; - stdRes = t.stdOp(arg); - REQUIRE(stdRes == Approx(pBufHostRes[j + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[j + i * sizeArgs] ) ); } break; case Range::UNRESTRICTED: - for (Idx j(0); j < sizeArgs / 2 - 2; ++j) + for( Idx j( 0 ); j < sizeArgs / 2 - 2; ++j ) { arg = pBufHostArgs[j]; - stdRes = t.stdOp(arg); - REQUIRE( stdRes == Approx(pBufHostRes[j + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[j + i * sizeArgs] ) ); } break; case Range::ONE_NEIGHBOURHOOD: - if(sizeArgs < 4) + if( sizeArgs < 4 ) break; arg = 1; - stdRes = t.stdOp(arg); - REQUIRE(stdRes == Approx(pBufHostRes[0 + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[0 + i * sizeArgs] ) ); arg = 0.5; - stdRes = t.stdOp(arg); - REQUIRE(stdRes == Approx(pBufHostRes[1 + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[1 + i * sizeArgs] ) ); arg = 0; - stdRes = t.stdOp(arg); - REQUIRE(stdRes == Approx(pBufHostRes[2 + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[2 + i * sizeArgs] ) ); arg = -0.5; - stdRes = t.stdOp(arg); - REQUIRE(stdRes == Approx(pBufHostRes[3 + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( 
pBufHostRes[3 + i * sizeArgs] ) ); arg = -1; - stdRes = t.stdOp(arg); - REQUIRE(stdRes == Approx(pBufHostRes[4 + i * sizeArgs])); + stdRes = t.stdOp( arg ); + REQUIRE( stdRes == + Approx( pBufHostRes[4 + i * sizeArgs] ) ); break; default: @@ -390,9 +515,10 @@ struct TestTemplate TEST_CASE("unaryOps", "[unaryOps]") { using TestAccs = alpaka::test::acc::EnabledAccs< - alpaka::dim::DimInt<1u>, - std::size_t>; + alpaka::dim::DimInt< 1u >, + std::size_t + >; - alpaka::meta::forEachType< TestAccs >( TestTemplate() ); + alpaka::meta::forEachType< TestAccs >( TestTemplate( ) ); } From 0a1475ba688a6f53054a1b3af6d2085991781c2a Mon Sep 17 00:00:00 2001 From: Jakob Date: Mon, 9 Sep 2019 15:30:28 +0200 Subject: [PATCH 3/3] added seed dataGen and resolved further requests --- test/unit/math/binaryOps/src/binaryOps.cpp | 40 +++++------------- test/unit/math/dataGen.hpp | 13 +++++- test/unit/math/unaryOps/src/unaryOps.cpp | 47 +++++++++------------- 3 files changed, 39 insertions(+), 61 deletions(-) diff --git a/test/unit/math/binaryOps/src/binaryOps.cpp b/test/unit/math/binaryOps/src/binaryOps.cpp index af4f51a28826..ef7a40dfd0e8 100644 --- a/test/unit/math/binaryOps/src/binaryOps.cpp +++ b/test/unit/math/binaryOps/src/binaryOps.cpp @@ -25,7 +25,7 @@ * min | Y | R^2 * remainder | Y | R^2\{(x,0)|x in R} * - * sincos is testes separately, + * sincos is tested separately, * because it manipulates two inputs and doesnt return a value * * If you want to add a new operation simply add it to the array. 
@@ -239,14 +239,13 @@ class BinaryOpsKernel } }; - +template < typename Data > struct TestTemplate { template< typename TAcc> void operator()( ) { - using Data = double; // this can be changed to float without problems using Dim = alpaka::dim::Dim< TAcc >; using Idx = alpaka::idx::Idx< TAcc >; using DevAcc = alpaka::dev::Dev< TAcc >; @@ -254,7 +253,6 @@ struct TestTemplate using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >; using PltfHost = alpaka::pltf::PltfCpu; - std::cout << "\nTesting next AccType \n\n"; // the functions that will be tested TestStruct arr [] = {/* normal callback, const callback, alpaka callback, definition range*/ @@ -270,7 +268,7 @@ struct TestTemplate numOps = sizeof( arr ) / sizeof( TestStruct< TAcc, Data > ); Idx const elementsPerThread( 1u ); - Idx const sizeArgs( 10u ); + Idx const sizeArgs( 100u ); Idx const sizeRes = sizeArgs * numOps; constexpr size_t randomRange = 100u; @@ -309,7 +307,6 @@ struct TestTemplate ) ); - // Allocate host memory buffers. auto memBufHostArgsX @@ -388,11 +385,12 @@ struct TestTemplate { pBufHostStructs[i] = arr[i]; } - - test::fillWithRndArgs< Data >( pBufHostArgsX, sizeArgs, randomRange ); + unsigned long seed = + test::fillWithRndArgs< Data >( pBufHostArgsX, sizeArgs, randomRange ); + std::cout << "Using seed: " << seed <<" for x-values\n"; + seed = test::fillWithRndArgs< Data >( pBufHostArgsY, sizeArgs, randomRange ); - - + std::cout << "Using seed: " << seed <<" for y-values\n"; // Allocate the buffer on the accelerator. auto @@ -470,12 +468,6 @@ struct TestTemplate numOps ); - for( Idx i( 0u ); i < sizeArgs; ++i ) - { - std::cout << "bufferArgs x: " << pBufHostArgsX[i] << " y: " - << pBufHostArgsY[i] << std::endl; - } - auto pMemBufAccArgsX = alpaka::mem::view::getPtrNative( memBufAccArgsX ); auto @@ -519,19 +511,6 @@ struct TestTemplate // Wait for the queue to finish the memory operation. alpaka::wait::wait( queue ); - - // Print out all results. 
- for( Idx i( 0 ); i < numOps; ++i ) - { - std::cout << "\nResults " << i + 1 << ". function:\n"; - - for( Idx j( 0 ); j < sizeArgs; ++j ) - { - Data const & res( pBufHostRes[j + i * sizeArgs] ); - std::cout << "bufferResults: " << res << "\n"; - } - } - // Check device result against host result. Data argX; @@ -623,5 +602,6 @@ TEST_CASE("binaryOps", "[binaryOps]") alpaka::dim::DimInt<1u>, std::size_t>; - alpaka::meta::forEachType< TestAccs >( TestTemplate() ); + alpaka::meta::forEachType< TestAccs >( TestTemplate() ); + alpaka::meta::forEachType< TestAccs >( TestTemplate() ); } diff --git a/test/unit/math/dataGen.hpp b/test/unit/math/dataGen.hpp index 890a8efc8b60..fbeb0c7d9549 100644 --- a/test/unit/math/dataGen.hpp +++ b/test/unit/math/dataGen.hpp @@ -7,6 +7,8 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include + /** * @namespace test * @brief Only contains fillWithRndArgs. @@ -24,10 +26,14 @@ namespace test Data * buffer, size_t size, size_t range - ) -> void + ) -> unsigned long { std::random_device rd {}; - std::default_random_engine eng { rd( ) }; + unsigned long seed = rd( ); + // for every accelerator type one seed will be generated + std::default_random_engine eng { seed }; + // these pseudo-random numbers are implementation specific + // and are not portable std::uniform_real_distribution< Data > dist( 0, range @@ -49,5 +55,8 @@ namespace test { buffer[i] = dist( eng ) - range; } + + return seed; + } } \ No newline at end of file diff --git a/test/unit/math/unaryOps/src/unaryOps.cpp b/test/unit/math/unaryOps/src/unaryOps.cpp index 0e580d1656cd..4a3146745fdb 100644 --- a/test/unit/math/unaryOps/src/unaryOps.cpp +++ b/test/unit/math/unaryOps/src/unaryOps.cpp @@ -91,12 +91,16 @@ template< typename T> struct TestStruct { - std_func_ptr stdOp; - alpaka_func_ptr alpakaOp; + std_func_ptr< T > stdOp; + alpaka_func_ptr< + TAcc, + T + > alpakaOp; Range range; }; -class UnaryOpsKernel{ +class UnaryOpsKernel +{ public: 
ALPAKA_NO_HOST_ACC_WARNING template< @@ -121,14 +125,14 @@ class UnaryOpsKernel{ switch (structs[gridThreadIdx].range) { case Range::POSITIVE_ONLY: - for(TIdx row(0); row < sizeArgs/2 -1; ++row) + for(TIdx row(0); row < sizeArgs / 2 - 1; ++row) { arg = args[row]; results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); } break; case Range::POSITIVE_AND_ZERO: - for(TIdx row(0); row < sizeArgs/2; ++row) + for(TIdx row(0); row < sizeArgs / 2; ++row) { arg = args[row]; results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg); @@ -165,20 +169,20 @@ class UnaryOpsKernel{ } }; +template struct TestTemplate { +public: template< typename TAcc > void operator()( ) { - using Data = double; // this can be changed to float without problems + //using Data = double; // this can be changed to float without problems using Dim = alpaka::dim::Dim< TAcc >; using Idx = alpaka::idx::Idx< TAcc >; using DevAcc = alpaka::dev::Dev< TAcc >; using PltfAcc = alpaka::pltf::Pltf< DevAcc >; using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >; using PltfHost = alpaka::pltf::PltfCpu; - - std::cout << "\nTesting next AccType \n\n"; // the functions that will be tested TestStruct arr [] = @@ -309,7 +313,10 @@ struct TestTemplate } } - test::fillWithRndArgs< Data >(pBufHostArgs, sizeArgs, randomRange); + unsigned long seed = + test::fillWithRndArgs< Data >(pBufHostArgs, sizeArgs, randomRange); + std::cout << "Using seed: " << seed <, std::size_t >; - - alpaka::meta::forEachType< TestAccs >( TestTemplate( ) ); + alpaka::meta::forEachType< TestAccs >( TestTemplate( ) ); + alpaka::meta::forEachType< TestAccs >( TestTemplate( ) ); }