diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 9c39d074ca6b..41788149c253 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -29,6 +29,8 @@ ADD_SUBDIRECTORY("event/")
 ADD_SUBDIRECTORY("idx/")
 ADD_SUBDIRECTORY("kernel/")
 ADD_SUBDIRECTORY("math/sincos/")
+ADD_SUBDIRECTORY("math/unaryOps/")
+ADD_SUBDIRECTORY("math/binaryOps/")
 ADD_SUBDIRECTORY("mem/buf/")
 ADD_SUBDIRECTORY("mem/view/")
 ADD_SUBDIRECTORY("mem/p2p/")
diff --git a/test/unit/math/binaryOps/CMakeLists.txt b/test/unit/math/binaryOps/CMakeLists.txt
new file mode 100644
index 000000000000..d71ece931e1d
--- /dev/null
+++ b/test/unit/math/binaryOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+SET(_TARGET_NAME "binaryOps")  # Name used for the executable target and for the CTest entry.
+# NOTE(review): project helper; presumably gathers the *.cpp files below src/ into _FILES_SOURCE - confirm.
+append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE)
+# Build the test executable from the collected sources.
+ALPAKA_ADD_EXECUTABLE(
+        ${_TARGET_NAME}
+        ${_FILES_SOURCE})
+TARGET_INCLUDE_DIRECTORIES(
+        ${_TARGET_NAME}
+        PRIVATE ${Boost_INCLUDE_DIRS})
+TARGET_LINK_LIBRARIES(
+        ${_TARGET_NAME}
+        PRIVATE common)
+# FOLDER only controls how IDE generators group the target.
+SET_TARGET_PROPERTIES(${_TARGET_NAME} PROPERTIES FOLDER "test/unit")
+# Register the executable with CTest; extra arguments come from _ALPAKA_TEST_OPTIONS.
+ADD_TEST(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_ALPAKA_TEST_OPTIONS})
diff --git a/test/unit/math/binaryOps/src/binaryOps.cpp b/test/unit/math/binaryOps/src/binaryOps.cpp
new file mode 100644
index 000000000000..ef7a40dfd0e8
--- /dev/null
+++ b/test/unit/math/binaryOps/src/binaryOps.cpp
@@ -0,0 +1,607 @@
+/** Copyright 2019 Axel Huebl, Benjamin Worpitz
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <alpaka/alpaka.hpp>
+#include <alpaka/test/MeasureKernelRunTime.hpp>
+#include <alpaka/test/acc/TestAccs.hpp>
+#include <alpaka/test/queue/Queue.hpp>
+
+#include <catch2/catch.hpp>
+#include <cmath>
+#include <iostream>
+#include "../../dataGen.hpp"
+
+/** List of all operators
+ * operator  | in std | definition range
+ * atan2     | Y | R^2\{(0,0)}
+ * fmod      | Y | R^2\{(x,0)|x in R}
+ * max       | Y | R^2
+ * min       | Y | R^2
+ * remainder | Y | R^2\{(x,0)|x in R}
+ *
+ * sincos is tested separately,
+ * because it writes its results through two of its arguments and doesn't return a value.
+ *
+ * If you want to add a new operation simply add it to the array.
+ * 1. Specify the std::  implementation.
+ * 2. If the std function takes const references (like min()), put it in the second slot and leave the first one nullptr.
+ * 3. Specify the alpaka implementation.
+ * 4. Define the range in which the operator should be tested.
+ *
+ * If you need to add a new range you have to add it to the switch case
+ *  - in the kernel class
+ *  - in the TestTemplate
+ *  
+ *  If the wrong range is used,
+ *  the behaviour depends on the individual implementation.
+ */
+
+//! @enum Range
+//! @brief Possible definition ranges; each selects which argument pairs the kernel and the host check use.
+enum class Range
+{
+    POSITIVE_ONLY, //!< Only the entries before the two middle slots are evaluated.
+    NOT_ZER0,      //!< Every entry except the two middle slots. NOTE(review): spelled with the digit 0, not the letter O.
+    X_NOT_ZERO,    //!< Every entry; for the two middle slots x is replaced by the first x value.
+    Y_NOT_ZERO,    //!< Every entry; for the two middle slots y is replaced by the first y value.
+    UNRESTRICTED   //!< Every entry, unchanged.
+};
+
+// Plain function-pointer types for the two-argument callbacks into alpaka::math and std::.
+template<
+        typename TAcc,
+        typename T>
+using alpaka_func_ptr = T (*) (TAcc const & , T const &, T const &);
+// std:: reference callback that takes and returns by value.
+template<
+        typename T>
+using std_func_ptr = T (*) (T, T);
+
+// used for all operators that need const references (like min())
+template<
+        typename T>
+using std_func_ptr_const = T const & (*) (T const & , T const &);
+
+/*!
+ * @struct TestStruct
+ * @brief One operator under test: std reference callback(s), alpaka callback and argument range.
+ * @tparam TAcc The type of the used accelerator, important for alpaka callback.
+ * @tparam T The data type (float || double).
+ * @var stdOp
+ * @brief Callback to the std implementation that takes its arguments by value.
+ * @var stdAlternative
+ * @brief Callback to the std implementation that takes const references (like min()).
+ * @var alpakaOp
+ * @brief Callback to alpaka implementation.
+ * @var range
+ * @brief Definition range the operator is tested on.
+ * @note One, and only one, of stdOp or stdAlternative should be a nullptr.
+ * @fn getStdOpRes
+ * @brief Returns the std result; stdOp is preferred, stdAlternative is the fallback.
+ * @throws std::runtime_error If both std callbacks are nullptr.
+ */
+template<
+        typename TAcc,
+        typename T>
+struct TestStruct
+{
+    std_func_ptr< T > stdOp;
+    std_func_ptr_const< T > stdAlternative;
+    alpaka_func_ptr<
+        TAcc,
+        T
+    > alpakaOp;
+    Range range;
+
+    // const-qualified: it only reads the callbacks, so it can also be used on const instances.
+    T getStdOpRes(
+        T argX,
+        T argY
+    ) const
+    {
+        if( stdOp != nullptr )
+        {
+            return stdOp( argX, argY );
+        }
+        if( stdAlternative != nullptr )
+        {
+            return stdAlternative( argX, argY );
+        }
+        throw std::runtime_error(
+            "At least one std implementation should be given" );
+    }
+};
+//! Kernel functor: grid thread k evaluates operator k (structs[k]) on the argument pairs selected by its range.
+class BinaryOpsKernel
+{
+public:
+    ALPAKA_NO_HOST_ACC_WARNING
+    template<
+        typename TAcc,
+        typename TData,
+        typename TIdx
+    >
+    ALPAKA_FN_ACC auto operator()(
+        TAcc const & acc,
+        TIdx const & numOps, // number of entries in structs
+        TIdx const & sizeArgs, // number of entries in argsX and in argsY
+        TData const * const argsX,
+        TData const * const argsY,
+        TestStruct<
+            TAcc,
+            TData
+        > const * const structs,
+        TData * results // row k (sizeArgs entries starting at k * sizeArgs) receives the results of operator k
+    ) const -> void
+    {
+        auto const
+            gridThreadIdx
+            (
+                alpaka::idx::getIdx<
+                    alpaka::Grid,
+                    alpaka::Threads
+                >( acc )[0u]
+            );
+        TData argX;
+        TData argY;
+        if( gridThreadIdx < numOps ) // threads beyond the operator table do nothing
+        {
+            switch( structs[gridThreadIdx].range )
+            {
+                case Range::POSITIVE_ONLY: // only the entries before the two middle slots
+                    for( TIdx i( 0 ); i < sizeArgs / 2 - 1; ++i )
+                    {
+                        argX = argsX[i];
+                        argY = argsY[i];
+                        results[i + gridThreadIdx * sizeArgs] =
+                            structs[gridThreadIdx].alpakaOp(
+                                acc,
+                                argX,
+                                argY
+                            );
+                    }
+                    break;
+                case Range::NOT_ZER0: // the two middle slots are skipped, their results stay untouched
+                    for( TIdx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 )
+                            continue;
+                        argX = argsX[i];
+                        argY = argsY[i];
+                        results[i + gridThreadIdx * sizeArgs] =
+                            structs[gridThreadIdx].alpakaOp(
+                                acc,
+                                argX,
+                                argY
+                            );
+                    }
+                    break;
+                // For the two middle slots x is taken from argsX[0] instead.
+                case Range::X_NOT_ZERO:
+                    for( TIdx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 )
+                            argX = argsX[0];
+                        else
+                            argX = argsX[i];
+                        argY = argsY[i];
+                        results[i + gridThreadIdx * sizeArgs] =
+                            structs[gridThreadIdx].alpakaOp(
+                                acc,
+                                argX,
+                                argY
+                            );
+                    }
+                    break;
+                // For the two middle slots y is taken from argsY[0] instead.
+                case Range::Y_NOT_ZERO:
+                    for( TIdx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 )
+                            argY = argsY[0];
+                        else
+                            argY = argsY[i];
+                        argX = argsX[i];
+
+                        results[i + gridThreadIdx * sizeArgs] =
+                            structs[gridThreadIdx].alpakaOp(
+                                acc,
+                                argX,
+                                argY
+                            );
+                    }
+                    break;
+                // Every slot is evaluated as it is.
+                case Range::UNRESTRICTED:
+                    for( TIdx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        argX = argsX[i];
+                        argY = argsY[i];
+                        results[i + gridThreadIdx * sizeArgs] =
+                            structs[gridThreadIdx].alpakaOp(
+                                acc,
+                                argX,
+                                argY
+                            );
+                    }
+                    break;
+                // Any other range value: nothing is written.
+                default:
+                    break;
+            }
+        }
+    }
+};
+//! Functor for alpaka::meta::forEachType: runs every operator of the table on accelerator TAcc and REQUIREs that each result Approx-matches the std:: result.
+template < typename Data >
+struct TestTemplate
+{
+    template<
+        typename TAcc>
+    void operator()( )
+    {
+        using Dim = alpaka::dim::Dim< TAcc >;
+        using Idx = alpaka::idx::Idx< TAcc >;
+        using DevAcc = alpaka::dev::Dev< TAcc >;
+        using PltfAcc = alpaka::pltf::Pltf< DevAcc >;
+        using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >;
+        using PltfHost = alpaka::pltf::PltfCpu;
+
+        // the functions that will be tested
+        TestStruct<TAcc, Data> arr [] =
+            {/* normal callback,     const callback, alpaka callback,                           definition range*/
+                { &std::atan2,       nullptr,        &alpaka::math::atan2<TAcc, Data, Data>,     Range::NOT_ZER0},
+                { &std::fmod,        nullptr,        &alpaka::math::fmod<TAcc, Data, Data>,      Range::Y_NOT_ZERO},
+                { nullptr,           &std::max,      &alpaka::math::max<TAcc, Data, Data>,       Range::Y_NOT_ZERO},
+                { nullptr,           &std::min,      &alpaka::math::min<TAcc, Data, Data>,       Range::UNRESTRICTED},
+                { &std::pow,         nullptr,        &alpaka::math::pow<TAcc, Data, Data>,       Range::POSITIVE_ONLY},
+                { &std::remainder,   nullptr,        &alpaka::math::remainder<TAcc, Data, Data>, Range::POSITIVE_ONLY}
+            };
+        // NOTE(review): max, pow and remainder use other ranges than the operator list at the top of this file states - confirm this is intended.
+        Idx const
+            numOps =
+            sizeof( arr ) / sizeof( TestStruct< TAcc, Data > );
+        Idx const elementsPerThread( 1u );
+        Idx const sizeArgs( 100u );
+        Idx const sizeRes = sizeArgs * numOps;
+        constexpr size_t randomRange = 100u;
+
+        // Create the kernel function object.
+        BinaryOpsKernel kernel;
+
+        // Get the host device.
+        auto const devHost(
+            alpaka::pltf::getDevByIdx< PltfHost >( 0u )
+        );
+
+        // Select a device to execute on.
+        auto const devAcc(
+            alpaka::pltf::getDevByIdx< PltfAcc >( 0u )
+        );
+
+        // Get a queue on this device.
+        QueueAcc queue( devAcc );
+        // One work item per operator: the kernel loops over the arguments itself.
+        alpaka::vec::Vec<
+            Dim,
+            Idx
+        > const extent( numOps );
+
+        // Let alpaka calculate good block and grid sizes given our full problem extent.
+        alpaka::workdiv::WorkDivMembers<
+            Dim,
+            Idx
+        > const workDiv(
+            alpaka::workdiv::getValidWorkDiv< TAcc >(
+                devAcc,
+                extent,
+                elementsPerThread,
+                false,
+                alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted
+            )
+        );
+
+        // Allocate host memory buffers.
+        auto
+            memBufHostArgsX
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devHost,
+                    sizeArgs
+                )
+            );
+        auto
+            memBufHostArgsY
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devHost,
+                    sizeArgs
+                )
+            );
+        auto
+            memBufHostRes
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devHost,
+                    sizeRes
+                )
+            );
+        auto
+            memBufHostStructs
+            (
+                alpaka::mem::buf::alloc<
+                    TestStruct<
+                        TAcc,
+                        Data
+                    >,
+                    Idx
+                >(
+                    devHost,
+                    extent
+                )
+            );
+
+        Data
+            * const pBufHostArgsX =
+            alpaka::mem::view::getPtrNative( memBufHostArgsX );
+        Data
+            * const pBufHostArgsY =
+            alpaka::mem::view::getPtrNative( memBufHostArgsY );
+        Data
+            * const pBufHostRes =
+            alpaka::mem::view::getPtrNative( memBufHostRes );
+        TestStruct<
+            TAcc,
+            Data
+        >
+            * const pBufHostStructs =
+            alpaka::mem::view::getPtrNative( memBufHostStructs );
+
+        // This is just for a better understanding which results are unchanged.
+        for( Idx i( 0 ); i < numOps; ++i )
+        {
+            for( Idx j( 0 ); j < sizeArgs; ++j )
+            {
+                pBufHostRes[j + i * sizeArgs] = -1;
+            }
+        }
+        // Copy the operator table into the host buffer that is transferred to the accelerator below.
+        for( Idx i( 0 ); i < numOps; ++i )
+        {
+            pBufHostStructs[i] = arr[i];
+        }
+        unsigned long seed =
+        test::fillWithRndArgs< Data >( pBufHostArgsX, sizeArgs, randomRange );
+        std::cout << "Using seed: " << seed <<" for x-values\n";
+        seed =
+        test::fillWithRndArgs< Data >( pBufHostArgsY, sizeArgs, randomRange );
+        std::cout << "Using seed: " << seed <<" for y-values\n";
+
+        // Allocate the buffer on the accelerator.
+        auto
+            memBufAccArgsX
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devAcc,
+                    sizeArgs
+                )
+            );
+        auto
+            memBufAccArgsY
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devAcc,
+                    sizeArgs
+                )
+            );
+        auto
+            memBufAccRes
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devAcc,
+                    sizeRes
+                )
+            );
+        auto
+            memBufAccStructs
+            (
+                alpaka::mem::buf::alloc<
+                    TestStruct<
+                        TAcc,
+                        Data
+                    >,
+                    Idx
+                >(
+                    devAcc,
+                    numOps
+                )
+            );
+
+
+        // Copy Host -> Acc.
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccArgsX,
+            memBufHostArgsX,
+            sizeArgs
+        );
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccArgsY,
+            memBufHostArgsY,
+            sizeArgs
+        );
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccRes,
+            memBufHostRes,
+            sizeRes
+        );
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccStructs,
+            memBufHostStructs,
+            numOps
+        );
+
+        auto
+            pMemBufAccArgsX = alpaka::mem::view::getPtrNative( memBufAccArgsX );
+        auto
+            pMemBufAccArgsY = alpaka::mem::view::getPtrNative( memBufAccArgsY );
+        auto pMemBufAccRes = alpaka::mem::view::getPtrNative( memBufAccRes );
+        auto
+            pMemBufAccStructs =
+            alpaka::mem::view::getPtrNative( memBufAccStructs );
+
+
+
+        // Create the kernel execution task.
+        auto const taskKernel(
+            alpaka::kernel::createTaskKernel< TAcc >(
+                workDiv,
+                kernel,
+                numOps,
+                sizeArgs,
+                pMemBufAccArgsX,
+                pMemBufAccArgsY,
+                pMemBufAccStructs,
+                pMemBufAccRes
+            )
+        );
+
+        // Enqueue the kernel execution task.
+        alpaka::queue::enqueue(
+            queue,
+            taskKernel
+        );
+
+        // Copy back the result.
+        alpaka::mem::view::copy(
+            queue,
+            memBufHostRes,
+            memBufAccRes,
+            sizeRes
+        );
+
+
+        // Wait for the queue to finish the memory operation.
+        alpaka::wait::wait( queue );
+
+        // Check device result against host result.
+        // Each range selects the same argument pairs here as it does inside the kernel.
+        Data argX;
+        Data argY;
+        Data stdRes;
+        TestStruct<
+            TAcc,
+            Data
+        > t;
+        for( Idx j( 0 ); j < numOps; ++j )
+        {
+            t = arr[j];
+            switch( t.range )
+            {
+                case Range::POSITIVE_ONLY:
+                    for( Idx i( 0 ); i < sizeArgs / 2 - 1; ++i )
+                    {
+                        argX = pBufHostArgsX[i];
+                        argY = pBufHostArgsY[i];
+                        stdRes = t.getStdOpRes(argX, argY);
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[i + sizeArgs * j] ) );
+                    }
+                    break;
+                case Range::NOT_ZER0:
+                    for( Idx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 )
+                            continue;
+                        argX = pBufHostArgsX[i];
+                        argY = pBufHostArgsY[i];
+                        stdRes = t.getStdOpRes(argX, argY);
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[i + sizeArgs * j] ) );
+                    }
+                    break;
+
+                case Range::X_NOT_ZERO:
+                    for( Idx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 )
+                            argX = pBufHostArgsX[0];
+                        else
+                            argX = pBufHostArgsX[i];
+
+                        argY = pBufHostArgsY[i];
+                        stdRes = t.getStdOpRes(argX, argY);
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[i + sizeArgs * j] ) );
+                    }
+                    break;
+
+                case Range::Y_NOT_ZERO:
+                    for( Idx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        if( i == sizeArgs / 2 - 1 || i == sizeArgs / 2 )
+                            argY = pBufHostArgsY[0];
+                        else
+                            argY = pBufHostArgsY[i];
+
+                        argX = pBufHostArgsX[i];
+                        stdRes = t.getStdOpRes(argX, argY);
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[i + sizeArgs * j] ) );
+                    }
+                    break;
+
+                case Range::UNRESTRICTED:
+                    for( Idx i( 0 ); i < sizeArgs; ++i )
+                    {
+                        argX = pBufHostArgsX[i];
+                        argY = pBufHostArgsY[i];
+                        stdRes = t.getStdOpRes(argX, argY);
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[i + sizeArgs * j] ) );
+                    }
+                    break;
+
+                default:
+                    break;
+            }
+        }
+    }
+};
+
+//! Runs TestTemplate for every accelerator type in EnabledAccs<DimInt<1>, std::size_t>: first with double, then with float.
+TEST_CASE("binaryOps", "[binaryOps]")
+{
+    using Dim1 = alpaka::dim::DimInt<1u>;
+    using Accs = alpaka::test::acc::EnabledAccs< Dim1, std::size_t >;
+
+    alpaka::meta::forEachType< Accs >( TestTemplate< double >{} );
+    alpaka::meta::forEachType< Accs >( TestTemplate< float >{} );
+}
diff --git a/test/unit/math/dataGen.hpp b/test/unit/math/dataGen.hpp
new file mode 100644
index 000000000000..fbeb0c7d9549
--- /dev/null
+++ b/test/unit/math/dataGen.hpp
@@ -0,0 +1,62 @@
+/** Copyright 2019 Axel Huebl, Benjamin Worpitz
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <random>
+
+/**
+ * @namespace test
+ * @brief Only contains fillWithRndArgs.
+ * @fn fillWithRndArgs
+ * @brief Fills the buffer with random values in [0, range), then 0.0 and -0.0, then random values in [-range, 0).
+ * @tparam Data The element type of the buffer (a floating point type).
+ * @param buffer The buffer that should be filled.
+ * @param size The number of elements in the buffer; sizes below 2 are filled without out-of-bounds access.
+ * @param range The Range, around Zero, for the data.
+ * @return The seed that was drawn from std::random_device.
+ */
+namespace test
+{
+    template< typename Data >
+    auto fillWithRndArgs(
+        Data * buffer,
+        size_t size,
+        size_t range
+    ) -> unsigned long
+    {
+        std::random_device rd {};
+        unsigned long seed = rd( );
+        // for every accelerator type one seed will be generated
+        // Explicit cast: brace-initialising from unsigned long is a narrowing error where result_type is narrower.
+        std::default_random_engine eng( static_cast< std::default_random_engine::result_type >( seed ) );
+        // these pseudo-random numbers are implementation specific
+        // and are not portable
+        std::uniform_real_distribution< Data > dist(
+            0,
+            static_cast< Data >( range )
+        );
+        // Index of the negative zero; the positive zero sits directly before it.
+        size_t const mid = size / 2;
+        // Initiate the arguments (written as i + 1 < mid so that mid == 0 cannot wrap around).
+        for( size_t i( 0 ); i + 1 < mid; ++i )
+        {
+            buffer[i] = dist( eng );
+        }
+        // Define the middle of the args-buffer as zeros
+        if( mid > 0 )
+            buffer[mid - 1] = 0.0;
+        if( mid < size )
+            buffer[mid] = -0.0;
+        // Change the Range for the random arguments to [-randomRange, 0]
+        for( size_t i( mid + 1 ); i < size; ++i )
+        {
+            buffer[i] = dist( eng ) - range;
+        }
+        return seed;
+    }
+}
\ No newline at end of file
diff --git a/test/unit/math/unaryOps/CMakeLists.txt b/test/unit/math/unaryOps/CMakeLists.txt
new file mode 100644
index 000000000000..db4614ee4b70
--- /dev/null
+++ b/test/unit/math/unaryOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+SET(_TARGET_NAME "unaryOps")  # Name used for the executable target and for the CTest entry.
+# NOTE(review): project helper; presumably gathers the *.cpp files below src/ into _FILES_SOURCE - confirm.
+append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE)
+# Build the test executable from the collected sources.
+ALPAKA_ADD_EXECUTABLE(
+        ${_TARGET_NAME}
+        ${_FILES_SOURCE})
+TARGET_INCLUDE_DIRECTORIES(
+        ${_TARGET_NAME}
+        PRIVATE ${Boost_INCLUDE_DIRS})
+TARGET_LINK_LIBRARIES(
+        ${_TARGET_NAME}
+        PRIVATE common)
+# FOLDER only controls how IDE generators group the target.
+SET_TARGET_PROPERTIES(${_TARGET_NAME} PROPERTIES FOLDER "test/unit")
+# Register the executable with CTest; extra arguments come from _ALPAKA_TEST_OPTIONS.
+ADD_TEST(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_ALPAKA_TEST_OPTIONS})
diff --git a/test/unit/math/unaryOps/src/unaryOps.cpp b/test/unit/math/unaryOps/src/unaryOps.cpp
new file mode 100644
index 000000000000..4a3146745fdb
--- /dev/null
+++ b/test/unit/math/unaryOps/src/unaryOps.cpp
@@ -0,0 +1,513 @@
+/** Copyright 2019 Axel Huebl, Benjamin Worpitz
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "../../dataGen.hpp"
+#include <alpaka/alpaka.hpp>
+#include <alpaka/test/MeasureKernelRunTime.hpp>
+#include <alpaka/test/acc/TestAccs.hpp>
+#include <alpaka/test/queue/Queue.hpp>
+
+#include <catch2/catch.hpp>
+#include <cmath>
+#include <iostream>
+
+/** List of all operators
+ * operator  | in std | definition range | notes
+ * abs       | Y | R
+ * acos      | Y | [-1, 1]
+ * asin      | Y | [-1, 1]
+ * atan      | Y | R
+ * cbrt      | Y | R | third root of arg
+ * ceil      | Y | R
+ * cos       | Y | R
+ * erf       | Y | R | error function for arg
+ * exp       | Y | R | e^arg
+ * floor     | Y | R
+ * log       | Y | (0, inf)
+ * round     | Y | R
+ * rsqrt     | X | (0, inf) | inverse square root
+ * sin       | Y | R
+ * sqrt      | Y | [0, inf)
+ * tan       | Y | {x | x != pi/2 + k*pi, k in Z}
+ * trunc     | Y | R | round towards zero
+ *
+ * If you want to add a new operation simply add it to the array.
+ * 1. Specify the std::  implementation.
+ * 2. Specify the alpaka implementation.
+ * 3. Define the range in which the operator should be tested.
+ *
+ * If you need to add a new range you have to add it to the switch case
+ *  - in the kernel class
+ *  - in the TestTemplate
+ *
+ *  If the wrong range is used,
+ *  the behaviour depends on the individual implementation.
+ */
+// Custom functions: host-side reference for rsqrt, computed as 1 / std::sqrt(t).
+template< typename T >
+auto rsqrt( T t ) -> T
+{
+    return 1 / std::sqrt( t );
+}
+
+//! @enum Range
+//! @brief Possible definition ranges; each selects which arguments the kernel evaluates.
+enum class Range
+{
+    POSITIVE_ONLY,     //!< Only the entries before the two middle slots.
+    POSITIVE_AND_ZERO, //!< The entries before the two middle slots plus the first middle slot.
+    ONE_NEIGHBOURHOOD, // [-1, 1]: the kernel uses the fixed arguments 1, 0.5, 0, -0.5 and -1
+    UNRESTRICTED       //!< Every entry.
+};
+
+// Plain function-pointer types for the one-argument callbacks into alpaka::math and std::.
+template<
+    typename TAcc,
+    typename T>
+using alpaka_func_ptr = T (*) (TAcc const & , T const &);
+// std:: reference callback that takes and returns by value.
+template<
+    typename T>
+using std_func_ptr = T (*) (T);
+
+/*!
+ * @struct TestStruct
+ * @tparam TAcc The type of the used accelerator, important for alpaka callback.
+ * @tparam T The data type (float || double).
+ * @var stdOp
+ * @brief Callback to the std implementation (or to a custom reference such as rsqrt).
+ * @var alpakaOp
+ * @brief Callback to alpaka implementation.
+ * @note Behaviour for nullptr is undefined.
+ */
+template<
+    typename TAcc,
+    typename T>
+struct TestStruct
+{
+    std_func_ptr< T > stdOp;
+    alpaka_func_ptr<
+        TAcc,
+        T
+    > alpakaOp;
+    Range range; //!< Definition range the operator is tested on.
+};
+
+//! Kernel functor: grid thread k evaluates operator k (structs[k]) and writes into row k of results (sizeArgs entries per row).
+class UnaryOpsKernel
+{
+public:
+    ALPAKA_NO_HOST_ACC_WARNING
+    template<
+        typename TAcc,
+        typename TData,
+        typename TIdx
+    >
+    ALPAKA_FN_ACC auto operator()(
+        TAcc const & acc,
+        TIdx const & numOps,
+        TIdx const & sizeArgs,
+        TData const * const args,
+        TestStruct<TAcc, TData> const * const structs,
+        TData * results
+        ) const -> void
+    {
+        auto const gridThreadIdx(alpaka::idx::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u]);
+        TData arg;
+        if(gridThreadIdx < numOps)
+        {
+            switch (structs[gridThreadIdx].range)
+            {
+                case Range::POSITIVE_ONLY:
+                    for(TIdx row(0); row < sizeArgs / 2 - 1; ++row)
+                    {
+                        arg = args[row];
+                        results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    }
+                    break;
+                case Range::POSITIVE_AND_ZERO:
+                    for(TIdx row(0); row < sizeArgs / 2; ++row)
+                    {
+                        arg = args[row];
+                        results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    }
+                    break;
+                case Range::UNRESTRICTED:
+                    for(TIdx row(0); row < sizeArgs; ++row)
+                    {
+                        arg = args[row];
+                        results[row + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    }
+                    break;
+                case Range::ONE_NEIGHBOURHOOD:
+                    // this option is hardcoded because it is too specific
+                    // Slots 0..4 of the row are written below, so the row needs at least five entries.
+                    if(sizeArgs < 5)
+                        break;
+                    arg = 1;
+                    results[0 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    arg = 0.5;
+                    results[1 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    arg = 0;
+                    results[2 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    arg = -0.5;
+                    results[3 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    arg = -1;
+                    results[4 + gridThreadIdx* sizeArgs] = structs[gridThreadIdx].alpakaOp(acc, arg);
+                    break;
+
+                default:
+                    break;
+            }
+        }
+    }
+};
+
+template <typename Data>
+struct TestTemplate
+{
+public:
+    template< typename TAcc >
+    void operator()( )
+    {
+        //using Data = double; // this can be changed to float without problems
+        using Dim = alpaka::dim::Dim< TAcc >;
+        using Idx = alpaka::idx::Idx< TAcc >;
+        using DevAcc = alpaka::dev::Dev< TAcc >;
+        using PltfAcc = alpaka::pltf::Pltf< DevAcc >;
+        using QueueAcc = alpaka::test::queue::DefaultQueue< DevAcc >;
+        using PltfHost = alpaka::pltf::PltfCpu;
+        // the functions that will be tested
+
+        TestStruct<TAcc, Data> arr [] =
+            {
+                { &std::abs,    &alpaka::math::abs<TAcc,Data>,      Range::UNRESTRICTED      },
+                { &std::acos,   &alpaka::math::acos<TAcc,Data>,     Range::ONE_NEIGHBOURHOOD },
+                { &std::asin,   &alpaka::math::asin<TAcc,Data>,     Range::ONE_NEIGHBOURHOOD },
+                { &std::atan,   &alpaka::math::atan<TAcc,Data>,     Range::UNRESTRICTED      },
+                { &std::cbrt,   &alpaka::math::cbrt<TAcc,Data>,     Range::UNRESTRICTED      },
+                { &std::ceil,   &alpaka::math::ceil<TAcc,Data>,     Range::UNRESTRICTED      },
+                { &std::cos,    &alpaka::math::cos<TAcc,Data>,      Range::UNRESTRICTED      },
+                { &std::erf,    &alpaka::math::erf<TAcc,Data>,      Range::UNRESTRICTED      },
+                { &std::exp,    &alpaka::math::exp<TAcc,Data>,      Range::UNRESTRICTED      },
+                { &std::floor,  &alpaka::math::floor<TAcc,Data>,    Range::UNRESTRICTED      },
+                { &std::log,    &alpaka::math::log<TAcc,Data>,      Range::POSITIVE_ONLY     },
+                { &std::round,  &alpaka::math::round<TAcc,Data>,    Range::UNRESTRICTED      },
+                { &rsqrt<Data>, &alpaka::math::rsqrt<TAcc,Data>,    Range::POSITIVE_ONLY     },
+                { &std::sin,    &alpaka::math::sin<TAcc,Data>,      Range::UNRESTRICTED      },
+                { &std::sqrt,   &alpaka::math::sqrt<TAcc,Data>,     Range::POSITIVE_AND_ZERO },
+                { &std::tan,    &alpaka::math::tan<TAcc,Data>,      Range::UNRESTRICTED      },
+                { &std::trunc,  &alpaka::math::trunc<TAcc,Data>,    Range::UNRESTRICTED      }
+            };
+
+        Idx const
+            numOps =
+            sizeof( arr ) / sizeof( TestStruct< TAcc, Data > );
+        Idx const elementsPerThread( 1u );
+        // sizeArgs defines how many test arguments are used.
+        // It should be an even number greater than 4.
+        Idx const sizeArgs( 100u );
+        Idx const sizeRes = sizeArgs * numOps;
+        constexpr size_t randomRange = 100u;
+
+
+        // Create the kernel function object.
+        UnaryOpsKernel kernel;
+
+        // Get the host device.
+        auto const devHost(
+            alpaka::pltf::getDevByIdx< PltfHost >( 0u )
+        );
+
+        // Select a device to execute on.
+        auto const devAcc(
+            alpaka::pltf::getDevByIdx< PltfAcc >( 0u )
+        );
+
+        // Get a queue on this device.
+        QueueAcc queue( devAcc );
+
+        alpaka::vec::Vec<
+            Dim,
+            Idx
+        > const extent( numOps );
+
+        // Let alpaka calculate good block and grid sizes given our full problem extent.
+        alpaka::workdiv::WorkDivMembers<
+            Dim,
+            Idx
+        > const workDiv(
+            alpaka::workdiv::getValidWorkDiv< TAcc >(
+                devAcc,
+                extent,
+                elementsPerThread,
+                false,
+                alpaka::workdiv::GridBlockExtentSubDivRestrictions::Unrestricted
+            )
+        );
+
+
+        // Allocate host memory buffers.
+        auto
+            memBufHostArgs
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devHost,
+                    sizeArgs
+                )
+            );
+        auto
+            memBufHostRes
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devHost,
+                    sizeRes
+                )
+            );
+        auto
+            memBufHostStructs
+            (
+                alpaka::mem::buf::alloc<
+                    TestStruct<
+                        TAcc,
+                        Data
+                    >,
+                    Idx
+                >(
+                    devHost,
+                    extent
+                )
+            );
+
+        Data
+            * const pBufHostArgs =
+            alpaka::mem::view::getPtrNative( memBufHostArgs );
+        Data
+            * const pBufHostRes =
+            alpaka::mem::view::getPtrNative( memBufHostRes );
+        TestStruct<
+            TAcc,
+            Data
+        >
+            * const pBufHostStructs =
+            alpaka::mem::view::getPtrNative( memBufHostStructs );
+
+        // Pre-fill every result slot with -1 so that results left unchanged are easy to recognize.
+        for( Idx i( 0 ); i < numOps; ++i )
+        {
+            for( Idx j( 0 ); j < sizeArgs; ++j )
+            {
+                pBufHostRes[j + i * sizeArgs] = -1;
+            }
+        }
+
+        unsigned long seed =
+            test::fillWithRndArgs< Data >(pBufHostArgs, sizeArgs, randomRange);
+        std::cout << "Using seed: " << seed <<std::endl;
+
+
+        // Initialize the structs.
+        for( Idx i( 0u ); i < numOps; ++i )
+        {
+            pBufHostStructs[i] = arr[i];
+        }
+
+        // Allocate the buffer on the accelerator.
+        auto
+            memBufAccArgs
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devAcc,
+                    sizeArgs
+                )
+            );
+        auto
+            memBufAccRes
+            (
+                alpaka::mem::buf::alloc<
+                    Data,
+                    Idx
+                >(
+                    devAcc,
+                    sizeRes
+                )
+            );
+        auto
+            memBufAccStructs
+            (
+                alpaka::mem::buf::alloc<
+                    TestStruct<
+                        TAcc,
+                        Data
+                    >,
+                    Idx
+                >(
+                    devAcc,
+                    numOps
+                )
+            );
+
+
+        // Copy Host -> Acc.
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccArgs,
+            memBufHostArgs,
+            sizeArgs
+        );
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccRes,
+            memBufHostRes,
+            sizeRes
+        );
+        alpaka::mem::view::copy(
+            queue,
+            memBufAccStructs,
+            memBufHostStructs,
+            numOps
+        );
+
+        auto pMemBufAccArgs = alpaka::mem::view::getPtrNative( memBufAccArgs );
+        auto pMemBufAccRes = alpaka::mem::view::getPtrNative( memBufAccRes );
+        auto
+            pMemBufAccStructs =
+            alpaka::mem::view::getPtrNative( memBufAccStructs );
+
+
+        // Create the kernel execution task.
+        auto const taskKernel(
+            alpaka::kernel::createTaskKernel< TAcc >(
+                workDiv,
+                kernel,
+                numOps,
+                sizeArgs,
+                pMemBufAccArgs,
+                pMemBufAccStructs,
+                pMemBufAccRes
+            )
+        );
+
+        // Enqueue the kernel execution task.
+        alpaka::queue::enqueue(
+            queue,
+            taskKernel
+        );
+
+        // Copy back the result.
+        alpaka::mem::view::copy(
+            queue,
+            memBufHostArgs,
+            memBufAccArgs,
+            sizeArgs
+        );
+        alpaka::mem::view::copy(
+            queue,
+            memBufHostRes,
+            memBufAccRes,
+            sizeRes
+        );
+
+
+        // Wait for the queue to finish the memory operation.
+        alpaka::wait::wait( queue );
+
+        // Check device result against host result.
+        Data arg;
+        Data stdRes;
+        TestStruct<
+            TAcc,
+            Data
+        > t;
+        for( Idx i( 0u ); i < numOps; ++i )
+        {
+            t = arr[i];
+            switch( t.range )
+            {
+                case Range::POSITIVE_ONLY:
+                    for( Idx j( 0 ); j < sizeArgs / 2 - 2; ++j )
+                    {
+                        arg = pBufHostArgs[j];
+                        stdRes = t.stdOp( arg );
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[j + i * sizeArgs] ) );
+                    }
+                    break;
+
+                case Range::POSITIVE_AND_ZERO:
+                    for( Idx j( 0 ); j < sizeArgs / 2 - 2; ++j )
+                    {
+                        arg = pBufHostArgs[j];
+                        stdRes = t.stdOp( arg );
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[j + i * sizeArgs] ) );
+                    }
+                    break;
+
+                case Range::UNRESTRICTED:
+                    for( Idx j( 0 ); j < sizeArgs / 2 - 2; ++j )
+                    {
+                        arg = pBufHostArgs[j];
+                        stdRes = t.stdOp( arg );
+                        REQUIRE( stdRes ==
+                                 Approx( pBufHostRes[j + i * sizeArgs] ) );
+                    }
+                    break;
+
+                case Range::ONE_NEIGHBOURHOOD:
+                    if( sizeArgs < 4 )
+                        break;
+                    arg = 1;
+                    stdRes = t.stdOp( arg );
+                    REQUIRE( stdRes ==
+                             Approx( pBufHostRes[0 + i * sizeArgs] ) );
+                    arg = 0.5;
+                    stdRes = t.stdOp( arg );
+                    REQUIRE( stdRes ==
+                             Approx( pBufHostRes[1 + i * sizeArgs] ) );
+                    arg = 0;
+                    stdRes = t.stdOp( arg );
+                    REQUIRE( stdRes ==
+                             Approx( pBufHostRes[2 + i * sizeArgs] ) );
+                    arg = -0.5;
+                    stdRes = t.stdOp( arg );
+                    REQUIRE( stdRes ==
+                             Approx( pBufHostRes[3 + i * sizeArgs] ) );
+                    arg = -1;
+                    stdRes = t.stdOp( arg );
+                    REQUIRE( stdRes ==
+                             Approx( pBufHostRes[4 + i * sizeArgs] ) );
+                    break;
+
+                default:
+                    break;
+            }
+        }
+    }
+};
+
+TEST_CASE("unaryOps", "[unaryOps]")
+{
+    // Invoke TestTemplate for each accelerator type in EnabledAccs
+    // (1 dimension, std::size_t index): first with double, then with float data.
+    using Dim1 = alpaka::dim::DimInt< 1u >;
+    using Accs1D = alpaka::test::acc::EnabledAccs< Dim1, std::size_t >;
+    alpaka::meta::forEachType< Accs1D >( TestTemplate< double >( ) );
+    alpaka::meta::forEachType< Accs1D >( TestTemplate< float >( ) );
+}
+