From 2063674fa4727a47e7717308f5d942dd077f650a Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 17 Feb 2024 16:37:43 +0100 Subject: [PATCH 1/3] Compatibility: Only use sycl::half if SYCL_CTS_ENABLE_HALF_TESTS is set --- tests/accessor_basic/accessor_common.h | 3 ++ tests/common/common.h | 2 + .../group_functions/group_functions_common.h | 6 ++- .../group_functions/group_joint_reduce.cpp.in | 10 +++- tests/group_functions/group_joint_scan.cpp.in | 10 +++- .../group_reduce_over_group.cpp.in | 10 +++- .../group_scan_over_group.cpp.in | 10 +++- tests/kernel_bundle/kernels.h | 4 ++ tests/kernel_bundle/sycl_is_compatible.cpp | 4 ++ tests/marray_basic/marray_operators.h | 6 +-- tests/math_builtin_api/math_builtin.h | 9 ++-- .../kernel_features_device_has_exceptions.cpp | 49 ++++++++++++------- .../kernel_features_separate_unit.cpp | 2 + ...ernel_features_speculative_compilation.cpp | 2 + tests/reduction/identity_helper.h | 5 +- tests/scalars/scalars_sycl_types.cpp | 10 ++++ .../spec_constants_defined_various_ways.h | 2 + .../spec_constants/spec_constants_multiple.h | 2 + .../spec_constants_same_name_inter_link.h | 2 + .../spec_constants_same_name_stress.h | 2 + util/accuracy.h | 4 ++ util/math_helper.h | 2 + util/math_reference.cpp | 42 +++++++++++++++- util/math_reference.h | 48 ++++++++++++++++++ util/type_names.h | 2 + util/type_traits.h | 12 +++-- 26 files changed, 218 insertions(+), 42 deletions(-) diff --git a/tests/accessor_basic/accessor_common.h b/tests/accessor_basic/accessor_common.h index 085fc961b..7540a2e14 100644 --- a/tests/accessor_basic/accessor_common.h +++ b/tests/accessor_basic/accessor_common.h @@ -143,6 +143,8 @@ inline std::string get_section_name(const std::string& type_name, // FIXME: re-enable when marrray is implemented in adaptivecpp and type_coverage // is enabled #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +#if SYCL_CTS_ENABLE_HALF_TESTS /** * @brief Factory function for getting type_pack with fp16 type */ @@ -150,6 +152,7 @@ inline auto get_fp16_type() { static const auto types = named_type_pack::generate("sycl::half"); return types; } +#endif /** * @brief Factory function for getting type_pack with fp64 type diff --git a/tests/common/common.h b/tests/common/common.h index 1b46fe325..e760cb82e 100644 --- a/tests/common/common.h +++ b/tests/common/common.h @@ -237,6 +237,7 @@ bool check_type_sign(bool expected_sign) { return (std::is_signed::value == expected_sign); } +#if SYCL_CTS_ENABLE_HALF_TESTS /** * @brief Helper function to see if sycl::half is of the wrong sign */ @@ -245,6 +246,7 @@ inline bool check_type_sign(bool expected_sign) { bool is_signed = sycl::half(1) > sycl::half(-1); return is_signed == expected_sign; } +#endif /** * @brief Helper function to log a failure if a type is of the wrong size or diff --git a/tests/group_functions/group_functions_common.h b/tests/group_functions/group_functions_common.h index 47f18019f..2448d22e3 100644 --- a/tests/group_functions/group_functions_common.h +++ b/tests/group_functions/group_functions_common.h @@ -18,6 +18,7 @@ // *******************************************************************************/ +#include "../../util/type_traits.h" #include "../common/common.h" #include "../common/get_group_range.h" #include "../common/once_per_unit.h" @@ -133,8 +134,10 @@ struct custom_type { template inline constexpr uint64_t exact_max = std::numeric_limits::max(); +#if SYCL_CTS_ENABLE_HALF_TESTS template <> inline constexpr uint64_t exact_max = 1ull << 11; +#endif template <> inline constexpr uint64_t exact_max = 1ull << 24; template <> @@ -256,8 +259,7 @@ template inline auto get_op_types() { #if SYCL_CTS_ENABLE_FULL_CONFORMANCE static const auto types = []() { - if constexpr (std::is_floating_point_v || - std::is_same_v, sycl::half>) { + if constexpr (is_sycl_floating_point_v) { // Bitwise operations are not defined for floating point types. return named_type_pack, sycl::multiplies, sycl::logical_and, sycl::logical_or, diff --git a/tests/group_functions/group_joint_reduce.cpp.in b/tests/group_functions/group_joint_reduce.cpp.in index c0e214c5f..3dc0aed27 100644 --- a/tests/group_functions/group_joint_reduce.cpp.in +++ b/tests/group_functions/group_joint_reduce.cpp.in @@ -32,12 +32,15 @@ TEST_CASE(CTS_TYPE_NAME + " group and sub-group joint reduce functions", const auto Operators = get_op_types(); const auto RetType = unnamed_type_pack(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " @@ -57,12 +60,15 @@ TEMPLATE_LIST_TEST_CASE( const auto RetType = unnamed_type_pack(); const auto ReducedType = unnamed_type_pack(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " diff --git a/tests/group_functions/group_joint_scan.cpp.in b/tests/group_functions/group_joint_scan.cpp.in index 6c311f66d..7c7caa402 100644 --- a/tests/group_functions/group_joint_scan.cpp.in +++ b/tests/group_functions/group_joint_scan.cpp.in @@ -49,12 +49,15 @@ DISABLED_FOR_TEST_CASE(AdaptiveCpp) #endif auto queue = once_per_unit::get_queue(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " @@ -86,12 +89,15 @@ DISABLED_FOR_TEST_CASE(AdaptiveCpp) #endif auto queue = once_per_unit::get_queue(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " diff --git a/tests/group_functions/group_reduce_over_group.cpp.in b/tests/group_functions/group_reduce_over_group.cpp.in index 226b1655b..d8e2d01ce 100644 --- a/tests/group_functions/group_reduce_over_group.cpp.in +++ b/tests/group_functions/group_reduce_over_group.cpp.in @@ -33,12 +33,15 @@ TEST_CASE(CTS_TYPE_NAME + " group and sub-group reduce functions", const auto Operators = get_op_types(); const auto RetType = unnamed_type_pack(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " @@ -59,12 +62,15 @@ TEMPLATE_LIST_TEST_CASE(CTS_TYPE_NAME + const auto RetType = unnamed_type_pack(); const auto ReducedType = unnamed_type_pack(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " diff --git a/tests/group_functions/group_scan_over_group.cpp.in b/tests/group_functions/group_scan_over_group.cpp.in index b44fb9cd1..731e89594 100644 --- a/tests/group_functions/group_scan_over_group.cpp.in +++ b/tests/group_functions/group_scan_over_group.cpp.in @@ -39,12 +39,15 @@ DISABLED_FOR_TEST_CASE(AdaptiveCpp) (CTS_TYPE_NAME + " group and sub-group scan functions", "[group_func][type_list][dim]")({ auto queue = once_per_unit::get_queue(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " @@ -59,12 +62,15 @@ DISABLED_FOR_TEST_CASE(AdaptiveCpp) (CTS_TYPE_NAME + " group and sub-group scan functions with init", "[group_func][type_list][dim]")({ auto queue = once_per_unit::get_queue(); +#if SYCL_CTS_ENABLE_HALF_TESTS if constexpr (std::is_same_v, sycl::half>) { if (!queue.get_device().has(sycl::aspect::fp16)) SKIP( "Device does not support half precision floating point " "operations."); - } else if (std::is_same_v, double>) { + } +#endif + if constexpr (std::is_same_v, double>) { if (!queue.get_device().has(sycl::aspect::fp64)) SKIP( "Device does not support double precision floating point " diff --git a/tests/kernel_bundle/kernels.h b/tests/kernel_bundle/kernels.h index b15ab30a5..51624729c 100644 --- a/tests/kernel_bundle/kernels.h +++ b/tests/kernel_bundle/kernels.h @@ -221,6 +221,8 @@ struct kernel_atomic64_descriptor { // fp16, fp64, atomic64 kernels without sycl::requires attribute but with // explicit operations +#if SYCL_CTS_ENABLE_HALF_TESTS + struct kernel_fp16_no_attr : kernel_base { void operator()(sycl::item<1> id) const { if (id.get_linear_id() == 0) { @@ -239,6 +241,8 @@ struct kernel_fp16_no_attr_descriptor { } }; +#endif // SYCL_CTS_ENABLE_HALF_TESTS + struct kernel_fp64_no_attr : kernel_base { void operator()(sycl::item<1> id) const { if (id.get_linear_id() == 0) { diff --git a/tests/kernel_bundle/sycl_is_compatible.cpp b/tests/kernel_bundle/sycl_is_compatible.cpp index 91b87f942..00892aa3b 100644 --- a/tests/kernel_bundle/sycl_is_compatible.cpp +++ b/tests/kernel_bundle/sycl_is_compatible.cpp @@ -102,10 +102,12 @@ TEST_CASE("Check is_compatible for kernels with no kernel attributes", CHECK(sycl::is_compatible(builtinKernelIds, device)); } +#if SYCL_CTS_ENABLE_HALF_TESTS SECTION("for a kernel that uses `sycl::half`") { check_with_optional_features( device, queue, device.has(sycl::aspect::fp16)); } +#endif SECTION("for a kernel that uses `double`") { check_with_optional_features( @@ -204,10 +206,12 @@ TEST_CASE( const sycl::device device = sycl_cts::util::get_cts_object::device(); sycl::queue queue = sycl_cts::util::get_cts_object::queue(); +#if SYCL_CTS_ENABLE_HALF_TESTS SECTION("for a kernel that uses `sycl::half`") { check_with_optional_features( device, queue, device.has(sycl::aspect::fp16)); } +#endif SECTION("for a kernel that uses `double`") { check_with_optional_features( diff --git a/tests/marray_basic/marray_operators.h b/tests/marray_basic/marray_operators.h index 1885a28f3..dba9b5449 100644 --- a/tests/marray_basic/marray_operators.h +++ b/tests/marray_basic/marray_operators.h @@ -21,6 +21,7 @@ #ifndef SYCLCTS_TESTS_MARRAY_MARRAY_OPERATOR_H #define SYCLCTS_TESTS_MARRAY_MARRAY_OPERATOR_H +#include "../../util/type_traits.h" #include "../common/common.h" #include "../common/section_name_builder.h" #include "marray_common.h" @@ -54,9 +55,8 @@ struct operators_helper { template struct skip_result_check : std::bool_constant< - (std::is_same_v || std::is_same_v)&&( - std::is_same_v || std::is_same_v || - std::is_same_v)> {}; + (std::is_same_v || std::is_same_v) && + is_sycl_floating_point_v> {}; template constexpr bool skip_result_check_v = skip_result_check::value; diff --git a/tests/math_builtin_api/math_builtin.h b/tests/math_builtin_api/math_builtin.h index 10afe5ca9..fa950d91e 100644 --- a/tests/math_builtin_api/math_builtin.h +++ b/tests/math_builtin_api/math_builtin.h @@ -25,6 +25,7 @@ #include "../../util/accuracy.h" #include "../../util/math_reference.h" #include "../../util/sycl_exceptions.h" +#include "../../util/type_traits.h" #include "../common/once_per_unit.h" #include #include @@ -55,10 +56,12 @@ template <> struct base { using type = std::uint64_t; }; +#if SYCL_CTS_ENABLE_HALF_TESTS template <> struct base { using type = std::uint16_t; }; +#endif template std::string printable(T value) { @@ -74,10 +77,12 @@ T min_t() { return std::numeric_limits::min(); } +#if SYCL_CTS_ENABLE_HALF_TESTS template <> inline sycl::half min_t() { return static_cast(powf(2.0f, -14.0f)); } +#endif enum class AccuracyMode { ULP, AbsoluteTolerance }; @@ -95,9 +100,7 @@ bool verify(sycl_cts::util::logger& log, T a, T b, float accuracy, AccuracyMode accuracy_mode, const std::string& comment); template -typename std::enable_if::value || - std::is_same::value, - bool>::type +std::enable_if_t, bool> verify(sycl_cts::util::logger& log, T value, sycl_cts::resultRef r, float accuracy, AccuracyMode accuracy_mode, const std::string& comment) { const T reference = r.res; diff --git a/tests/optional_kernel_features/kernel_features_device_has_exceptions.cpp b/tests/optional_kernel_features/kernel_features_device_has_exceptions.cpp index a320b0d33..966393111 100644 --- a/tests/optional_kernel_features/kernel_features_device_has_exceptions.cpp +++ b/tests/optional_kernel_features/kernel_features_device_has_exceptions.cpp @@ -42,8 +42,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_use_feature; auto queue = util::get_cts_object::queue(); @@ -84,8 +86,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_use_feature_function_non_decorated; auto queue = util::get_cts_object::queue(); @@ -134,8 +138,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_use_feature_function_external_decorated; auto queue = util::get_cts_object::queue(); @@ -182,8 +188,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_dummy_function_non_decorated; auto queue = util::get_cts_object::queue(); @@ -227,8 +235,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_dummy_function_decorated; auto queue = util::get_cts_object::queue(); @@ -271,8 +281,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_use_feature_function_decorated; auto queue = util::get_cts_object::queue(); @@ -316,8 +328,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_use_another_feature; auto queue = util::get_cts_object::queue(); @@ -351,7 +365,6 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) [[sycl::device_has(AnotherFeatureAspect)]], kname, USE_FEATURE(FeatureTypeT)); } - }); #ifdef SYCL_EXTERNAL @@ -367,8 +380,10 @@ DISABLED_FOR_TEMPLATE_TEST_CASE_SIG(AdaptiveCpp) "[kernel_features]", ((typename FeatureTypeT, sycl::aspect FeatureAspectT), FeatureTypeT, FeatureAspectT), - (sycl::half, sycl::aspect::fp16), (double, sycl::aspect::fp64), - (AtomicRefT, sycl::aspect::atomic64))({ +#if SYCL_CTS_ENABLE_HALF_TESTS + (sycl::half, sycl::aspect::fp16), +#endif + (double, sycl::aspect::fp64), (AtomicRefT, sycl::aspect::atomic64))({ using kname = kernel_use_feature_function_external_decorated_with_attr; diff --git a/tests/optional_kernel_features/kernel_features_separate_unit.cpp b/tests/optional_kernel_features/kernel_features_separate_unit.cpp index ab4c0800f..96fe1db9f 100644 --- a/tests/optional_kernel_features/kernel_features_separate_unit.cpp +++ b/tests/optional_kernel_features/kernel_features_separate_unit.cpp @@ -15,6 +15,7 @@ template [[sycl::device_has(aspect)]] SYCL_EXTERNAL void use_feature_function_external_decorated(const sycl::accessor& acc); +#if SYCL_CTS_ENABLE_HALF_TESTS template <> SYCL_EXTERNAL void use_feature_function_external_decorated( @@ -25,6 +26,7 @@ use_feature_function_external_decorated( feature1 += 42; acc[0] = (feature1 == feature2); } +#endif template <> SYCL_EXTERNAL void diff --git a/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp b/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp index 3f7cd1279..274e849c7 100644 --- a/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp +++ b/tests/optional_kernel_features/kernel_features_speculative_compilation.cpp @@ -72,6 +72,7 @@ DISABLED_FOR_TEST_CASE(AdaptiveCpp) } } +#if SYCL_CTS_ENABLE_HALF_TESTS if (queue.get_device().has(sycl::aspect::fp16)) { { const auto separate_lambda_no_arg = []() { @@ -100,6 +101,7 @@ DISABLED_FOR_TEST_CASE(AdaptiveCpp) use_feature_function_non_decorated()); } } +#endif if (queue.get_device().has(sycl::aspect::fp64)) { { diff --git a/tests/reduction/identity_helper.h b/tests/reduction/identity_helper.h index 061191a05..179dfd544 100644 --- a/tests/reduction/identity_helper.h +++ b/tests/reduction/identity_helper.h @@ -21,6 +21,8 @@ #ifndef __SYCL_CTS_TEST_IDENTITY_HELPER_H #define __SYCL_CTS_TEST_IDENTITY_HELPER_H +#include "../../util/type_traits.h" + #include #include @@ -108,8 +110,7 @@ AccumulatorT get_identity() { template > && - (std::is_floating_point_v || - std::is_same_v, sycl::half>), + is_sycl_floating_point_v, bool> = true> AccumulatorT get_identity() { return std::numeric_limits::infinity(); diff --git a/tests/scalars/scalars_sycl_types.cpp b/tests/scalars/scalars_sycl_types.cpp index c15fca1f5..4cf46f605 100644 --- a/tests/scalars/scalars_sycl_types.cpp +++ b/tests/scalars/scalars_sycl_types.cpp @@ -83,7 +83,9 @@ class TEST_NAME : public util::test_base { "size_t"); // SYCL Floating Point Data Types +#if SYCL_CTS_ENABLE_HALF_TESTS check_type_min_size_sign_log(log, 2, true, "sycl::half"); +#endif check_type_min_size_sign_log(log, 4, true, "float"); check_type_min_size_sign_log(log, 8, true, "double"); @@ -140,6 +142,7 @@ class TEST_NAME : public util::test_base { }); }); +#if SYCL_CTS_ENABLE_HALF_TESTS if (device_supports_fp16) { myQueue.submit([&](sycl::handler &cgh) { auto accSignResult = @@ -157,6 +160,7 @@ class TEST_NAME : public util::test_base { }); }); } +#endif if (device_supports_fp64) { myQueue @@ -220,9 +224,11 @@ class TEST_NAME : public util::test_base { FAIL(log, errorStr + "sign: sycl::byte"); } #endif +#if SYCL_CTS_ENABLE_HALF_TESTS if (!signResults[12] && device_supports_fp16) { FAIL(log, errorStr + "sign: sycl::half"); } +#endif if (!signResults[13]) { FAIL(log, errorStr + "sign: float"); } @@ -272,9 +278,11 @@ class TEST_NAME : public util::test_base { FAIL(log, errorStr + "size: sycl::byte"); } #endif +#if SYCL_CTS_ENABLE_HALF_TESTS if (!sizeResults[13] && device_supports_fp16) { FAIL(log, errorStr + "size: sycl::half"); } +#endif if (!sizeResults[14]) { FAIL(log, errorStr + "size: float"); } @@ -285,11 +293,13 @@ class TEST_NAME : public util::test_base { myQueue.wait_and_throw(); } +#if SYCL_CTS_ENABLE_HALF_TESTS // Check sycl::half limits specialization { INFO("Check that std::numeric_limits is specialized for sycl::half type"); CHECK(std::numeric_limits::is_specialized); } +#endif } }; diff --git a/tests/spec_constants/spec_constants_defined_various_ways.h b/tests/spec_constants/spec_constants_defined_various_ways.h index d4d14e45c..a554aebf1 100644 --- a/tests/spec_constants/spec_constants_defined_various_ways.h +++ b/tests/spec_constants/spec_constants_defined_various_ways.h @@ -182,6 +182,7 @@ static void sc_run_test_core(util::logger &log) { } } +#if SYCL_CTS_ENABLE_HALF_TESTS template static void sc_run_test_fp16(util::logger &log) { using namespace specialization_constants_defined_various_ways; @@ -205,6 +206,7 @@ static void sc_run_test_fp16(util::logger &log) { #endif } } +#endif template static void sc_run_test_fp64(util::logger &log) { diff --git a/tests/spec_constants/spec_constants_multiple.h b/tests/spec_constants/spec_constants_multiple.h index 68858d42d..451d072d3 100644 --- a/tests/spec_constants/spec_constants_multiple.h +++ b/tests/spec_constants/spec_constants_multiple.h @@ -147,6 +147,7 @@ static void sc_run_test_core(util::logger &log) { } } +#if SYCL_CTS_ENABLE_HALF_TESTS template static void sc_run_test_fp16(util::logger &log) { using namespace specialization_constants_multiple; @@ -168,6 +169,7 @@ static void sc_run_test_fp16(util::logger &log) { #endif } } +#endif template static void sc_run_test_fp64(util::logger &log) { diff --git a/tests/spec_constants/spec_constants_same_name_inter_link.h b/tests/spec_constants/spec_constants_same_name_inter_link.h index e74bc9c3b..282991b4f 100644 --- a/tests/spec_constants/spec_constants_same_name_inter_link.h +++ b/tests/spec_constants/spec_constants_same_name_inter_link.h @@ -136,6 +136,7 @@ static void sc_run_test_core(util::logger &log) { } } +#if SYCL_CTS_ENABLE_HALF_TESTS // Test function for fp16 tests template static void sc_run_test_fp16(util::logger &log) { @@ -160,6 +161,7 @@ static void sc_run_test_fp16(util::logger &log) { #endif } } +#endif // Test function for fp64 tests template diff --git a/tests/spec_constants/spec_constants_same_name_stress.h b/tests/spec_constants/spec_constants_same_name_stress.h index 7cc020f56..c6e735234 100644 --- a/tests/spec_constants/spec_constants_same_name_stress.h +++ b/tests/spec_constants/spec_constants_same_name_stress.h @@ -200,6 +200,7 @@ static void sc_run_test_core(util::logger &log) { } } +#if SYCL_CTS_ENABLE_HALF_TESTS template static void sc_run_test_fp16(util::logger &log) { using namespace specialization_constants_same_name_stress; @@ -222,6 +223,7 @@ static void sc_run_test_fp16(util::logger &log) { #endif } } +#endif template static void sc_run_test_fp64(util::logger &log) { diff --git a/util/accuracy.h b/util/accuracy.h index c1461e733..1e50e23fd 100644 --- a/util/accuracy.h +++ b/util/accuracy.h @@ -24,6 +24,7 @@ T get_ulp_std(T x) { const T positive = std::fabs(std::nextafter(x, inf) - x); return std::fmin(negative, positive); } +#if SYCL_CTS_ENABLE_HALF_TESTS template <> inline sycl::half get_ulp_std(sycl::half x) { const auto ulp = get_ulp_std(x); @@ -31,6 +32,7 @@ inline sycl::half get_ulp_std(sycl::half x) { // Multiplier is set according to the difference in precision return static_cast(ulp * multiplier); } +#endif /** * @brief Provides ulp(x) by definition given in OpenCL 1.2 rev. 19, 7.4 * See Jean-Michel Muller "On the definition of ulp (x)", definition 7 @@ -43,6 +45,7 @@ T get_ulp_sycl(T x) { const T positive = sycl::fabs(sycl::nextafter(x, inf) - x); return sycl::fmin(negative, positive); } +#if SYCL_CTS_ENABLE_HALF_TESTS template <> inline sycl::half get_ulp_sycl(sycl::half x) { const auto ulp = get_ulp_sycl(x); @@ -50,5 +53,6 @@ inline sycl::half get_ulp_sycl(sycl::half x) { // Multiplier is set according to the difference in precision return static_cast(ulp * multiplier); } +#endif #endif // __SYCLCTS_UTIL_ACCURACY_H diff --git a/util/math_helper.h b/util/math_helper.h index 83559a523..94dc2bdb9 100644 --- a/util/math_helper.h +++ b/util/math_helper.h @@ -192,10 +192,12 @@ sycl_cts::resultRef> run_func_on_marray_result_ref( template struct rel_funcs_return; +#if SYCL_CTS_ENABLE_HALF_TESTS template <> struct rel_funcs_return { using type = int16_t; }; +#endif template <> struct rel_funcs_return { using type = int32_t; diff --git a/util/math_reference.cpp b/util/math_reference.cpp index b3dff0c9e..bb84a698a 100644 --- a/util/math_reference.cpp +++ b/util/math_reference.cpp @@ -71,9 +71,11 @@ float bitselect(float a, float b, float c) { double bitselect(double a, double b, double c) { return bitselect_f_t(a, b, c); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c) { return bitselect_f_t(a, b, c); } +#endif /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- DEGREES * @@ -84,7 +86,9 @@ T degrees_t(T a) { return a * (180.0 / M_PI); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half degrees(sycl::half a) { return degrees_t(a); } +#endif float degrees(float a) { return degrees_t(a); } @@ -99,7 +103,9 @@ T radians_t(T a) { return a * (M_PI / 180.0); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half radians(sycl::half a) { return radians_t(a); } +#endif float radians(float a) { return radians_t(a); } @@ -114,7 +120,9 @@ T step_t(T a, T b) { return 1.0; } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half step(sycl::half a, sycl::half b) { return step_t(a, b); } +#endif float step(float a, float b) { return step_t(a, b); } @@ -131,10 +139,12 @@ sycl_cts::resultRef smoothstep_t(T a, T b, T c) { return t * t * (3 - 2 * t); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, sycl::half c) { return smoothstep_t(a, b, c); } +#endif sycl_cts::resultRef smoothstep(float a, float b, float c) { return smoothstep_t(a, b, c); } @@ -155,7 +165,9 @@ T sign_t(T a) { return +0.0; } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half sign(sycl::half a) { return sign_t(a); } +#endif float sign(float a) { return sign_t(a); } @@ -258,10 +270,12 @@ sycl_cts::resultRef mix_t(T x, T y, T a) { return sycl_cts::resultRef(T(), true); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, const sycl::half c) { return mix_t(a, b, c); } +#endif sycl_cts::resultRef mix(const float a, const float b, const float c) { return mix_t(a, b, c); @@ -458,37 +472,49 @@ sycl_cts::resultRef mul24(uint32_t x, uint32_t y) { * */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half acospi(sycl::half a) { return reference_acospi(a); } +#endif float acospi(float a) { return reference_acospi(a); } double acospi(double a) { return reference_acospil(a); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half asinpi(sycl::half a) { return reference_asinpi(a); } +#endif float asinpi(float a) { return reference_asinpi(a); } double asinpi(double a) { return reference_asinpil(a); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half atanpi(sycl::half a) { return reference_atanpi(a); } +#endif float atanpi(float a) { return reference_atanpi(a); } double atanpi(double a) { return reference_atanpil(a); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half atan2pi(sycl::half a, sycl::half b) { return reference_atan2pi(a, b); } +#endif float atan2pi(float a, float b) { return reference_atan2pi(a, b); } double atan2pi(double a, double b) { return reference_atan2pil(a, b); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half cospi(sycl::half a) { return reference_cospi(a); } +#endif float cospi(float a) { return reference_cospi(a); } double cospi(double a) { return reference_cospil(a); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fma(sycl::half a, sycl::half b, sycl::half c) { return reference_fma(a, b, c, 0); } +#endif float fma(float a, float b, float c) { return reference_fma(a, b, c, 0); } double fma(double a, double b, double c) { return reference_fmal(a, b, c); } // AdaptiveCpp does not yet support sycl::bit_cast, which is used in // `nextafter`. -#if !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if SYCL_CTS_ENABLE_HALF_TESTS && !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP sycl::half fdim(sycl::half a, sycl::half b) { if (a > b) { // to get rounding to nearest even @@ -507,10 +533,12 @@ sycl::half fdim(sycl::half a, sycl::half b) { } #endif +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fract(sycl::half a, sycl::half *b) { *b = std::floor(a); return std::fmin(a - *b, nextafter(sycl::half(1.0), sycl::half(0.0))); } +#endif float fract(float a, float *b) { *b = std::floor(a); return std::fmin(a - *b, nextafter(1.0f, 0.0f)); @@ -523,17 +551,21 @@ double fract(double a, double *b) { float nan(unsigned int a) { return std::nanf(std::to_string(a).c_str()); } double nan(unsigned long a) { return std::nan(std::to_string(a).c_str()); } double nan(unsigned long long a) { return std::nan(std::to_string(a).c_str()); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half nan(unsigned short a) { return nan(unsigned(a)); } +#endif +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half modf(sycl::half a, sycl::half *b) { float resPtr; float res = modf(static_cast(a), &resPtr); *b = static_cast(resPtr); return res; } +#endif // AdaptiveCpp does not yet support sycl::bit_cast -#if !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if SYCL_CTS_ENABLE_HALF_TESTS && !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP sycl::half nextafter(sycl::half x, sycl::half y) { if (std::isnan(x)) return x; @@ -561,11 +593,15 @@ sycl::half nextafter(sycl::half x, sycl::half y) { } #endif +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half sinpi(sycl::half a) { return reference_sinpi(a); } +#endif float sinpi(float a) { return reference_sinpi(a); } double sinpi(double a) { return reference_sinpil(a); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half tanpi(sycl::half a) { return reference_tanpi(a); } +#endif float tanpi(float a) { return reference_tanpi(a); } double tanpi(double a) { return reference_tanpil(a); } @@ -625,6 +661,7 @@ sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1) { } #endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fast_dot(float p0) { return std::pow(p0, 2); } sycl::half fast_dot(sycl::float2 p0) { return std::pow(p0.x(), 2) + std::pow(p0.y(), 2); @@ -649,5 +686,6 @@ sycl::half fast_dot(sycl::mfloat4 p0) { std::pow(p0[3], 2); } #endif +#endif } /* namespace reference */ diff --git a/util/math_reference.h b/util/math_reference.h index bc3714dae..38d1cad37 100644 --- a/util/math_reference.h +++ b/util/math_reference.h @@ -249,7 +249,9 @@ template T bitselect(T a, T b, T c) { return (c & b) | (~c & a); } +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c); +#endif float bitselect(float a, float b, float c); double bitselect(double a, double b, double c); MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(bitselect) @@ -710,7 +712,9 @@ sycl_cts::resultRef> mul24(sycl::marray a, // clamp is in Integer functions /* degrees */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half degrees(sycl::half); +#endif float degrees(float a); double degrees(double a); MAKE_VEC_AND_MARRAY_VERSIONS(degrees) @@ -718,8 +722,10 @@ MAKE_VEC_AND_MARRAY_VERSIONS(degrees) // max and min are in Integer functions /* mix */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, const sycl::half c); +#endif sycl_cts::resultRef mix(const float a, const float b, const float c); sycl_cts::resultRef mix(const double a, const double b, const double c); @@ -769,13 +775,17 @@ sycl_cts::resultRef> mix(sycl::marray a, #endif /* radians */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half radians(sycl::half); +#endif float radians(float a); double radians(double a); MAKE_VEC_AND_MARRAY_VERSIONS(radians) /* step */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half step(sycl::half a, sycl::half b); +#endif float step(float a, float b); double step(double a, double b); MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(step) @@ -801,8 +811,10 @@ sycl::marray step(T a, sycl::marray b) { #endif /* smoothstep */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, sycl::half c); +#endif sycl_cts::resultRef smoothstep(float a, float b, float c); sycl_cts::resultRef smoothstep(double a, double b, double c); @@ -852,7 +864,9 @@ sycl_cts::resultRef> smoothstep(T a, T b, #endif /* sign */ +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half sign(sycl::half a); +#endif float sign(float a); double sign(double a); MAKE_VEC_AND_MARRAY_VERSIONS(sign) @@ -862,10 +876,12 @@ MAKE_VEC_AND_MARRAY_VERSIONS(sign) template struct higher_accuracy; +#if SYCL_CTS_ENABLE_HALF_TESTS template <> struct higher_accuracy { using type = float; }; +#endif template <> struct higher_accuracy { using type = double; @@ -899,7 +915,9 @@ T acosh(T a) { } MAKE_VEC_AND_MARRAY_VERSIONS(acosh) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half acospi(sycl::half a); +#endif float acospi(float a); double acospi(double a); MAKE_VEC_AND_MARRAY_VERSIONS(acospi) @@ -916,7 +934,9 @@ T asinh(T a) { } MAKE_VEC_AND_MARRAY_VERSIONS(asinh) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half asinpi(sycl::half a); +#endif float asinpi(float a); double asinpi(double a); MAKE_VEC_AND_MARRAY_VERSIONS(asinpi) @@ -939,12 +959,16 @@ T atanh(T a) { } MAKE_VEC_AND_MARRAY_VERSIONS(atanh) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half atanpi(sycl::half a); +#endif float atanpi(float a); double atanpi(double a); MAKE_VEC_AND_MARRAY_VERSIONS(atanpi) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half atan2pi(sycl::half a, sycl::half b); +#endif float atan2pi(float a, float b); double atan2pi(double a, double b); MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2pi) @@ -973,7 +997,9 @@ T cosh(T a) { } MAKE_VEC_AND_MARRAY_VERSIONS(cosh) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half cospi(sycl::half a); +#endif float cospi(float a); double cospi(double a); MAKE_VEC_AND_MARRAY_VERSIONS(cospi) @@ -1019,13 +1045,17 @@ using std::fabs; MAKE_VEC_AND_MARRAY_VERSIONS(fabs) using std::fdim; +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fdim(sycl::half a, sycl::half b); +#endif MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fdim) using std::floor; MAKE_VEC_AND_MARRAY_VERSIONS(floor) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fma(sycl::half a, sycl::half b, sycl::half c); +#endif float fma(float a, float b, float c); double fma(double a, double b, double c); @@ -1042,7 +1072,9 @@ MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmin) using std::fmod; MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmod) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fract(sycl::half a, sycl::half *b); +#endif float fract(float a, float *b); double fract(double a, double *b); @@ -1261,7 +1293,9 @@ T minmag(T a, T b) { MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(minmag) using std::modf; +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half modf(sycl::half a, sycl::half *b); +#endif template sycl::vec modf(sycl::vec a, sycl::vec *b) { sycl::vec res; @@ -1290,15 +1324,19 @@ sycl::marray modf(sycl::marray a, sycl::marray *b) { } #endif +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half nan(unsigned short a); +#endif float nan(unsigned int a); double nan(unsigned long a); double nan(unsigned long long a); +#if SYCL_CTS_ENABLE_HALF_TESTS template sycl::vec nan(sycl::vec a) { return sycl_cts::math::run_func_on_vector( [](unsigned short x) { return nan(x); }, a); } +#endif template sycl::vec nan(sycl::vec a) { return sycl_cts::math::run_func_on_vector( @@ -1314,11 +1352,13 @@ nan(sycl::vec a) { } // FIXME: AdaptiveCpp does not support marray #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if SYCL_CTS_ENABLE_HALF_TESTS template sycl::marray nan(sycl::marray a) { return sycl_cts::math::run_func_on_marray( [](unsigned short x) { return nan(x); }, a); } +#endif template sycl::marray nan(sycl::marray a) { return sycl_cts::math::run_func_on_marray( @@ -1335,7 +1375,9 @@ nan(sycl::marray a) { #endif using std::nextafter; +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half nextafter(sycl::half a, sycl::half b); +#endif MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(nextafter) template @@ -1512,7 +1554,9 @@ T sinh(T a) { } MAKE_VEC_AND_MARRAY_VERSIONS(sinh) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half sinpi(sycl::half a); +#endif float sinpi(float a); double sinpi(double a); MAKE_VEC_AND_MARRAY_VERSIONS(sinpi) @@ -1535,7 +1579,9 @@ T tanh(T a) { } MAKE_VEC_AND_MARRAY_VERSIONS(tanh) +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half tanpi(sycl::half a); +#endif float tanpi(float a); double tanpi(double a); MAKE_VEC_AND_MARRAY_VERSIONS(tanpi) @@ -1631,6 +1677,7 @@ sycl::marray normalize(sycl::marray a) { } #endif +#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fast_dot(float p0); sycl::half fast_dot(sycl::float2 p0); sycl::half fast_dot(sycl::float3 p0); @@ -1641,6 +1688,7 @@ sycl::half fast_dot(sycl::mfloat2 p0); sycl::half fast_dot(sycl::mfloat3 p0); sycl::half fast_dot(sycl::mfloat4 p0); #endif +#endif template float fast_length(T p0) { diff --git a/util/type_names.h b/util/type_names.h index 586b4bc2a..c66b0eba5 100644 --- a/util/type_names.h +++ b/util/type_names.h @@ -48,7 +48,9 @@ std::string type_name() { /* float types */ MAKENAME(float); MAKENAME(double); +#if SYCL_CTS_ENABLE_HALF_TESTS MAKESYCLNAME(half); +#endif /* scalar types */ MAKESTDNAME(int8_t); diff --git a/util/type_traits.h b/util/type_traits.h index 414694550..68b4e0293 100644 --- a/util/type_traits.h +++ b/util/type_traits.h @@ -51,8 +51,12 @@ using has_atomic_support = contains using is_sycl_floating_point = +#if SYCL_CTS_ENABLE_HALF_TESTS std::bool_constant || - std::is_same_v>; + std::is_same_v, sycl::half>>; +#else + std::is_floating_point; +#endif template inline constexpr bool is_sycl_floating_point_v{ @@ -406,12 +410,10 @@ using is_legal_operator = std::bool_constant< std::is_same_v, bool>) || (std::is_same_v> && std::is_integral_v) || (std::is_same_v> && - (std::is_floating_point_v || - std::is_same_v, sycl::half>)) || + is_sycl_floating_point_v) || (std::is_same_v> && std::is_integral_v) || (std::is_same_v> && - (std::is_floating_point_v || - std::is_same_v, sycl::half>))>; + is_sycl_floating_point_v)>; /** Checks whether \p T and \p OperatorT form a valid SYCL operator. */ From 8806157dd7cec141ee9ff3cb18773c226a30ac75 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Tue, 17 Dec 2024 18:08:30 +0100 Subject: [PATCH 2/3] is_sycl{ => _scalar}_floating_point --- tests/common/common_vec.h | 30 ++++++++----------- .../group_functions/group_functions_common.h | 2 +- tests/marray_basic/marray_operators.h | 15 +++++----- tests/math_builtin_api/math_builtin.h | 6 ++-- tests/reduction/identity_helper.h | 10 +++---- .../reduction_without_identity_param_common.h | 2 +- util/type_traits.h | 10 +++---- 7 files changed, 36 insertions(+), 39 deletions(-) diff --git a/tests/common/common_vec.h b/tests/common/common_vec.h index d07752960..b67870c36 100644 --- a/tests/common/common_vec.h +++ b/tests/common/common_vec.h @@ -57,8 +57,7 @@ bool check_vector_size(sycl::vec vector) { * @brief Helper function to check vector values are correct. */ template -bool check_vector_values(sycl::vec vector, - vecType* vals) { +bool check_vector_values(sycl::vec vector, vecType* vals) { for (int i = 0; i < numOfElems; i++) { if ((vals[i] != vector[i])) { return false; @@ -72,14 +71,12 @@ bool check_vector_values(sycl::vec vector, * for division result are accurate enough */ template -typename std::enable_if::value, bool>::type -check_vector_values_div(sycl::vec vector, - vecType *vals) { +typename std::enable_if_t, bool> +check_vector_values_div(sycl::vec vector, vecType* vals) { for (int i = 0; i < numOfElems; i++) { vecType vectorValue = vector[i]; - if (vals[i] == vectorValue) - continue; - const vecType ulpsExpected = 2.5; // Min Accuracy for x / y + if (vals[i] == vectorValue) continue; + const vecType ulpsExpected = 2.5; // Min Accuracy for x / y const vecType difference = sycl::fabs(vectorValue - vals[i]); // using sycl functions to get ulp because it used in kernel const vecType differenceExpected = ulpsExpected * get_ulp_sycl(vals[i]); @@ -95,9 +92,8 @@ check_vector_values_div(sycl::vec vector, * @brief Helper function to check that vector values for division are correct */ template -typename std::enable_if::value, bool>::type -check_vector_values_div(sycl::vec vector, - vecType *vals) { +typename std::enable_if_t, bool> +check_vector_values_div(sycl::vec vector, vecType* vals) { return check_vector_values(vector, vals); } @@ -123,7 +119,8 @@ bool check_single_vector_op(vectorType vector1, lambdaFunc lambda) { template static constexpr bool if_FP_to_non_FP_conv_v = - is_sycl_floating_point::value && !is_sycl_floating_point::value; + is_sycl_scalar_floating_point_v && + !is_sycl_scalar_floating_point_v; template sycl::vec convert_vec(sycl::vec inputVec) { @@ -196,7 +193,7 @@ sycl::vec rtn(sycl::vec inputVec) { // values instead. template void handleFPToUnsignedConv(sycl::vec& inputVec) { - if constexpr (is_sycl_floating_point::value && + if constexpr (is_sycl_scalar_floating_point_v && std::is_unsigned_v) { for (size_t i = 0; i < N; ++i) { vecType elem = inputVec[i]; @@ -247,7 +244,7 @@ bool check_vector_convert_result_impl(sycl::vec inputVec, sycl::vec expectedVec; switch (mode) { case sycl::rounding_mode::automatic: - if constexpr (is_sycl_floating_point::value) { + if constexpr (is_sycl_scalar_floating_point_v) { expectedVec = rte(inputVec); } else { expectedVec = rtz(inputVec); @@ -291,9 +288,8 @@ bool check_vector_convert_result(sycl::vec inputVec) { template bool check_vector_convert_modes(sycl::vec inputVec) { bool flag = true; - flag &= - check_vector_convert_result(inputVec); + flag &= check_vector_convert_result(inputVec); #if SYCL_CTS_ENABLE_FULL_CONFORMANCE flag &= check_vector_convert_result(inputVec); diff --git a/tests/group_functions/group_functions_common.h b/tests/group_functions/group_functions_common.h index 2448d22e3..039eb2fdd 100644 --- a/tests/group_functions/group_functions_common.h +++ b/tests/group_functions/group_functions_common.h @@ -259,7 +259,7 @@ template inline auto get_op_types() { #if SYCL_CTS_ENABLE_FULL_CONFORMANCE static const auto types = []() { - if constexpr (is_sycl_floating_point_v) { + if constexpr (is_sycl_scalar_floating_point_v) { // Bitwise operations are not defined for floating point types. return named_type_pack, sycl::multiplies, sycl::logical_and, sycl::logical_or, diff --git a/tests/marray_basic/marray_operators.h b/tests/marray_basic/marray_operators.h index dba9b5449..828ceafe3 100644 --- a/tests/marray_basic/marray_operators.h +++ b/tests/marray_basic/marray_operators.h @@ -54,9 +54,10 @@ struct operators_helper { // similar to native::divide we can skip checking them. template struct skip_result_check - : std::bool_constant< - (std::is_same_v || std::is_same_v) && - is_sycl_floating_point_v> {}; + : std::bool_constant<( + std::is_same_v || + std::is_same_v< + OpT, op_assign_div>)&&is_sycl_scalar_floating_point_v> {}; template constexpr bool skip_result_check_v = skip_result_check::value; @@ -67,10 +68,10 @@ bool are_equal_ignore_division(const T1& lhs, const T1& rhs) { // similar to native::divide we can skip checking them here. constexpr bool is_div = std::is_same_v || std::is_same_v; - constexpr bool is_sycl_floating_point = std::is_same_v || - std::is_same_v || - std::is_same_v; - if constexpr (is_div && is_sycl_floating_point) return true; + constexpr bool is_sycl_scalar_floating_point = + std::is_same_v || std::is_same_v || + std::is_same_v; + if constexpr (is_div && is_sycl_scalar_floating_point) return true; return value_operations::are_equal(lhs, rhs); } diff --git a/tests/math_builtin_api/math_builtin.h b/tests/math_builtin_api/math_builtin.h index fa950d91e..170f79190 100644 --- a/tests/math_builtin_api/math_builtin.h +++ b/tests/math_builtin_api/math_builtin.h @@ -100,9 +100,9 @@ bool verify(sycl_cts::util::logger& log, T a, T b, float accuracy, AccuracyMode accuracy_mode, const std::string& comment); template -std::enable_if_t, bool> -verify(sycl_cts::util::logger& log, T value, sycl_cts::resultRef r, - float accuracy, AccuracyMode accuracy_mode, const std::string& comment) { +std::enable_if_t, bool> verify( + sycl_cts::util::logger& log, T value, sycl_cts::resultRef r, + float accuracy, AccuracyMode accuracy_mode, const std::string& comment) { const T reference = r.res; if (!r.undefined.empty()) diff --git a/tests/reduction/identity_helper.h b/tests/reduction/identity_helper.h index 179dfd544..c3cfabbee 100644 --- a/tests/reduction/identity_helper.h +++ b/tests/reduction/identity_helper.h @@ -107,11 +107,11 @@ AccumulatorT get_identity() { } /** minimum (floating point) */ -template > && - is_sycl_floating_point_v, - bool> = true> +template < + typename AccumulatorT, typename OperatorT, + std::enable_if_t> && + is_sycl_scalar_floating_point_v, + bool> = true> AccumulatorT get_identity() { return std::numeric_limits::infinity(); } diff --git a/tests/reduction/reduction_without_identity_param_common.h b/tests/reduction/reduction_without_identity_param_common.h index 968ff6352..9e10e7972 100644 --- a/tests/reduction/reduction_without_identity_param_common.h +++ b/tests/reduction/reduction_without_identity_param_common.h @@ -301,7 +301,7 @@ template ::value && + if constexpr (is_sycl_scalar_floating_point::value && (std::is_same>::value || std::is_same>::value || std::is_same>::value)) { diff --git a/util/type_traits.h b/util/type_traits.h index 68b4e0293..cf9842fee 100644 --- a/util/type_traits.h +++ b/util/type_traits.h @@ -50,7 +50,7 @@ using has_atomic_support = contains -using is_sycl_floating_point = +using is_sycl_scalar_floating_point = #if SYCL_CTS_ENABLE_HALF_TESTS std::bool_constant || std::is_same_v, sycl::half>>; @@ -59,8 +59,8 @@ using is_sycl_floating_point = #endif template -inline constexpr bool is_sycl_floating_point_v{ - is_sycl_floating_point::value}; +inline constexpr bool is_sycl_scalar_floating_point_v{ + is_sycl_scalar_floating_point::value}; template using is_nonconst_rvalue_reference = @@ -410,10 +410,10 @@ using is_legal_operator = std::bool_constant< std::is_same_v, bool>) || (std::is_same_v> && std::is_integral_v) || (std::is_same_v> && - is_sycl_floating_point_v) || + is_sycl_scalar_floating_point_v) || (std::is_same_v> && std::is_integral_v) || (std::is_same_v> && - is_sycl_floating_point_v)>; + is_sycl_scalar_floating_point_v)>; /** Checks whether \p T and \p OperatorT form a valid SYCL operator. */ From 9d923ce0123696959730ee8d0f0bdc9c2a647ea1 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Wed, 18 Dec 2024 10:09:22 +0100 Subject: [PATCH 3/3] Re-group functions in math_reference according to feature macros --- util/math_reference.cpp | 311 +++--- util/math_reference.h | 2059 ++++++++++++++++++++------------------- 2 files changed, 1185 insertions(+), 1185 deletions(-) diff --git a/util/math_reference.cpp b/util/math_reference.cpp index bb84a698a..aa8fc8929 100644 --- a/util/math_reference.cpp +++ b/util/math_reference.cpp @@ -35,11 +35,11 @@ namespace { template -void type_punn(const A &from, B &to) { +void type_punn(const A& from, B& to) { static_assert(sizeof(A) == sizeof(B), "type punning of incompatible sized types"); - std::memcpy(reinterpret_cast(&to), - reinterpret_cast(&from), sizeof(A)); + std::memcpy(reinterpret_cast(&to), + reinterpret_cast(&from), sizeof(A)); } #define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b)) @@ -68,14 +68,6 @@ T bitselect_f_t(T x, T y, T z) { float bitselect(float a, float b, float c) { return bitselect_f_t(a, b, c); } -double bitselect(double a, double b, double c) { - return bitselect_f_t(a, b, c); -} -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c) { - return bitselect_f_t(a, b, c); -} -#endif /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- DEGREES * @@ -86,14 +78,8 @@ T degrees_t(T a) { return a * (180.0 / M_PI); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half degrees(sycl::half a) { return degrees_t(a); } -#endif - float degrees(float a) { return degrees_t(a); } -double degrees(double a) { return degrees_t(a); } - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- RADIANS * */ @@ -103,13 +89,8 @@ T radians_t(T a) { return a * (M_PI / 180.0); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half radians(sycl::half a) { return radians_t(a); } -#endif - float radians(float a) { return radians_t(a); } -double radians(double a) { return radians_t(a); } /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- STEP * */ @@ -120,14 +101,8 @@ T step_t(T a, T b) { return 1.0; } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half step(sycl::half a, sycl::half b) { return step_t(a, b); } -#endif - float step(float a, float b) { return step_t(a, b); } -double step(double a, double b) { return step_t(a, b); } - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- SMOOTHSTEP * */ @@ -139,18 +114,9 @@ sycl_cts::resultRef smoothstep_t(T a, T b, T c) { return t * t * (3 - 2 * t); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, - sycl::half c) { - return smoothstep_t(a, b, c); -} -#endif sycl_cts::resultRef smoothstep(float a, float b, float c) { return smoothstep_t(a, b, c); } -sycl_cts::resultRef smoothstep(double a, double b, double c) { - return smoothstep_t(a, b, c); -} /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- SIGN * @@ -165,14 +131,8 @@ T sign_t(T a) { return +0.0; } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half sign(sycl::half a) { return sign_t(a); } -#endif - float sign(float a) { return sign_t(a); } -double sign(double a) { return sign_t(a); } - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- MAD_SAT * */ @@ -270,22 +230,10 @@ sycl_cts::resultRef mix_t(T x, T y, T a) { return sycl_cts::resultRef(T(), true); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, - const sycl::half c) { - return mix_t(a, b, c); -} -#endif - sycl_cts::resultRef mix(const float a, const float b, const float c) { return mix_t(a, b, c); } -sycl_cts::resultRef mix(const double a, const double b, - const double c) { - return mix_t(a, b, c); -} - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- MUL_HI * */ @@ -472,49 +420,105 @@ sycl_cts::resultRef mul24(uint32_t x, uint32_t y) { * */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half acospi(sycl::half a) { return reference_acospi(a); } -#endif float acospi(float a) { return reference_acospi(a); } -double acospi(double a) { return reference_acospil(a); } - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half asinpi(sycl::half a) { return reference_asinpi(a); } -#endif float asinpi(float a) { return reference_asinpi(a); } -double asinpi(double a) { return reference_asinpil(a); } +float atanpi(float a) { return reference_atanpi(a); } +float atan2pi(float a, float b) { return reference_atan2pi(a, b); } +float cospi(float a) { return reference_cospi(a); } +float fma(float a, float b, float c) { return reference_fma(a, b, c, 0); } + +float fract(float a, float* b) { + *b = std::floor(a); + return std::fmin(a - *b, nextafter(1.0f, 0.0f)); +} + +float nan(unsigned int a) { return std::nanf(std::to_string(a).c_str()); } +float sinpi(float a) { return reference_sinpi(a); } +float tanpi(float a) { return reference_tanpi(a); } + +// Geometric functions + +template +sycl::vec cross_t(sycl::vec a, sycl::vec b) { + sycl::vec res; + std::vector temp_res(4); + std::vector av({a.x(), a.y(), a.z()}); + std::vector bv({b.x(), b.y(), b.z()}); + temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; + temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; + temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; + temp_res[3] = 0.0; + for (int i = 0; i < N; i++) res[i] = temp_res[i]; + + return res; +} + +sycl::float4 cross(sycl::float4 p0, sycl::float4 p1) { return cross_t(p0, p1); } +sycl::float3 cross(sycl::float3 p0, sycl::float3 p1) { return cross_t(p0, p1); } + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray cross_t(sycl::marray a, sycl::marray b) { + sycl::marray res; + std::vector temp_res(4); + std::vector av({a[0], a[1], a[2]}); + std::vector bv({b[0], b[1], b[2]}); + temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; + temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; + temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; + temp_res[3] = 0.0; + for (size_t i = 0; i < N; i++) res[i] = temp_res[i]; + return res; +} + +sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1) { + return cross_t(p0, p1); +} +sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1) { + return cross_t(p0, p1); +} +#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP #if SYCL_CTS_ENABLE_HALF_TESTS + +sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c) { + return bitselect_f_t(a, b, c); +} + +sycl::half degrees(sycl::half a) { return degrees_t(a); } +sycl::half radians(sycl::half a) { return radians_t(a); } +sycl::half step(sycl::half a, sycl::half b) { return step_t(a, b); } + +sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, + sycl::half c) { + return smoothstep_t(a, b, c); +} + +sycl::half sign(sycl::half a) { return sign_t(a); } + +sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, + const sycl::half c) { + return mix_t(a, b, c); +} + +sycl::half acospi(sycl::half a) { return reference_acospi(a); } +sycl::half asinpi(sycl::half a) { return reference_asinpi(a); } sycl::half atanpi(sycl::half a) { return reference_atanpi(a); } -#endif -float atanpi(float a) { return reference_atanpi(a); } -double atanpi(double a) { return reference_atanpil(a); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half atan2pi(sycl::half a, sycl::half b) { return reference_atan2pi(a, b); } -#endif -float atan2pi(float a, float b) { return reference_atan2pi(a, b); } -double atan2pi(double a, double b) { return reference_atan2pil(a, b); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half cospi(sycl::half a) { return reference_cospi(a); } -#endif -float cospi(float a) { return reference_cospi(a); } -double cospi(double a) { return reference_cospil(a); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fma(sycl::half a, sycl::half b, sycl::half c) { return reference_fma(a, b, c, 0); } -#endif -float fma(float a, float b, float c) { return reference_fma(a, b, c, 0); } -double fma(double a, double b, double c) { return reference_fmal(a, b, c); } // AdaptiveCpp does not yet support sycl::bit_cast, which is used in // `nextafter`. -#if SYCL_CTS_ENABLE_HALF_TESTS && !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP sycl::half fdim(sycl::half a, sycl::half b) { if (a > b) { // to get rounding to nearest even @@ -533,39 +537,22 @@ sycl::half fdim(sycl::half a, sycl::half b) { } #endif -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fract(sycl::half a, sycl::half *b) { +sycl::half fract(sycl::half a, sycl::half* b) { *b = std::floor(a); return std::fmin(a - *b, nextafter(sycl::half(1.0), sycl::half(0.0))); } -#endif -float fract(float a, float *b) { - *b = std::floor(a); - return std::fmin(a - *b, nextafter(1.0f, 0.0f)); -} -double fract(double a, double *b) { - *b = std::floor(a); - return std::fmin(a - *b, nextafter(1.0, 0.0)); -} -float nan(unsigned int a) { return std::nanf(std::to_string(a).c_str()); } -double nan(unsigned long a) { return std::nan(std::to_string(a).c_str()); } -double nan(unsigned long long a) { return std::nan(std::to_string(a).c_str()); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half nan(unsigned short a) { return nan(unsigned(a)); } -#endif -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half modf(sycl::half a, sycl::half *b) { +sycl::half modf(sycl::half a, sycl::half* b) { float resPtr; float res = modf(static_cast(a), &resPtr); *b = static_cast(resPtr); return res; } -#endif // AdaptiveCpp does not yet support sycl::bit_cast -#if SYCL_CTS_ENABLE_HALF_TESTS && !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP sycl::half nextafter(sycl::half x, sycl::half y) { if (std::isnan(x)) return x; @@ -593,75 +580,9 @@ sycl::half nextafter(sycl::half x, sycl::half y) { } #endif -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half sinpi(sycl::half a) { return reference_sinpi(a); } -#endif -float sinpi(float a) { return reference_sinpi(a); } -double sinpi(double a) { return reference_sinpil(a); } - -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half tanpi(sycl::half a) { return reference_tanpi(a); } -#endif -float tanpi(float a) { return reference_tanpi(a); } -double tanpi(double a) { return reference_tanpil(a); } - -// Geometric functions -template -sycl::vec cross_t(sycl::vec a, sycl::vec b) { - sycl::vec res; - std::vector temp_res(4); - std::vector av({a.x(), a.y(), a.z()}); - std::vector bv({b.x(), b.y(), b.z()}); - temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; - temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; - temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; - temp_res[3] = 0.0; - for (int i = 0; i < N; i++) res[i] = temp_res[i]; - - return res; -} - -sycl::float4 cross(sycl::float4 p0, sycl::float4 p1) { return cross_t(p0, p1); } -sycl::float3 cross(sycl::float3 p0, sycl::float3 p1) { return cross_t(p0, p1); } -sycl::double4 cross(sycl::double4 p0, sycl::double4 p1) { - return cross_t(p0, p1); -} -sycl::double3 cross(sycl::double3 p0, sycl::double3 p1) { - return cross_t(p0, p1); -} - -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray cross_t(sycl::marray a, sycl::marray b) { - sycl::marray res; - std::vector temp_res(4); - std::vector av({a[0], a[1], a[2]}); - std::vector bv({b[0], b[1], b[2]}); - temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; - temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; - temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; - temp_res[3] = 0.0; - for (size_t i = 0; i < N; i++) res[i] = temp_res[i]; - return res; -} - -sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1) { - return cross_t(p0, p1); -} -sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1) { - return cross_t(p0, p1); -} -sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1) { - return cross_t(p0, p1); -} -sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1) { - return cross_t(p0, p1); -} -#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP - -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fast_dot(float p0) { return std::pow(p0, 2); } sycl::half fast_dot(sycl::float2 p0) { return std::pow(p0.x(), 2) + std::pow(p0.y(), 2); @@ -686,6 +607,64 @@ sycl::half fast_dot(sycl::mfloat4 p0) { std::pow(p0[3], 2); } #endif + +#endif // SYCL_CTS_ENABLE_HALF_TESTS + +#if SYCL_CTS_ENABLE_DOUBLE_TESTS + +double bitselect(double a, double b, double c) { + return bitselect_f_t(a, b, c); +} + +double degrees(double a) { return degrees_t(a); } +double radians(double a) { return radians_t(a); } +double step(double a, double b) { return step_t(a, b); } + +sycl_cts::resultRef smoothstep(double a, double b, double c) { + return smoothstep_t(a, b, c); +} + +double sign(double a) { return sign_t(a); } + +sycl_cts::resultRef mix(const double a, const double b, + const double c) { + return mix_t(a, b, c); +} + +double acospi(double a) { return reference_acospil(a); } +double asinpi(double a) { return reference_asinpil(a); } +double atanpi(double a) { return reference_atanpil(a); } +double atan2pi(double a, double b) { return reference_atan2pil(a, b); } +double cospi(double a) { return reference_cospil(a); } +double fma(double a, double b, double c) { return reference_fmal(a, b, c); } + +double fract(double a, double* b) { + *b = std::floor(a); + return std::fmin(a - *b, nextafter(1.0, 0.0)); +} + +double nan(unsigned long a) { return std::nan(std::to_string(a).c_str()); } +double nan(unsigned long long a) { return std::nan(std::to_string(a).c_str()); } + +double sinpi(double a) { return reference_sinpil(a); } +double tanpi(double a) { return reference_tanpil(a); } + +sycl::double4 cross(sycl::double4 p0, sycl::double4 p1) { + return cross_t(p0, p1); +} +sycl::double3 cross(sycl::double3 p0, sycl::double3 p1) { + return cross_t(p0, p1); +} + +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1) { + return cross_t(p0, p1); +} +sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1) { + return cross_t(p0, p1); +} #endif +#endif // SYCL_CTS_ENABLE_DOUBLE_TESTS + } /* namespace reference */ diff --git a/util/math_reference.h b/util/math_reference.h index 38d1cad37..48d866312 100644 --- a/util/math_reference.h +++ b/util/math_reference.h @@ -28,93 +28,8 @@ #include "./math_helper.h" #include -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP - -#define MAKE_VEC_AND_MARRAY_VERSIONS(func) \ - template \ - sycl::vec func(sycl::vec a) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x) { return func(x); }, a); \ - } \ - template \ - sycl::marray func(sycl::marray a) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x) { return func(x); }, a); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } \ - template \ - sycl::marray func(sycl::marray a, sycl::marray b) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b, \ - sycl::vec c) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } \ - template \ - sycl::marray func(sycl::marray a, sycl::marray b, \ - sycl::marray c) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(func) \ - template \ - sycl::vec func(sycl::vec a, T b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } \ - template \ - sycl::marray func(sycl::marray a, T b) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#else // definitions without marray for AdaptiveCpp - -#define MAKE_VEC_AND_MARRAY_VERSIONS(func) \ - template \ - sycl::vec func(sycl::vec a) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x) { return func(x); }, a); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b, \ - sycl::vec c) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(func) \ - template \ - sycl::vec func(sycl::vec a, T b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#endif - namespace reference { + /* two argument relational reference */ template auto isequal(T a, T b) { @@ -146,7 +61,7 @@ auto islessequal(T a, T b) { return sycl_cts::math::rel_func_dispatcher(a, b); } -auto constexpr islessgreater_func = [](const auto &x, const auto &y) { +auto constexpr islessgreater_func = [](const auto& x, const auto& y) { return (x < y) || (x > y); }; template @@ -154,7 +69,7 @@ auto islessgreater(T a, T b) { return sycl_cts::math::rel_func_dispatcher(islessgreater_func, a, b); } -auto constexpr isordered_func = [](const auto &x, const auto &y) { +auto constexpr isordered_func = [](const auto& x, const auto& y) { return (x == x) && (y == y); }; template @@ -162,7 +77,7 @@ auto isordered(T a, T b) { return sycl_cts::math::rel_func_dispatcher(isordered_func, a, b); } -auto constexpr isunordered_func = [](const auto &x, const auto &y) { +auto constexpr isunordered_func = [](const auto& x, const auto& y) { return !((x == x) && (y == y)); }; template @@ -171,31 +86,31 @@ auto isunordered(T a, T b) { } /* one argument relational reference */ -auto constexpr isfinite_func = [](const auto &x) { return std::isfinite(x); }; +auto constexpr isfinite_func = [](const auto& x) { return std::isfinite(x); }; template auto isfinite(T a) { return sycl_cts::math::rel_func_dispatcher(isfinite_func, a); } -auto constexpr isinf_func = [](const auto &x) { return std::isinf(x); }; +auto constexpr isinf_func = [](const auto& x) { return std::isinf(x); }; template auto isinf(T a) { return sycl_cts::math::rel_func_dispatcher(isinf_func, a); } -auto constexpr isnan_func = [](const auto &x) { return std::isnan(x); }; +auto constexpr isnan_func = [](const auto& x) { return std::isnan(x); }; template auto isnan(T a) { return sycl_cts::math::rel_func_dispatcher(isnan_func, a); } -auto constexpr isnormal_func = [](const auto &x) { return std::isnormal(x); }; +auto constexpr isnormal_func = [](const auto& x) { return std::isnormal(x); }; template auto isnormal(T a) { return sycl_cts::math::rel_func_dispatcher(isnormal_func, a); } -auto constexpr signbit_func = [](const auto &x) { return std::signbit(x); }; +auto constexpr signbit_func = [](const auto& x) { return std::signbit(x); }; template auto signbit(T a) { return sycl_cts::math::rel_func_dispatcher(signbit_func, a); @@ -205,85 +120,22 @@ template bool any(T x) { return sycl_cts::math::if_msb_set(x); } -template -int any(sycl::vec a) { - for (int i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; - } - return false; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -bool any(sycl::marray a) { - for (size_t i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; - } - return false; -} -#endif template bool all(T x) { return sycl_cts::math::if_msb_set(x); } -template -int all(sycl::vec a) { - for (int i = 0; i < N; i++) { - if (all(a[i]) == 0) return false; - } - return true; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -bool all(sycl::marray a) { - for (size_t i = 0; i < N; i++) { - if (all(a[i]) == 0) return false; - } - return true; -} -#endif template T bitselect(T a, T b, T c) { return (c & b) | (~c & a); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c); -#endif float bitselect(float a, float b, float c); -double bitselect(double a, double b, double c); -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(bitselect) template T select(T a, T b, bool c) { return c ? b : a; } -template -sycl::vec select(sycl::vec a, sycl::vec b, - sycl::vec c) { - sycl::vec res; - for (int i = 0; i < N; i++) { - if (any(c[i]) == 1) - res[i] = b[i]; - else - res[i] = a[i]; - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray select(sycl::marray a, sycl::marray b, - sycl::marray c) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = c[i] ? b[i] : a[i]; - } - return res; -} -#endif /* absolute value */ template @@ -292,18 +144,6 @@ sycl_cts::resultRef abs(T x) { T result = x < 0 ? T(-U(x)) : x; return result < 0 ? sycl_cts::resultRef(0, true) : result; } -template -sycl_cts::resultRef> abs(sycl::vec a) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x) { return abs(x); }, a); -} -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs(sycl::marray a) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x) { return abs(x); }, a); -} -#endif /* absolute difference */ template @@ -319,21 +159,6 @@ sycl_cts::resultRef abs_diff(T a, T b) { ? sycl_cts::resultRef(0, true) : T(result); } -template -sycl_cts::resultRef> abs_diff(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs_diff(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); -} -#endif /* add with saturation */ template @@ -353,7 +178,6 @@ T add_sat(T a, T b) { return r; } } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(add_sat) /* half add */ template @@ -361,14 +185,12 @@ T hadd(T a, T b) { if (std::is_unsigned::value) return (a >> 1) + (b >> 1) + ((a & b) & 0x1); return (a >> 1) + (b >> 1) + (a & b & 1); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hadd) /* round up half add */ template T rhadd(T a, T b) { return (a >> 1) + (b >> 1) + ((a & 1) | (b & 1)); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rhadd) /* clamp */ template @@ -376,48 +198,6 @@ sycl_cts::resultRef clamp(T v, T minv, T maxv) { if (minv > maxv) return sycl_cts::resultRef(T(), true); return (v < minv) ? minv : ((v > maxv) ? maxv : v); } -template -sycl_cts::resultRef> clamp(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> clamp(sycl::vec a, T b, T c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = clamp(a[i], b, c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> clamp(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = clamp(a[i], b, c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif /* count leading zeros */ template @@ -430,7 +210,6 @@ T clz(T x) { lz++; return static_cast(lz); } -MAKE_VEC_AND_MARRAY_VERSIONS(clz) /* count trailing zeros */ template @@ -445,7 +224,6 @@ T ctz(T x) { tz++; return static_cast(tz); } -MAKE_VEC_AND_MARRAY_VERSIONS(ctz) // mad_hi is after mul_hi @@ -463,8 +241,6 @@ int mad_sat(int, int, int); long mad_sat(long, long, long); long long mad_sat(long long, long long, long long); -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_sat) - /* maximum value */ template sycl_cts::resultRef max(T a, T b) { @@ -474,30 +250,6 @@ sycl_cts::resultRef max(T a, T b) { return (a < b) ? b : a; return sycl_cts::resultRef(T(), true); } -template -sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::vec a, T b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> max(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -#endif /* minimum value */ template @@ -508,30 +260,6 @@ sycl_cts::resultRef min(T a, T b) { return (b < a) ? b : a; return sycl_cts::resultRef(T(), true); } -template -sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::vec a, T b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> min(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -#endif /* multiply and return high part */ unsigned char mul_hi(unsigned char, unsigned char); @@ -545,14 +273,12 @@ short mul_hi(short, short); int mul_hi(int, int); long mul_hi(long, long); long long mul_hi(long long, long long); -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(mul_hi) /* multiply add, get high part */ template T mad_hi(T x, T y, T z) { return mul_hi(x, y) + z; } -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_hi) /* bitwise rotate */ template @@ -570,7 +296,6 @@ T rotate(T v, T i) { size_t nBits = sycl_cts::math::num_bits(v) - size_t(i_mod); return T((v << i_mod) | ((v >> nBits) & mask)); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rotate) /* substract with saturation */ template @@ -596,7 +321,6 @@ T sub_sat(T x, T y) { } } } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(sub_sat) /* upsample */ uint16_t upsample(uint8_t h, uint8_t l); @@ -639,23 +363,6 @@ struct upsample_t { using type = int64_t; }; -template -sycl::vec::type, N> upsample( - sycl::vec a, sycl::vec::type, N> b) { - return sycl_cts::math::run_func_on_vector::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray::type, N> upsample( - sycl::marray a, - sycl::marray::type, N> b) { - return sycl_cts::math::run_func_on_marray::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -#endif - /* return number of non zero bits in x */ template T popcount(T x) { @@ -664,613 +371,179 @@ T popcount(T x) { if (x & (1ull << i)) lz++; return lz; } -MAKE_VEC_AND_MARRAY_VERSIONS(popcount) /* fast multiply add 24bits */ sycl_cts::resultRef mad24(int32_t x, int32_t y, int32_t z); sycl_cts::resultRef mad24(uint32_t x, uint32_t y, uint32_t z); -template -sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mad24(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -#endif - /* fast multiply 24bits */ sycl_cts::resultRef mul24(int32_t x, int32_t y); sycl_cts::resultRef mul24(uint32_t x, uint32_t y); -template -sycl_cts::resultRef> mul24(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mul24(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -#endif - // Common functions -// clamp is in Integer functions - -/* degrees */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half degrees(sycl::half); -#endif float degrees(float a); -double degrees(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(degrees) - -// max and min are in Integer functions - -/* mix */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, - const sycl::half c); -#endif sycl_cts::resultRef mix(const float a, const float b, const float c); -sycl_cts::resultRef mix(const double a, const double b, const double c); - -template -sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, - T c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif - -/* radians */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half radians(sycl::half); -#endif float radians(float a); -double radians(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(radians) - -/* step */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half step(sycl::half a, sycl::half b); -#endif float step(float a, float b); -double step(double a, double b); -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(step) - -template -sycl::vec step(T a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray step(T a, sycl::marray b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -#endif - -/* smoothstep */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, - sycl::half c); -#endif sycl_cts::resultRef smoothstep(float a, float b, float c); -sycl_cts::resultRef smoothstep(double a, double b, double c); - -template -sycl_cts::resultRef> smoothstep(sycl::vec a, - sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = smoothstep(a, b, c[i]); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> smoothstep(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> smoothstep(T a, T b, - sycl::marray c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = smoothstep(a, b, c[i]); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif - -/* sign */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half sign(sycl::half a); -#endif float sign(float a); -double sign(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(sign) // Math Functions template struct higher_accuracy; -#if SYCL_CTS_ENABLE_HALF_TESTS -template <> -struct higher_accuracy { - using type = float; -}; -#endif template <> struct higher_accuracy { using type = double; }; -template <> -struct higher_accuracy { - using type = long double; -}; - -template -struct higher_accuracy> { - using type = sycl::vec::type, N>; -}; -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -struct higher_accuracy> { - using type = sycl::marray::type, N>; -}; -#endif template T acos(T a) { return std::acos(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(acos) template T acosh(T a) { return std::acosh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(acosh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half acospi(sycl::half a); -#endif float acospi(float a); -double acospi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(acospi) template T asin(T a) { return std::asin(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(asin) template T asinh(T a) { return std::asinh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(asinh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half asinpi(sycl::half a); -#endif float asinpi(float a); -double asinpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(asinpi) template T atan(T a) { return std::atan(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(atan) template T atan2(T a, T b) { return std::atan2(static_cast::type>(a), b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2) template T atanh(T a) { return std::atanh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(atanh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half atanpi(sycl::half a); -#endif float atanpi(float a); -double atanpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(atanpi) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half atan2pi(sycl::half a, sycl::half b); -#endif float atan2pi(float a, float b); -double atan2pi(double a, double b); -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2pi) template T cbrt(T a) { return std::cbrt(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(cbrt) using std::ceil; -MAKE_VEC_AND_MARRAY_VERSIONS(ceil) - using std::copysign; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(copysign) template T cos(T a) { return std::cos(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(cos) template T cosh(T a) { return std::cosh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(cosh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half cospi(sycl::half a); -#endif float cospi(float a); -double cospi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(cospi) template T erfc(T a) { return std::erfc(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(erfc) template T erf(T a) { return std::erf(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(erf) template T exp(T a) { return std::exp(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(exp) template T exp2(T a) { return std::exp2(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(exp2) template T exp10(T a) { return std::pow(static_cast::type>(10), static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(exp10) template T expm1(T a) { return std::expm1(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(expm1) using std::fabs; -MAKE_VEC_AND_MARRAY_VERSIONS(fabs) - using std::fdim; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fdim(sycl::half a, sycl::half b); -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fdim) - using std::floor; -MAKE_VEC_AND_MARRAY_VERSIONS(floor) - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fma(sycl::half a, sycl::half b, sycl::half c); -#endif float fma(float a, float b, float c); -double fma(double a, double b, double c); - -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(fma) - using std::fmax; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmax) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmax) - using std::fmin; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmin) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmin) - using std::fmod; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmod) +float fract(float a, float* b); +using std::frexp; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fract(sycl::half a, sycl::half *b); -#endif -float fract(float a, float *b); -double fract(double a, double *b); - -template -sycl::vec fract(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray fract(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - T value; - res[i] = fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -#endif - -using std::frexp; -template -sycl::vec frexp(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = frexp(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray frexp(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - int value; - res[i] = frexp(a[i], &value); - ; - resPtr[i] = value; - } - *b = resPtr; - return res; -} -#endif - -template -T hypot(T a, T b) { - return std::hypot(static_cast::type>(a), b); -} -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hypot) +template +T hypot(T a, T b) { + return std::hypot(static_cast::type>(a), b); +} using std::ilogb; -template -sycl::vec ilogb(sycl::vec a) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = ilogb(a[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray ilogb(sycl::marray a) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = ilogb(a[i]); - } - return res; -} -#endif - using std::ldexp; -template -sycl::vec ldexp(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = ldexp(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray ldexp(sycl::marray a, sycl::marray b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = ldexp(a[i], b[i]); - } - return res; -} -#endif -template -sycl::vec ldexp(sycl::vec a, int b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = ldexp(a[i], b); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray ldexp(sycl::marray a, int b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = ldexp(a[i], b); - } - return res; -} -#endif - using std::lgamma; -MAKE_VEC_AND_MARRAY_VERSIONS(lgamma) template -T lgamma_r(T a, int *b) { +T lgamma_r(T a, int* b) { *b = (std::tgamma(a) > 0) ? 1 : -1; return std::lgamma(a); } -template -sycl::vec lgamma_r(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = lgamma_r(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray lgamma_r(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - int value; - res[i] = lgamma_r(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -#endif template T log(T a) { return std::log(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log) template T log2(T a) { return std::log2(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log2) template T log10(T a) { return std::log10(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log10) template T log1p(T a) { return std::log1p(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log1p) using std::logb; -MAKE_VEC_AND_MARRAY_VERSIONS(logb) template T mad(T a, T b, T c) { return a * b + c; } -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad) template T maxmag(T a, T b) { @@ -1280,7 +553,6 @@ T maxmag(T a, T b) { return b; return fmax(a, b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(maxmag) template T minmag(T a, T b) { @@ -1290,361 +562,1128 @@ T minmag(T a, T b) { return b; return fmin(a, b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(minmag) using std::modf; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half modf(sycl::half a, sycl::half *b); -#endif -template -sycl::vec modf(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = modf(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; +float nan(unsigned int a); +using std::nextafter; + +template +T pow(T a, T b) { + return std::pow(static_cast::type>(a), + static_cast::type>(b)); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray modf(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = modf(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; + +template +T pown(T a, int b) { + return std::pow(static_cast::type>(a), + static_cast::type>(b)); } -#endif -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half nan(unsigned short a); -#endif -float nan(unsigned int a); -double nan(unsigned long a); -double nan(unsigned long long a); -#if SYCL_CTS_ENABLE_HALF_TESTS -template -sycl::vec nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](unsigned short x) { return nan(x); }, a); +template +sycl_cts::resultRef powr(T a, T b) { + if (a < 0) return sycl_cts::resultRef(T(), true); + return std::pow(static_cast::type>(a), + static_cast::type>(b)); } -#endif -template -sycl::vec nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](unsigned int x) { return nan(x); }, a); + +using std::remainder; + +template +T remquo(T x, T y, int* quo) { + return reference_remquol(x, y, quo); } -template -std::enable_if_t || - std::is_same_v, - sycl::vec> -nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](T x) { return nan(x); }, a); + +using std::rint; + +template +T rootn(T a, int b) { + return std::pow(static_cast::type>(a), + static_cast::type>(1.0 / b)); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -#if SYCL_CTS_ENABLE_HALF_TESTS -template -sycl::marray nan(sycl::marray a) { - return sycl_cts::math::run_func_on_marray( - [](unsigned short x) { return nan(x); }, a); + +using std::round; + +template +T rsqrt(T a) { + return 1 / std::sqrt(static_cast::type>(a)); } -#endif -template -sycl::marray nan(sycl::marray a) { - return sycl_cts::math::run_func_on_marray( - [](unsigned int x) { return nan(x); }, a); + +template +T sincos(T a, T* b) { + *b = std::cos(static_cast::type>(a)); + return std::sin(static_cast::type>(a)); } -template -std::enable_if_t || - std::is_same_v, - sycl::marray> -nan(sycl::marray a) { - return sycl_cts::math::run_func_on_marray( - [](T x) { return nan(x); }, a); + +template +T sin(T a) { + return std::sin(static_cast::type>(a)); } -#endif -using std::nextafter; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half nextafter(sycl::half a, sycl::half b); -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(nextafter) +template +T sinh(T a) { + return std::sinh(static_cast::type>(a)); +} + +float sinpi(float a); template -T pow(T a, T b) { - return std::pow(static_cast::type>(a), - static_cast::type>(b)); +T sqrt(T a) { + return std::sqrt(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(pow) template -T pown(T a, int b) { - return std::pow(static_cast::type>(a), - static_cast::type>(b)); +T tan(T a) { + return std::tan(static_cast::type>(a)); } -template -sycl::vec pown(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = pown(a[i], b[i]); + +template +T tanh(T a) { + return std::tanh(static_cast::type>(a)); +} + +float tanpi(float a); + +template +T tgamma(T a) { + return std::tgamma(static_cast::type>(a)); +} + +using std::trunc; + +template +T recip(T a) { + return 1.0 / a; +} + +template +T divide(T a, T b) { + return a / b; +} + +// Geometric functions + +sycl::float4 cross(sycl::float4 p0, sycl::float4 p1); +sycl::float3 cross(sycl::float3 p0, sycl::float3 p1); + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1); +sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1); +#endif + +template +T dot(T p0, T p1) { + return p0 * p1; +} + +template +T normalize(T p) { + if (p < 0) return -1; + return 1; +} + +#if SYCL_CTS_ENABLE_HALF_TESTS + +template <> +struct higher_accuracy { + using type = float; +}; + +sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c); +sycl::half degrees(sycl::half); +sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, + const sycl::half c); +sycl::half radians(sycl::half); +sycl::half step(sycl::half a, sycl::half b); +sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, + sycl::half c); +sycl::half sign(sycl::half a); +sycl::half acospi(sycl::half a); +sycl::half asinpi(sycl::half a); +sycl::half atanpi(sycl::half a); +sycl::half atan2pi(sycl::half a, sycl::half b); +sycl::half cospi(sycl::half a); +sycl::half fdim(sycl::half a, sycl::half b); +sycl::half fma(sycl::half a, sycl::half b, sycl::half c); +sycl::half fract(sycl::half a, sycl::half* b); +sycl::half modf(sycl::half a, sycl::half* b); +sycl::half nan(unsigned short a); + +template +sycl::vec nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](unsigned short x) { return nan(x); }, a); +} + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray nan(sycl::marray a) { + return sycl_cts::math::run_func_on_marray( + [](unsigned short x) { return nan(x); }, a); +} +#endif + +sycl::half nextafter(sycl::half a, sycl::half b); +sycl::half sinpi(sycl::half a); +sycl::half tanpi(sycl::half a); + +sycl::half fast_dot(float p0); +sycl::half fast_dot(sycl::float2 p0); +sycl::half fast_dot(sycl::float3 p0); +sycl::half fast_dot(sycl::float4 p0); + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::half fast_dot(sycl::mfloat2 p0); +sycl::half fast_dot(sycl::mfloat3 p0); +sycl::half fast_dot(sycl::mfloat4 p0); +#endif + +#endif // SYCL_CTS_ENABLE_HALF_TESTS + +#if SYCL_CTS_ENABLE_DOUBLE_TESTS + +template <> +struct higher_accuracy { + using type = long double; +}; + +double bitselect(double a, double b, double c); +double degrees(double a); +sycl_cts::resultRef mix(const double a, const double b, const double c); +double radians(double a); +double step(double a, double b); +sycl_cts::resultRef smoothstep(double a, double b, double c); +double sign(double a); + +double acospi(double a); +double asinpi(double a); +double atanpi(double a); +double atan2pi(double a, double b); +double cospi(double a); +double fma(double a, double b, double c); +double fract(double a, double* b); +double nan(unsigned long a); +double nan(unsigned long long a); + +template +std::enable_if_t || + std::is_same_v, + sycl::vec> +nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](T x) { return nan(x); }, a); +} + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +std::enable_if_t || + std::is_same_v, + sycl::marray> +nan(sycl::marray a) { + return sycl_cts::math::run_func_on_marray( + [](T x) { return nan(x); }, a); +} +#endif + +double sinpi(double a); +double tanpi(double a); + +sycl::double4 cross(sycl::double4 p0, sycl::double4 p1); +sycl::double3 cross(sycl::double3 p0, sycl::double3 p1); + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1); +sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1); +#endif + +#endif // SYCL_CTS_ENABLE_DOUBLE_TESTS + +// sycl::vec overloads of the above. Some vector functions reference their +// scalar counterparts, so all scalar overloads (float, half, double) must have +// been declared previously - otherwise they will not participate in overload +// resolution, even if the point of template instantiation is outside this file. +#define MAKE_VEC_VERSION(func) \ + template \ + sycl::vec func(sycl::vec a) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x) { return func(x); }, a); \ + } + +#define MAKE_VEC_VERSION_2ARGS(func) \ + template \ + sycl::vec func(sycl::vec a, sycl::vec b) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +#define MAKE_VEC_VERSION_3ARGS(func) \ + template \ + sycl::vec func(sycl::vec a, sycl::vec b, \ + sycl::vec c) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ + } + +#define MAKE_VEC_VERSION_WITH_SCALAR(func) \ + template \ + sycl::vec func(sycl::vec a, T b) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +// Common functions + +template +int any(sycl::vec a) { + for (int i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; +} + +template +int all(sycl::vec a) { + for (int i = 0; i < N; i++) { + if (all(a[i]) == 0) return false; + } + return true; +} + +MAKE_VEC_VERSION_3ARGS(bitselect) + +template +sycl::vec select(sycl::vec a, sycl::vec b, + sycl::vec c) { + sycl::vec res; + for (int i = 0; i < N; i++) { + if (any(c[i]) == 1) + res[i] = b[i]; + else + res[i] = a[i]; + } + return res; +} + +template +sycl_cts::resultRef> abs(sycl::vec a) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x) { return abs(x); }, a); +} + +template +sycl_cts::resultRef> abs_diff(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(add_sat) +MAKE_VEC_VERSION_2ARGS(hadd) +MAKE_VEC_VERSION_2ARGS(rhadd) + +template +sycl_cts::resultRef> clamp(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> clamp(sycl::vec a, T b, T c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = clamp(a[i], b, c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_VEC_VERSION(clz) +MAKE_VEC_VERSION(ctz) + +MAKE_VEC_VERSION_3ARGS(mad_sat) + +template +sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::vec a, T b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} + +template +sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::vec a, T b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(mul_hi) +MAKE_VEC_VERSION_3ARGS(mad_hi) +MAKE_VEC_VERSION_2ARGS(rotate) +MAKE_VEC_VERSION_2ARGS(sub_sat) + +template +sycl::vec::type, N> upsample( + sycl::vec a, sycl::vec::type, N> b) { + return sycl_cts::math::run_func_on_vector::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} + +MAKE_VEC_VERSION(popcount) + +template +sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} + +template +sycl_cts::resultRef> mul24(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} + +MAKE_VEC_VERSION(degrees) + +template +sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, + T c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_VEC_VERSION(radians) + +template +sycl::vec step(T a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = step(a, b[i]); + } + return res; +} + +template +sycl_cts::resultRef> smoothstep(sycl::vec a, + sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = smoothstep(a, b, c[i]); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +// Math functions + +template +struct higher_accuracy> { + using type = sycl::vec::type, N>; +}; + +MAKE_VEC_VERSION(acos) +MAKE_VEC_VERSION(acosh) +MAKE_VEC_VERSION(acospi) +MAKE_VEC_VERSION(asin) +MAKE_VEC_VERSION(asinh) +MAKE_VEC_VERSION(asinpi) +MAKE_VEC_VERSION(atan) +MAKE_VEC_VERSION_2ARGS(atan2) +MAKE_VEC_VERSION(atanh) +MAKE_VEC_VERSION(atanpi) +MAKE_VEC_VERSION_2ARGS(atan2pi) +MAKE_VEC_VERSION(cbrt) +MAKE_VEC_VERSION(ceil) +MAKE_VEC_VERSION_2ARGS(copysign) +MAKE_VEC_VERSION(cos) +MAKE_VEC_VERSION(cosh) +MAKE_VEC_VERSION(cospi) +MAKE_VEC_VERSION(erfc) +MAKE_VEC_VERSION(erf) +MAKE_VEC_VERSION(exp) +MAKE_VEC_VERSION(exp2) +MAKE_VEC_VERSION(exp10) +MAKE_VEC_VERSION(expm1) +MAKE_VEC_VERSION(fabs) +MAKE_VEC_VERSION_2ARGS(fdim) +MAKE_VEC_VERSION(floor) +MAKE_VEC_VERSION_3ARGS(fma) +MAKE_VEC_VERSION_2ARGS(fmax) +MAKE_VEC_VERSION_WITH_SCALAR(fmax) +MAKE_VEC_VERSION_2ARGS(fmin) +MAKE_VEC_VERSION_WITH_SCALAR(fmin) +MAKE_VEC_VERSION_2ARGS(fmod) + +template +sycl::vec fract(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::vec frexp(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::frexp(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION_2ARGS(hypot) + +template +sycl::vec ilogb(sycl::vec a) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ilogb(a[i]); + } + return res; +} + +template +sycl::vec ldexp(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b[i]); + } + return res; +} +template +sycl::vec ldexp(sycl::vec a, int b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b); + } + return res; +} + +MAKE_VEC_VERSION(lgamma) + +template +sycl::vec lgamma_r(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::lgamma_r(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION(log) +MAKE_VEC_VERSION(log2) +MAKE_VEC_VERSION(log10) +MAKE_VEC_VERSION(log1p) +MAKE_VEC_VERSION(logb) + +MAKE_VEC_VERSION_3ARGS(mad) +MAKE_VEC_VERSION_2ARGS(maxmag) +MAKE_VEC_VERSION_2ARGS(minmag) + +template +sycl::vec modf(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::modf(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::vec nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](unsigned int x) { return nan(x); }, a); +} + +MAKE_VEC_VERSION_2ARGS(nextafter) +MAKE_VEC_VERSION_2ARGS(pow) + +template +sycl::vec pown(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::pown(a[i], b[i]); + } + return res; +} + +template +sycl_cts::resultRef> powr(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return reference::powr(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(remainder) + +template +sycl::vec remquo(sycl::vec a, sycl::vec b, + sycl::vec* c) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::remquo(a[i], b[i], &value); + resPtr[i] = value; + } + *c = resPtr; + return res; +} + +MAKE_VEC_VERSION(rint) + +template +sycl::vec rootn(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::rootn(a[i], b[i]); + } + return res; +} + +MAKE_VEC_VERSION(round) +MAKE_VEC_VERSION(rsqrt) +MAKE_VEC_VERSION(sign) + +template +sycl::vec sincos(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::sincos(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION(sin) +MAKE_VEC_VERSION(sinh) +MAKE_VEC_VERSION(sinpi) +MAKE_VEC_VERSION(sqrt) +MAKE_VEC_VERSION_2ARGS(step) +MAKE_VEC_VERSION(tan) +MAKE_VEC_VERSION(tanh) +MAKE_VEC_VERSION(tanpi) +MAKE_VEC_VERSION(tgamma) +MAKE_VEC_VERSION(trunc) +MAKE_VEC_VERSION(recip) +MAKE_VEC_VERSION_2ARGS(divide) + +// Geometric functions + +template +T dot(sycl::vec a, sycl::vec b) { + T res = 0; + for (int i = 0; i < N; i++) res += a[i] * b[i]; + return res; +} + +// sycl::marray overloads of the above. Not supported by AdaptiveCpp. +// Again, like for sycl::vec, these must be defined after all scalar overloads. +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +#define MAKE_MARRAY_VERSION(func) \ + template \ + sycl::marray func(sycl::marray a) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x) { return func(x); }, a); \ + } + +#define MAKE_MARRAY_VERSION_2ARGS(func) \ + template \ + sycl::marray func(sycl::marray a, sycl::marray b) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +#define MAKE_MARRAY_VERSION_3ARGS(func) \ + template \ + sycl::marray func(sycl::marray a, sycl::marray b, \ + sycl::marray c) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ + } + +#define MAKE_MARRAY_VERSION_WITH_SCALAR(func) \ + template \ + sycl::marray func(sycl::marray a, T b) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +// Common functions. + +template +bool any(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; +} + +template +bool all(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (all(a[i]) == 0) return false; + } + return true; +} + +MAKE_MARRAY_VERSION_3ARGS(bitselect) + +template +sycl::marray select(sycl::marray a, sycl::marray b, + sycl::marray c) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = c[i] ? b[i] : a[i]; + } + return res; +} + +template +sycl_cts::resultRef> abs(sycl::marray a) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x) { return abs(x); }, a); +} + +template +sycl_cts::resultRef> abs_diff(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION_2ARGS(add_sat) +MAKE_MARRAY_VERSION_2ARGS(hadd) +MAKE_MARRAY_VERSION_2ARGS(rhadd) + +template +sycl_cts::resultRef> clamp(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = clamp(a[i], b, c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_MARRAY_VERSION(clz) +MAKE_MARRAY_VERSION(ctz) + +MAKE_MARRAY_VERSION_3ARGS(mad_sat) + +template +sycl_cts::resultRef> max(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} + +template +sycl_cts::resultRef> min(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION_2ARGS(mul_hi) +MAKE_MARRAY_VERSION_3ARGS(mad_hi) +MAKE_MARRAY_VERSION_2ARGS(rotate) +MAKE_MARRAY_VERSION_2ARGS(sub_sat) + +template +sycl::marray::type, N> upsample( + sycl::marray a, + sycl::marray::type, N> b) { + return sycl_cts::math::run_func_on_marray::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION(popcount) + +template +sycl_cts::resultRef> mad24(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} + +template +sycl_cts::resultRef> mul24(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION(degrees) + +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; } - return res; + return sycl_cts::resultRef>(res, undefined); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +MAKE_MARRAY_VERSION(radians) + template -sycl::marray pown(sycl::marray a, sycl::marray b) { +sycl::marray step(T a, sycl::marray b) { sycl::marray res; for (size_t i = 0; i < N; i++) { - res[i] = pown(a[i], b[i]); + res[i] = step(a, b[i]); } return res; } -#endif -template -sycl_cts::resultRef powr(T a, T b) { - if (a < 0) return sycl_cts::resultRef(T(), true); - return std::pow(static_cast::type>(a), - static_cast::type>(b)); -} -template -sycl_cts::resultRef> powr(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return powr(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template -sycl_cts::resultRef> powr(sycl::marray a, - sycl::marray b) { +sycl_cts::resultRef> smoothstep(sycl::marray a, + sycl::marray b, + sycl::marray c) { return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return powr(x, y); }, a, b); + [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> smoothstep(T a, T b, + sycl::marray c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = smoothstep(a, b, c[i]); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); } -#endif -using std::remainder; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(remainder) +// Math functions -template -T remquo(T x, T y, int *quo) { - return reference_remquol(x, y, quo); -} +template +struct higher_accuracy> { + using type = sycl::marray::type, N>; +}; -template -sycl::vec remquo(sycl::vec a, sycl::vec b, - sycl::vec *c) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = remquo(a[i], b[i], &value); +MAKE_MARRAY_VERSION(acos) +MAKE_MARRAY_VERSION(acosh) +MAKE_MARRAY_VERSION(acospi) +MAKE_MARRAY_VERSION(asin) +MAKE_MARRAY_VERSION(asinh) +MAKE_MARRAY_VERSION(asinpi) +MAKE_MARRAY_VERSION(atan) +MAKE_MARRAY_VERSION_2ARGS(atan2) +MAKE_MARRAY_VERSION(atanh) +MAKE_MARRAY_VERSION(atanpi) +MAKE_MARRAY_VERSION_2ARGS(atan2pi) +MAKE_MARRAY_VERSION(cbrt) +MAKE_MARRAY_VERSION(ceil) +MAKE_MARRAY_VERSION_2ARGS(copysign) +MAKE_MARRAY_VERSION(cos) +MAKE_MARRAY_VERSION(cosh) +MAKE_MARRAY_VERSION(cospi) +MAKE_MARRAY_VERSION(erfc) +MAKE_MARRAY_VERSION(erf) +MAKE_MARRAY_VERSION(exp) +MAKE_MARRAY_VERSION(exp2) +MAKE_MARRAY_VERSION(exp10) +MAKE_MARRAY_VERSION(expm1) +MAKE_MARRAY_VERSION(fabs) +MAKE_MARRAY_VERSION_2ARGS(fdim) +MAKE_MARRAY_VERSION(floor) +MAKE_MARRAY_VERSION_3ARGS(fma) +MAKE_MARRAY_VERSION_2ARGS(fmax) +MAKE_MARRAY_VERSION_WITH_SCALAR(fmax) +MAKE_MARRAY_VERSION_2ARGS(fmin) +MAKE_MARRAY_VERSION_WITH_SCALAR(fmin) +MAKE_MARRAY_VERSION_2ARGS(fmod) + +template +sycl::marray fract(sycl::marray a, sycl::marray* b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); resPtr[i] = value; } - *c = resPtr; + *b = resPtr; return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + template -sycl::marray remquo(sycl::marray a, sycl::marray b, - sycl::marray *c) { +sycl::marray frexp(sycl::marray a, sycl::marray* b) { sycl::marray res; sycl::marray resPtr; for (size_t i = 0; i < N; i++) { int value; - res[i] = remquo(a[i], b[i], &value); + res[i] = reference::frexp(a[i], &value); resPtr[i] = value; } - *c = resPtr; + *b = resPtr; return res; } -#endif -using std::rint; -MAKE_VEC_AND_MARRAY_VERSIONS(rint) +MAKE_MARRAY_VERSION_2ARGS(hypot) -template -T rootn(T a, int b) { - return std::pow(static_cast::type>(a), - static_cast::type>(1.0 / b)); +template +sycl::marray ilogb(sycl::marray a) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::ilogb(a[i]); + } + return res; } -template -sycl::vec rootn(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = rootn(a[i], b[i]); + +template +sycl::marray ldexp(sycl::marray a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b[i]); } return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template -sycl::marray rootn(sycl::marray a, sycl::marray b) { +sycl::marray ldexp(sycl::marray a, int b) { sycl::marray res; for (size_t i = 0; i < N; i++) { - res[i] = rootn(a[i], b[i]); + res[i] = reference::ldexp(a[i], b); } return res; } -#endif - -using std::round; -MAKE_VEC_AND_MARRAY_VERSIONS(round) -template -T rsqrt(T a) { - return 1 / std::sqrt(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(rsqrt) +MAKE_MARRAY_VERSION(lgamma) -template -T sincos(T a, T *b) { - *b = std::cos(static_cast::type>(a)); - return std::sin(static_cast::type>(a)); -} -template -sycl::vec sincos(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = sincos(a[i], &value); +template +sycl::marray lgamma_r(sycl::marray a, sycl::marray* b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + int value; + res[i] = reference::lgamma_r(a[i], &value); resPtr[i] = value; } *b = resPtr; return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +MAKE_MARRAY_VERSION(log) +MAKE_MARRAY_VERSION(log2) +MAKE_MARRAY_VERSION(log10) +MAKE_MARRAY_VERSION(log1p) +MAKE_MARRAY_VERSION(logb) + +MAKE_MARRAY_VERSION_3ARGS(mad) +MAKE_MARRAY_VERSION_2ARGS(maxmag) +MAKE_MARRAY_VERSION_2ARGS(minmag) + template -sycl::marray sincos(sycl::marray a, sycl::marray *b) { +sycl::marray modf(sycl::marray a, sycl::marray* b) { sycl::marray res; sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { + for (int i = 0; i < N; i++) { T value; - res[i] = sincos(a[i], &value); + res[i] = reference::modf(a[i], &value); resPtr[i] = value; } *b = resPtr; return res; } -#endif - -template -T sin(T a) { - return std::sin(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(sin) -template -T sinh(T a) { - return std::sinh(static_cast::type>(a)); +template +sycl::marray nan(sycl::marray a) { + return sycl_cts::math::run_func_on_marray( + [](unsigned int x) { return nan(x); }, a); } -MAKE_VEC_AND_MARRAY_VERSIONS(sinh) - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half sinpi(sycl::half a); -#endif -float sinpi(float a); -double sinpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(sinpi) -template -T sqrt(T a) { - return std::sqrt(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(sqrt) +MAKE_MARRAY_VERSION_2ARGS(nextafter) +MAKE_MARRAY_VERSION_2ARGS(pow) -template -T tan(T a) { - return std::tan(static_cast::type>(a)); +template +sycl::marray pown(sycl::marray a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::pown(a[i], b[i]); + } + return res; } -MAKE_VEC_AND_MARRAY_VERSIONS(tan) -template -T tanh(T a) { - return std::tanh(static_cast::type>(a)); +template +sycl_cts::resultRef> powr(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return reference::powr(x, y); }, a, b); } -MAKE_VEC_AND_MARRAY_VERSIONS(tanh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half tanpi(sycl::half a); -#endif -float tanpi(float a); -double tanpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(tanpi) +MAKE_MARRAY_VERSION_2ARGS(remainder) -template -T tgamma(T a) { - return std::tgamma(static_cast::type>(a)); +template +sycl::marray remquo(sycl::marray a, sycl::marray b, + sycl::marray* c) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + int value; + res[i] = reference::remquo(a[i], b[i], &value); + resPtr[i] = value; + } + *c = resPtr; + return res; } -MAKE_VEC_AND_MARRAY_VERSIONS(tgamma) - -using std::trunc; -MAKE_VEC_AND_MARRAY_VERSIONS(trunc) -template -T recip(T a) { - return 1.0 / a; -} -MAKE_VEC_AND_MARRAY_VERSIONS(recip) +MAKE_MARRAY_VERSION(rint) -template -T divide(T a, T b) { - return a / b; +template +sycl::marray rootn(sycl::marray a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::rootn(a[i], b[i]); + } + return res; } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(divide) - -// Geometric functions - -sycl::float4 cross(sycl::float4 p0, sycl::float4 p1); -sycl::float3 cross(sycl::float3 p0, sycl::float3 p1); -sycl::double4 cross(sycl::double4 p0, sycl::double4 p1); -sycl::double3 cross(sycl::double3 p0, sycl::double3 p1); -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1); -sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1); -sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1); -sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1); -#endif +MAKE_MARRAY_VERSION(round) +MAKE_MARRAY_VERSION(rsqrt) +MAKE_MARRAY_VERSION(sign) -template -T dot(T p0, T p1) { - return p0 * p1; -} -template -T dot(sycl::vec a, sycl::vec b) { - T res = 0; - for (int i = 0; i < N; i++) res += a[i] * b[i]; +template +sycl::marray sincos(sycl::marray a, sycl::marray* b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + T value; + res[i] = reference::sincos(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +MAKE_MARRAY_VERSION(sin) +MAKE_MARRAY_VERSION(sinh) +MAKE_MARRAY_VERSION(sinpi) +MAKE_MARRAY_VERSION(sqrt) +MAKE_MARRAY_VERSION_2ARGS(step) +MAKE_MARRAY_VERSION(tan) +MAKE_MARRAY_VERSION(tanh) +MAKE_MARRAY_VERSION(tanpi) +MAKE_MARRAY_VERSION(tgamma) +MAKE_MARRAY_VERSION(trunc) +MAKE_MARRAY_VERSION(recip) +MAKE_MARRAY_VERSION_2ARGS(divide) + template T dot(sycl::marray a, sycl::marray b) { T res = 0; for (size_t i = 0; i < N; i++) res += a[i] * b[i]; return res; } -#endif + +#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +// Generic functions over both scalars and vec / marray types. +// These need to be defined last. template auto length(T p) { - return sqrt(reference::dot(p, p)); + return reference::sqrt(reference::dot(p, p)); } template @@ -1652,11 +1691,6 @@ auto distance(T p0, T p1) { return reference::length(p0 - p1); } -template -T normalize(T p) { - if (p < 0) return -1; - return 1; -} template sycl::vec normalize(sycl::vec a) { sycl::vec res; @@ -1677,22 +1711,9 @@ sycl::marray normalize(sycl::marray a) { } #endif -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fast_dot(float p0); -sycl::half fast_dot(sycl::float2 p0); -sycl::half fast_dot(sycl::float3 p0); -sycl::half fast_dot(sycl::float4 p0); -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -sycl::half fast_dot(sycl::mfloat2 p0); -sycl::half fast_dot(sycl::mfloat3 p0); -sycl::half fast_dot(sycl::mfloat4 p0); -#endif -#endif - template float fast_length(T p0) { - return sqrt(fast_dot(p0)); + return reference::sqrt(fast_dot(p0)); } template @@ -1702,7 +1723,7 @@ float fast_distance(T p0, T p1) { template T fast_normalize(T p0) { - return p0 * rsqrt(fast_dot(p0)); + return p0 * reference::rsqrt(fast_dot(p0)); } } // namespace reference