From d811a3344dd72bc7d2f3f2de703bdfa6d251fff4 Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber
Date: Thu, 7 Oct 2021 14:43:36 +0200
Subject: [PATCH 1/2] support proxy references in VirtualRecord assignment and
 arithmetic operators

---
 include/llama/VirtualRecord.hpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/llama/VirtualRecord.hpp b/include/llama/VirtualRecord.hpp
index a7370238e4..bf54be03b3 100644
--- a/include/llama/VirtualRecord.hpp
+++ b/include/llama/VirtualRecord.hpp
@@ -144,54 +144,54 @@ namespace llama
         struct Assign
         {
             template<typename A, typename B>
-            LLAMA_FN_HOST_ACC_INLINE auto operator()(A& a, const B& b) const -> decltype(auto)
+            LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
             {
-                return a = b;
+                return std::forward<A>(a) = b;
             }
         };
 
         struct PlusAssign
         {
             template<typename A, typename B>
-            LLAMA_FN_HOST_ACC_INLINE auto operator()(A& a, const B& b) const -> decltype(auto)
+            LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
             {
-                return a += b;
+                return std::forward<A>(a) += b;
             }
         };
 
         struct MinusAssign
         {
             template<typename A, typename B>
-            LLAMA_FN_HOST_ACC_INLINE auto operator()(A& a, const B& b) const -> decltype(auto)
+            LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
             {
-                return a -= b;
+                return std::forward<A>(a) -= b;
             }
         };
 
         struct MultiplyAssign
         {
             template<typename A, typename B>
-            LLAMA_FN_HOST_ACC_INLINE auto operator()(A& a, const B& b) const -> decltype(auto)
+            LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
             {
-                return a *= b;
+                return std::forward<A>(a) *= b;
             }
         };
 
         struct DivideAssign
         {
             template<typename A, typename B>
-            LLAMA_FN_HOST_ACC_INLINE auto operator()(A& a, const B& b) const -> decltype(auto)
+            LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
             {
-                return a /= b;
+                return std::forward<A>(a) /= b;
            }
         };
 
         struct ModuloAssign
         {
             template<typename A, typename B>
-            LLAMA_FN_HOST_ACC_INLINE auto operator()(A& a, const B& b) const -> decltype(auto)
+            LLAMA_FN_HOST_ACC_INLINE auto operator()(A&& a, const B& b) const -> decltype(auto)
             {
-                return a %= b;
+                return std::forward<A>(a) %= b;
             }
         };
 
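Why the forwarding reference: a computed mapping, such as the one added in the second patch, hands out a proxy reference object by value rather than an lvalue of the field type. VirtualRecord's element-wise assignment passes that temporary to the functors above, where it cannot bind to a plain A&. A minimal sketch of the mechanism, using a hypothetical Proxy type that stands in for such a computed reference (not part of the patch):

    // Minimal sketch (hypothetical Proxy type, not part of the patch): a proxy
    // reference is a temporary whose operator= writes through to storage.
    #include <cstdio>
    #include <utility>

    struct Proxy
    {
        int* target;
        auto operator=(int v) -> Proxy& // assigning through the proxy stores to *target
        {
            *target = v;
            return *this;
        }
    };

    struct Assign
    {
        // A&& is a forwarding reference, so it also binds to rvalue proxies like Proxy{...}
        template<typename A, typename B>
        auto operator()(A&& a, const B& b) const -> decltype(auto)
        {
            return std::forward<A>(a) = b;
        }
    };

    int main()
    {
        int storage = 0;
        Assign{}(Proxy{&storage}, 42); // with an A& parameter this temporary would not bind
        std::printf("%d\n", storage);  // prints 42
    }
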
From 61ec8080ca5660e28e3e3bfcc25e838df9ea28de Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber
Date: Thu, 7 Oct 2021 14:58:51 +0200
Subject: [PATCH 2/2] add example using byte splitting

---
 CMakeLists.txt                    |   1 +
 examples/bytesplit/CMakeLists.txt |   9 ++
 examples/bytesplit/bytesplit.cpp  | 171 ++++++++++++++++++++++++++++++
 3 files changed, 181 insertions(+)
 create mode 100644 examples/bytesplit/CMakeLists.txt
 create mode 100644 examples/bytesplit/bytesplit.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c025bcaaf2..5eb58482a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,7 @@ if (LLAMA_BUILD_EXAMPLES)
     add_subdirectory("examples/bufferguard")
     add_subdirectory("examples/raycast")
     add_subdirectory("examples/bitpack")
+    add_subdirectory("examples/bytesplit")
 
     # alpaka examples
     find_package(alpaka 0.7.0 QUIET)
diff --git a/examples/bytesplit/CMakeLists.txt b/examples/bytesplit/CMakeLists.txt
new file mode 100644
index 0000000000..7f025935b6
--- /dev/null
+++ b/examples/bytesplit/CMakeLists.txt
@@ -0,0 +1,9 @@
+cmake_minimum_required (VERSION 3.15)
+project(llama-bytesplit CXX)
+
+if (NOT TARGET llama::llama)
+    find_package(llama REQUIRED)
+endif()
+add_executable(${PROJECT_NAME} bytesplit.cpp)
+target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17)
+target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama)
diff --git a/examples/bytesplit/bytesplit.cpp b/examples/bytesplit/bytesplit.cpp
new file mode 100644
index 0000000000..87b23b194f
--- /dev/null
+++ b/examples/bytesplit/bytesplit.cpp
@@ -0,0 +1,171 @@
+#include <algorithm>
+#include <fmt/core.h>
+#include <llama/llama.hpp>
+
+// clang-format off
+namespace tag
+{
+    struct A{};
+    struct B{};
+    struct C{};
+    struct D{};
+    struct E{};
+    struct F{};
+} // namespace tag
+
+using Data = llama::Record<
+    llama::Field,
+    llama::Field,
+    llama::Field,
+    llama::Field,
+    llama::Field,
+    llama::Field
+>;
+// clang-format on
+
+template<typename T>
+using ReplaceByByteArray = std::byte[sizeof(T)];
+
+template<typename RecordDim>
+using SplitBytes = llama::TransformLeaves<RecordDim, ReplaceByByteArray>;
+
+template<typename TArrayExtents, typename TRecordDim>
+struct BytesplitSoA : private llama::mapping::SoA<TArrayExtents, SplitBytes<TRecordDim>, false>
+{
+    using Base = llama::mapping::SoA<TArrayExtents, SplitBytes<TRecordDim>, false>;
+
+    using ArrayExtents = typename Base::ArrayExtents;
+    using ArrayIndex = typename Base::ArrayIndex;
+    using RecordDim = TRecordDim; // hide Base::RecordDim
+    using Base::blobCount;
+
+    using Base::Base;
+    using Base::blobSize;
+    using Base::extents;
+
+    LLAMA_FN_HOST_ACC_INLINE
+    constexpr explicit BytesplitSoA(TArrayExtents extents, TRecordDim = {}) : Base(extents)
+    {
+    }
+
+    template<std::size_t... RecordCoords>
+    static constexpr auto isComputed(llama::RecordCoord<RecordCoords...>)
+    {
+        return true;
+    }
+
+    template<typename QualifiedBase, typename RecordCoord, typename BlobArray>
+    struct Reference
+    {
+        QualifiedBase& innerMapping;
+        ArrayIndex ai;
+        BlobArray& blobs;
+
+        using DstType = llama::GetType<TRecordDim, RecordCoord>;
+
+        // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
+        operator DstType() const
+        {
+            DstType v;
+            auto* p = reinterpret_cast<std::byte*>(&v);
+            boost::mp11::mp_for_each<boost::mp11::mp_iota_c<sizeof(DstType)>>(
+                [&](auto ic)
+                {
+                    constexpr auto i = decltype(ic)::value;
+                    const auto [nr, off] = innerMapping.blobNrAndOffset(ai, llama::Cat<RecordCoord, llama::RecordCoord<i>>{});
+                    p[i] = blobs[nr][off];
+                });
+            return v;
+        }
+
+        auto operator=(DstType v) -> Reference&
+        {
+            auto* p = reinterpret_cast<const std::byte*>(&v);
+            boost::mp11::mp_for_each<boost::mp11::mp_iota_c<sizeof(DstType)>>(
+                [&](auto ic)
+                {
+                    constexpr auto i = decltype(ic)::value;
+                    const auto [nr, off] = innerMapping.blobNrAndOffset(ai, llama::Cat<RecordCoord, llama::RecordCoord<i>>{});
+                    blobs[nr][off] = p[i];
+                });
+            return *this;
+        }
+    };
+
+    template<std::size_t... RecordCoords, typename BlobArray>
+    LLAMA_FN_HOST_ACC_INLINE constexpr auto compute(
+        typename Base::ArrayIndex ai,
+        llama::RecordCoord<RecordCoords...>,
+        BlobArray& blobs) const
+    {
+        return Reference<const BytesplitSoA, llama::RecordCoord<RecordCoords...>, BlobArray>{*this, ai, blobs};
+    }
+};
+
+auto main() -> int
+{
+    constexpr auto N = 128;
+    using ArrayExtents = llama::ArrayExtentsDynamic<1>;
+    const auto mapping = BytesplitSoA<ArrayExtents, Data>{{N}};
+
+    auto view = llama::allocView(mapping);
+
+    int value = 0;
+    for(std::size_t i = 0; i < N; i++)
+        llama::forEachLeafCoord<Data>([&](auto rc) { view(i)(rc) = ++value; });
+
+    value = 0;
+    for(std::size_t i = 0; i < N; i++)
+        llama::forEachLeafCoord<Data>(
+            [&](auto rc)
+            {
+                using T = llama::GetType<Data, decltype(rc)>;
+                ++value;
+                if(view(i)(rc) != static_cast<T>(value))
+                    fmt::print("Error: value after store is corrupt. {} != {}\n", view(i)(rc), value);
+            });
+
+    // extract into a view of unsplit fields
+    auto viewExtracted = llama::allocViewUninitialized(llama::mapping::AoS<ArrayExtents, Data>{{N}});
+    llama::copy(view, viewExtracted);
+    if(!std::equal(view.begin(), view.end(), viewExtracted.begin(), viewExtracted.end()))
+        fmt::print("ERROR: unsplit view is different\n");
+
+    // compute something on the extracted view
+    for(std::size_t i = 0; i < N; i++)
+        viewExtracted(i) *= 2;
+
+    // rearrange back into split view
+    llama::copy(viewExtracted, view);
+
+    value = 0;
+    for(std::size_t i = 0; i < N; i++)
+        llama::forEachLeafCoord<Data>(
+            [&](auto rc)
+            {
+                using T = llama::GetType<Data, decltype(rc)>;
+                ++value;
+                if(view(i)(rc) != static_cast<T>(static_cast<T>(value) * 2))
+                    fmt::print("Error: value after resplit is corrupt. {} != {}\n", view(i)(rc), value);
+            });
+
+    // compute something on the split view
+    for(std::size_t i = 0; i < N; i++)
+        view(i) = view(i) * 2; // cannot do view(i) *= 2; with proxy references
+
+    value = 0;
+    for(std::size_t i = 0; i < N; i++)
+        llama::forEachLeafCoord<Data>(
+            [&](auto rc)
+            {
+                using T = llama::GetType<Data, decltype(rc)>;
+                ++value;
+                if(view(i)(rc) != static_cast<T>(static_cast<T>(value) * 4))
+                    fmt::print(
+                        "Error: value after computation on split data is corrupt. {} != {}\n",
+                        view(i)(rc),
+                        value);
+            });
+
+    fmt::print("Done\n");
+}
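The layout idea behind BytesplitSoA can also be seen without LLAMA: every leaf field of type T becomes std::byte[sizeof(T)], so the SoA mapping stores byte 0 of a field for all records contiguously, then byte 1, and so on, and the computed Reference scatters a value into those per-byte blobs on store and reassembles it on load. A standalone sketch of that round trip, using hypothetical names and plain std::vector blobs (not part of the patch):

    // Standalone sketch (not part of the patch): one array per byte position plays
    // the role of the SoA blobs over the split record dimension.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main()
    {
        constexpr std::size_t n = 8;
        std::uint32_t values[n];
        for(std::size_t i = 0; i < n; i++)
            values[i] = static_cast<std::uint32_t>(i * 1000);

        // "split" store: byte position b of record i goes to blobs[b][i],
        // which is what Reference::operator= does through blobNrAndOffset
        std::vector<std::vector<std::byte>> blobs(sizeof(std::uint32_t), std::vector<std::byte>(n));
        for(std::size_t i = 0; i < n; i++)
        {
            std::byte bytes[sizeof(std::uint32_t)];
            std::memcpy(bytes, &values[i], sizeof(bytes));
            for(std::size_t b = 0; b < sizeof(std::uint32_t); b++)
                blobs[b][i] = bytes[b];
        }

        // "split" load: reassemble record 3 byte by byte,
        // which is what Reference::operator DstType() does
        std::byte bytes[sizeof(std::uint32_t)];
        for(std::size_t b = 0; b < sizeof(std::uint32_t); b++)
            bytes[b] = blobs[b][3];
        std::uint32_t restored = 0;
        std::memcpy(&restored, bytes, sizeof(restored));
        std::printf("record 3: %u\n", restored); // prints 3000
    }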