diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 16915d9040..5539f9ca10 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -72,6 +72,8 @@ jobs: fail-fast: false matrix: include: + - name: build-ubuntu-gcc9 + env: {CXX: g++-9} #- name: build-ubuntu-gcc9-cuda11.0 # env: {CXX: g++-9, CUDA_URL: "https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_450.51.06_linux.run"} # cuda: true diff --git a/include/llama/Tuple.hpp b/include/llama/Tuple.hpp index ea52eb72e5..30c4b02d47 100644 --- a/include/llama/Tuple.hpp +++ b/include/llama/Tuple.hpp @@ -8,69 +8,112 @@ namespace llama { + // for implementation ideas, see e.g.: + // http://mitchnull.blogspot.com/2012/06/c11-tuple-implementation-details-part-1.html + template - struct Tuple + struct Tuple; + + namespace internal { - }; + template && !std::is_final_v> + struct TupleImplLeaf + { + T val; + + LLAMA_FN_HOST_ACC_INLINE constexpr auto value() -> T& + { + return val; + } + + LLAMA_FN_HOST_ACC_INLINE constexpr auto value() const -> const T& + { + return val; + } + }; + + template + struct TupleImplLeaf : T + { + LLAMA_FN_HOST_ACC_INLINE constexpr auto value() -> T& + { + return *this; + } + + LLAMA_FN_HOST_ACC_INLINE constexpr auto value() const -> const T& + { + return *this; + } + }; + + template + struct TupleImpl; + + template<> + struct TupleImpl> + { + }; + + template + struct TupleImpl, Elements...> : TupleImplLeaf... + { + using FirstElement = boost::mp11::mp_first>; + using RestTuple = boost::mp11::mp_pop_front>; + + constexpr TupleImpl() = default; + + /// Construct a tuple from values of the same types as the tuple stores. + LLAMA_FN_HOST_ACC_INLINE constexpr explicit TupleImpl(Elements... args) + : TupleImplLeaf{args}... + { + } + + /// Construct a tuple from forwarded values of potentially different types as the tuple stores. + template< + typename... Ts, + std::enable_if_t< + sizeof...(Elements) == sizeof...(Ts) && (std::is_constructible_v && ...), + int> = 0> + LLAMA_FN_HOST_ACC_INLINE constexpr explicit TupleImpl(Ts&&... args) + : TupleImplLeaf{Elements(std::move(args))}... + { + } + }; + } // namespace internal - /// Tuple class like `std::tuple` but suitable for use with offloading devices like GPUs. - template - struct Tuple + /// Tuple class like `std::tuple` but suitable for use with offloading devices like GPUs. See also \ref + /// internal::TupleImpl. + template + struct Tuple : internal::TupleImpl, Elements...> { - using FirstElement = TFirstElement; - using RestTuple = Tuple; + private: + template + using Base = internal::TupleImplLeaf, I>>; - constexpr Tuple() = default; + public: + using internal::TupleImpl, Elements...>::TupleImpl; - /// Construct a tuple from values of the same types as the tuple stores. - LLAMA_FN_HOST_ACC_INLINE constexpr explicit Tuple(FirstElement first, Elements... rest) - : first(std::move(first)) - , rest(std::move(rest)...) + template + LLAMA_FN_HOST_ACC_INLINE friend constexpr auto get(Tuple& tuple) -> auto& { + return tuple.Base::value(); } - /// Construct a tuple from forwarded values of potentially different types as the tuple stores. - // SFINAE away this ctor if tuple elements cannot be constructed from ctor arguments - template< - typename T, - typename... Ts, - std::enable_if_t< - sizeof...(Elements) == sizeof...(Ts) - && std::is_constructible_v && (std::is_constructible_v && ...), - int> = 0> - LLAMA_FN_HOST_ACC_INLINE constexpr explicit Tuple(T&& firstArg, Ts&&... restArgs) - : first(std::forward(firstArg)) - , rest(std::forward(restArgs)...) + template + LLAMA_FN_HOST_ACC_INLINE friend constexpr auto get(const Tuple& tuple) -> const auto& { + return tuple.Base::value(); } - - FirstElement first; ///< the first element (if existing) -#ifndef __NVCC__ - [[no_unique_address]] // nvcc 11.3 ICE -#endif - RestTuple rest; ///< the remaining elements }; - template - Tuple(Elements...) -> Tuple>...>; - template - LLAMA_FN_HOST_ACC_INLINE constexpr auto get(Tuple& tuple) -> auto& - { - if constexpr(Pos == 0) - return tuple.first; - else - return get(tuple.rest); - } + constexpr auto get(Tuple& tuple); template - LLAMA_FN_HOST_ACC_INLINE constexpr auto get(const Tuple& tuple) -> const auto& - { - if constexpr(Pos == 0) - return tuple.first; - else - return get(tuple.rest); - } + constexpr auto get(const Tuple& tuple); + + template + Tuple(Elements...) -> Tuple>...>; } // namespace llama template @@ -143,44 +186,34 @@ namespace llama namespace internal { - template - struct TupleReplaceImpl - { - LLAMA_FN_HOST_ACC_INLINE - auto operator()(Tuple const tuple, Replacement const replacement) - { - return tupleCat( - llama::Tuple{tuple.first}, - TupleReplaceImpl()(tuple.rest, replacement)); - }; - }; - - template - struct TupleReplaceImpl<0, Tuple, Replacement> - { - LLAMA_FN_HOST_ACC_INLINE - auto operator()(Tuple tuple, Replacement const replacement) - { - return tupleCat(Tuple{replacement}, tuple.rest); - }; - }; - - template - struct TupleReplaceImpl<0, Tuple, Replacement> + template< + std::size_t Pos, + typename Tuple, + typename Replacement, + std::size_t... IsBefore, + std::size_t... IsAfter> + LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleReplaceImpl( + Tuple&& tuple, + Replacement&& replacement, + std::index_sequence, + std::index_sequence) { - LLAMA_FN_HOST_ACC_INLINE - auto operator()(Tuple, Replacement const replacement) - { - return Tuple{replacement}; - } - }; + return llama::Tuple{ + get(std::forward(tuple))..., + std::forward(replacement), + get(std::forward(tuple))...}; + } } // namespace internal /// Creates a copy of a tuple with the element at position Pos replaced by replacement. template - LLAMA_FN_HOST_ACC_INLINE auto tupleReplace(Tuple tuple, Replacement replacement) + LLAMA_FN_HOST_ACC_INLINE constexpr auto tupleReplace(Tuple&& tuple, Replacement&& replacement) { - return internal::TupleReplaceImpl()(tuple, replacement); + return internal::tupleReplaceImpl( + std::forward(tuple), + std::forward(replacement), + std::make_index_sequence{}, + std::make_index_sequence> - Pos - 1>{}); } namespace internal @@ -206,10 +239,22 @@ namespace llama return internal::tupleTransformHelper(std::make_index_sequence{}, tuple, functor); } + namespace internal + { + template + LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_front_impl( + const Tuple& tuple, + std::index_sequence) + { + return Tuple{get(tuple)...}; + } + } // namespace internal + /// Returns a copy of the tuple without the first element. - template + template LLAMA_FN_HOST_ACC_INLINE constexpr auto pop_front(const Tuple& tuple) { - return tuple.rest; + static_assert(sizeof...(Elements) > 0); + return internal::pop_front_impl(tuple, std::make_index_sequence{}); } } // namespace llama diff --git a/include/llama/View.hpp b/include/llama/View.hpp index e282aa3023..5fcb4c0900 100644 --- a/include/llama/View.hpp +++ b/include/llama/View.hpp @@ -81,7 +81,7 @@ namespace llama using RecordDim = typename View::RecordDim; forEachADCoord( view.mapping().extents(), - [&](typename View::ArrayIndex ai) + [&]([[maybe_unused]] typename View::ArrayIndex ai) { if constexpr(isRecord || internal::IsBoundedArray::value) forEachLeafCoord( diff --git a/include/llama/mapping/tree/Functors.hpp b/include/llama/mapping/tree/Functors.hpp index ba8e7d8582..38d0d1b703 100644 --- a/include/llama/mapping/tree/Functors.hpp +++ b/include/llama/mapping/tree/Functors.hpp @@ -79,20 +79,20 @@ namespace llama::mapping::tree::functor const NodeOrLeaf& nodeOrLeaf, std::size_t arraySize = 0) { + constexpr auto ci = BasicCoord::FirstElement::childIndex; if constexpr(std::tuple_size_v == 1) - return Tuple{TreeCoordElement{ - arraySize + LLAMA_COPY(basicCoord.first.arrayIndex)}}; + return Tuple{TreeCoordElement{arraySize + LLAMA_COPY(get<0>(basicCoord).arrayIndex)}}; else { - const auto& branch = get(nodeOrLeaf.childs); - auto first = TreeCoordElement>{}; + const auto& branch = get(nodeOrLeaf.childs); + auto first = TreeCoordElement>{}; return tupleCat( Tuple{first}, basicCoordToResultCoordImpl( - basicCoord.rest, + pop_front(basicCoord), branch, - (arraySize + LLAMA_COPY(basicCoord.first.arrayIndex)) * LLAMA_COPY(branch.count))); + (arraySize + LLAMA_COPY(get<0>(basicCoord).arrayIndex)) * LLAMA_COPY(branch.count))); } } }; @@ -105,7 +105,10 @@ namespace llama::mapping::tree::functor if constexpr(std::is_same_v>) return node; else - return getNode(get(node.childs)); + { + constexpr auto ci = TreeCoord::FirstElement::childIndex; + return getNode(get(node.childs)); + } } template @@ -117,9 +120,10 @@ namespace llama::mapping::tree::functor return Node{newValue, tree.childs}; else { - auto current = get(tree.childs); + constexpr auto ci = TreeCoord::FirstElement::childIndex; + auto current = get(tree.childs); auto replacement = changeNodeRuntime(current, newValue); - auto children = tupleReplace(tree.childs, replacement); + auto children = tupleReplace(tree.childs, replacement); return Node{tree.count, children}; } } @@ -161,9 +165,10 @@ namespace llama::mapping::tree::functor } else { - auto current = get(tree.childs); + constexpr auto ci = TreeCoord::FirstElement::childIndex; + auto current = get(tree.childs); auto replacement = changeNodeChildsRuntime(current, newValue); - auto children = tupleReplace(tree.childs, replacement); + auto children = tupleReplace(tree.childs, replacement); return Node{tree.count, children}; } } @@ -214,31 +219,32 @@ namespace llama::mapping::tree::functor template LLAMA_FN_HOST_ACC_INLINE auto basicCoordToResultCoordImpl(const BasicCoord& basicCoord, const Tree& tree) const { + constexpr auto ci = BasicCoord::FirstElement::childIndex; if constexpr(std::is_same_v>) { if constexpr(std::is_same_v>) return Tuple{}; else { - const auto& childTree = get(tree.childs); - const auto rt1 = basicCoord.first.arrayIndex / amount; + const auto& childTree = get(tree.childs); + const auto rt1 = get<0>(basicCoord).arrayIndex / amount; const auto rt2 - = basicCoord.first.arrayIndex % amount * childTree.count + basicCoord.rest.first.arrayIndex; - auto rt1Child = TreeCoordElement{rt1}; + = get<0>(basicCoord).arrayIndex % amount * childTree.count + get<1>(basicCoord).arrayIndex; + auto rt1Child = TreeCoordElement{rt1}; auto rt2Child = TreeCoordElement{rt2}; - return tupleCat(Tuple{rt1Child}, tupleCat(Tuple{rt2Child}, pop_front(basicCoord.rest))); + return tupleCat(Tuple{rt1Child}, tupleCat(Tuple{rt2Child}, pop_front<2>(basicCoord))); } } else { - if constexpr(InternalTreeCoord::FirstElement::childIndex != BasicCoord::FirstElement::childIndex) + if constexpr(InternalTreeCoord::FirstElement::childIndex != ci) return basicCoord; else { auto rest = basicCoordToResultCoordImpl( pop_front(basicCoord), - get(tree.childs)); - return tupleCat(Tuple{basicCoord.first}, rest); + get(tree.childs)); + return tupleCat(Tuple{get<0>(basicCoord)}, rest); } } } diff --git a/include/llama/mapping/tree/Mapping.hpp b/include/llama/mapping/tree/Mapping.hpp index c42a753be0..b10607c2e7 100644 --- a/include/llama/mapping/tree/Mapping.hpp +++ b/include/llama/mapping/tree/Mapping.hpp @@ -31,7 +31,7 @@ namespace llama::mapping::tree LLAMA_FN_HOST_ACC_INLINE MergeFunctors(const Tree& tree, const Tuple& treeOperationList) - : operation(treeOperationList.first) + : operation(get<0>(treeOperationList)) , treeAfterOp(operation.basicToResult(tree)) , next(treeAfterOp, pop_front(treeOperationList)) { @@ -148,15 +148,15 @@ namespace llama::mapping::tree LLAMA_FN_HOST_ACC_INLINE auto getTreeBlobByte(const Tree& tree, const Tuple& treeCoord) -> std::size_t { - const auto firstArrayIndex = treeCoord.first.arrayIndex; + const auto firstArrayIndex = get<0>(treeCoord).arrayIndex; if constexpr(sizeof...(Coords) > 1) { - constexpr auto firstChildIndex = decltype(treeCoord.first.childIndex)::value; + constexpr auto firstChildIndex = decltype(get<0>(treeCoord).childIndex)::value; return getTreeBlobSize(tree.childs, firstArrayIndex) + sumChildrenSmallerThan( tree, std::make_index_sequence>{}) - + getTreeBlobByte(get(tree.childs), treeCoord.rest); + + getTreeBlobByte(get(tree.childs), pop_front(treeCoord)); } else return sizeof(typename Tree::Type) * firstArrayIndex; diff --git a/include/llama/mapping/tree/toString.hpp b/include/llama/mapping/tree/toString.hpp index 7c45d6072d..c5a755916f 100644 --- a/include/llama/mapping/tree/toString.hpp +++ b/include/llama/mapping/tree/toString.hpp @@ -3,6 +3,7 @@ #pragma once +#include "../../Tuple.hpp" #include "TreeFromDimensions.hpp" #include @@ -33,9 +34,9 @@ namespace llama::mapping::tree auto toString(Tuple tree) -> std::string { if constexpr(sizeof...(Elements) > 1) - return toString(tree.first) + " , " + toString(tree.rest); + return toString(get<0>(tree)) + " , " + toString(pop_front(tree)); else - return toString(tree.first); + return toString(get<0>(tree)); } namespace internal diff --git a/tests/tuple.cpp b/tests/tuple.cpp index 4b54b6619f..df7343a3af 100644 --- a/tests/tuple.cpp +++ b/tests/tuple.cpp @@ -13,22 +13,35 @@ TEST_CASE("Tuple.CTAD") STATIC_REQUIRE(std::is_same_v>); } +TEST_CASE("Tuple.size") +{ + STATIC_REQUIRE(std::is_empty_v>); + STATIC_REQUIRE(sizeof(llama::Tuple{}) == 1 * sizeof(int)); + STATIC_REQUIRE(sizeof(llama::Tuple{}) == 2 * sizeof(int)); + STATIC_REQUIRE(sizeof(llama::Tuple{}) == 3 * sizeof(int)); + STATIC_REQUIRE(sizeof(llama::Tuple>{}) == 2 * sizeof(int)); + STATIC_REQUIRE(std::is_empty_v, + std::integral_constant, + std::integral_constant>>); +} + TEST_CASE("Tuple.get") { constexpr auto t = llama::Tuple{1, 1.0f, nullptr}; - STATIC_REQUIRE(llama::get<0>(t) == 1); - STATIC_REQUIRE(llama::get<1>(t) == 1.0f); - STATIC_REQUIRE(llama::get<2>(t) == nullptr); + STATIC_REQUIRE(get<0>(t) == 1); + STATIC_REQUIRE(get<1>(t) == 1.0f); + STATIC_REQUIRE(get<2>(t) == nullptr); } TEST_CASE("Tuple.get_mutable") { auto t = llama::Tuple{1, 1.0f}; - llama::get<0>(t)++; - llama::get<1>(t)++; - CHECK(llama::get<0>(t) == 2); - CHECK(llama::get<1>(t) == 2.0f); + get<0>(t)++; + get<1>(t)++; + CHECK(get<0>(t) == 2); + CHECK(get<1>(t) == 2.0f); } TEST_CASE("Tuple.structured_binding") @@ -60,9 +73,9 @@ TEST_CASE("Tuple.converting_ctor") }; constexpr auto t = llama::Tuple{1, 42, nullptr}; - STATIC_REQUIRE(llama::get<0>(t) == 1.0); - STATIC_REQUIRE(llama::get<1>(t).i == 84); - STATIC_REQUIRE(llama::get<2>(t).p == nullptr); + STATIC_REQUIRE(get<0>(t) == 1.0); + STATIC_REQUIRE(get<1>(t).i == 84); + STATIC_REQUIRE(get<2>(t).p == nullptr); } #endif @@ -120,4 +133,6 @@ TEST_CASE("Tuple.pop_front") STATIC_REQUIRE(llama::pop_front(llama::Tuple{1}) == llama::Tuple{}); STATIC_REQUIRE(llama::pop_front(llama::Tuple{1, 1.0f}) == llama::Tuple{1.0f}); STATIC_REQUIRE(llama::pop_front(llama::Tuple{1.0f, 1, nullptr}) == llama::Tuple{1, nullptr}); + STATIC_REQUIRE(llama::pop_front<2>(llama::Tuple{1.0f, 1, nullptr}) == llama::Tuple{nullptr}); + STATIC_REQUIRE(llama::pop_front<3>(llama::Tuple{1.0f, 1, nullptr}) == llama::Tuple{}); }