Skip to content

Commit

Permalink
add single blob SoA version with subarray alignment
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Jun 15, 2022
1 parent 193ad64 commit 3a552a8
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 63 deletions.
28 changes: 19 additions & 9 deletions include/llama/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,14 @@ namespace llama
template<typename Mapping>
inline constexpr std::size_t aosoaLanes = 0;

template<typename ArrayExtents, typename RecordDim, bool SeparateBuffers, typename LinearizeArrayDimsFunctor>
template<
typename ArrayExtents,
typename RecordDim,
bool SeparateBuffers,
bool AlignSubArrays,
typename LinearizeArrayDimsFunctor>
inline constexpr std::size_t aosoaLanes<
mapping::SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>> = std::
mapping::SoA<ArrayExtents, RecordDim, SeparateBuffers, AlignSubArrays, LinearizeArrayDimsFunctor>> = std::
numeric_limits<std::size_t>::max();

template<typename ArrayExtents, typename RecordDim, std::size_t Lanes, typename LinearizeArrayDimsFunctor>
Expand Down Expand Up @@ -354,15 +359,18 @@ namespace llama
typename RecordDim,
typename LinearizeArrayDims,
std::size_t LanesSrc,
bool DstSeparateBuffers>
bool DstSeparateBuffers,
bool DstAlignSubArrays>
struct Copy<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>,
mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, LinearizeArrayDims>>
mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, DstAlignSubArrays, LinearizeArrayDims>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayDims>, SrcBlob>& srcView,
View<mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, LinearizeArrayDims>, DstBlob>& dstView,
View<
mapping::SoA<ArrayExtents, RecordDim, DstSeparateBuffers, DstAlignSubArrays, LinearizeArrayDims>,
DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
{
Expand All @@ -376,15 +384,17 @@ namespace llama
typename RecordDim,
typename LinearizeArrayDims,
std::size_t LanesDst,
bool SrcSeparateBuffers>
bool SrcSeparateBuffers,
bool SrcAlignSubArrays>
struct Copy<
mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, LinearizeArrayDims>,
mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, SrcAlignSubArrays, LinearizeArrayDims>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, LinearizeArrayDims>, SrcBlob>&
srcView,
const View<
mapping::SoA<ArrayExtents, RecordDim, SrcSeparateBuffers, SrcAlignSubArrays, LinearizeArrayDims>,
SrcBlob>& srcView,
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayDims>, DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
Expand Down
2 changes: 1 addition & 1 deletion include/llama/mapping/AoS.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ namespace llama::mapping
using MinAlignedAoS
= AoS<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor, FlattenRecordDimMinimizePadding>;

/// Array of struct mapping packing the field types tightly, violating the types alignment requirements.
/// Array of struct mapping packing the field types tightly, violating the type's alignment requirements.
/// \see AoS
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using PackedAoS = AoS<ArrayExtents, RecordDim, false, LinearizeArrayDimsFunctor>;
Expand Down
66 changes: 53 additions & 13 deletions include/llama/mapping/SoA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ namespace llama::mapping
{
/// Struct of array mapping. Used to create a \ref View via \ref allocView.
/// \tparam SeparateBuffers If true, every element of the record dimension is mapped to its own buffer.
/// \tparam AlignSubArrays Only relevant when SeparateBuffers == false. If true, aligns the sub arrays created
/// within the single blob by inserting padding.
/// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and
/// how big the linear domain gets.
/// \tparam FlattenRecordDimSingleBlob Defines how the record dimension's fields should be flattened if
Expand All @@ -20,6 +22,7 @@ namespace llama::mapping
typename TArrayExtents,
typename TRecordDim,
bool SeparateBuffers = true,
bool AlignSubArrays = false,
typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp,
template<typename> typename FlattenRecordDimSingleBlob = FlattenRecordDimInOrder>
struct SoA : MappingBase<TArrayExtents, TRecordDim>
Expand Down Expand Up @@ -63,6 +66,8 @@ namespace llama::mapping
typename Base::ArrayIndex ad,
RecordCoord<RecordCoords...> = {}) const -> NrAndOffset<size_type>
{
const auto subArrayOffset = LinearizeArrayDimsFunctor{}(ad, Base::extents())
* static_cast<size_type>(sizeof(GetType<TRecordDim, RecordCoord<RecordCoords...>>));
if constexpr(SeparateBuffers)
{
constexpr auto blob = flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>;
Expand All @@ -77,11 +82,32 @@ namespace llama::mapping
*& // mess with nvcc compiler state to workaround bug
#endif
Flattener::template flatIndex<RecordCoords...>;
const auto offset = LinearizeArrayDimsFunctor{}(ad, Base::extents())
* static_cast<size_type>(sizeof(GetType<TRecordDim, RecordCoord<RecordCoords...>>))
+ static_cast<size_type>(flatOffsetOf<typename Flattener::FlatRecordDim, flatFieldIndex, false>)
* LinearizeArrayDimsFunctor{}.size(Base::extents());
return {0, offset};
const auto flatSize = LinearizeArrayDimsFunctor{}.size(Base::extents());
using FRD = typename Flattener::FlatRecordDim;
if constexpr(AlignSubArrays)
{
// TODO(bgruber): we can take a shortcut here if we know that flatSize is a multiple of every type's
// alignment. We can also precompute a table of sub array starts (and maybe store it), or rely on
// the compiler pulling it out of loops.
using namespace boost::mp11;
size_type offset = 0;
mp_for_each<mp_transform<mp_identity, mp_take_c<FRD, flatFieldIndex>>>(
[&](auto ti)
{
using FieldType = typename decltype(ti)::type;
offset = roundUpToMultiple(offset, static_cast<size_type>(alignof(FieldType)));
offset += static_cast<size_type>(sizeof(FieldType)) * flatSize;
});
offset = roundUpToMultiple(offset, static_cast<size_type>(alignof(mp_at_c<FRD, flatFieldIndex>)));
offset += subArrayOffset;
return {0, offset};
}
else
{
const auto offset
= subArrayOffset + static_cast<size_type>(flatOffsetOf<FRD, flatFieldIndex, false>) * flatSize;
return {0, offset};
}
}
}
};
Expand All @@ -90,28 +116,42 @@ namespace llama::mapping
template<typename TArrayExtents, typename TRecordDim>
SoA(TArrayExtents, TRecordDim) -> SoA<TArrayExtents, TRecordDim>;

/// Struct of array mapping storing the entire layout in a single blob.
/// \see SoA
/// Struct of array mapping storing the entire layout in a single blob. The starts of the sub arrays are aligned by
/// inserting padding. \see SoA
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using SingleBlobSoA = SoA<ArrayExtents, RecordDim, false, LinearizeArrayDimsFunctor>;
using AlignedSingleBlobSoA = SoA<ArrayExtents, RecordDim, false, true, LinearizeArrayDimsFunctor>;

/// Struct of array mapping storing the entire layout in a single blob. The sub arrays are tightly packed,
/// violating the type's alignment requirements. \see SoA
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using PackedSingleBlobSoA = SoA<ArrayExtents, RecordDim, false, false, LinearizeArrayDimsFunctor>;

/// Struct of array mapping storing each attribute of the record dimension in a separate blob.
/// \see SoA
template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
using MultiBlobSoA = SoA<ArrayExtents, RecordDim, true, LinearizeArrayDimsFunctor>;
using MultiBlobSoA = SoA<ArrayExtents, RecordDim, true, false, LinearizeArrayDimsFunctor>;

/// Binds parameters to an \ref SoA mapping except for array and record dimension, producing a quoted
/// meta function accepting the latter two. Useful to prepare this mapping for a meta mapping.
template<bool SeparateBuffers = true, typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
template<
bool SeparateBuffers = true,
bool AlignSubArrays = false,
typename LinearizeArrayDimsFunctor = LinearizeArrayDimsCpp>
struct BindSoA
{
template<typename ArrayExtents, typename RecordDim>
using fn = SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>;
using fn = SoA<ArrayExtents, RecordDim, SeparateBuffers, AlignSubArrays, LinearizeArrayDimsFunctor>;
};

template<typename Mapping>
inline constexpr bool isSoA = false;

template<typename ArrayExtents, typename RecordDim, bool SeparateBuffers, typename LinearizeArrayDimsFunctor>
inline constexpr bool isSoA<SoA<ArrayExtents, RecordDim, SeparateBuffers, LinearizeArrayDimsFunctor>> = true;
template<
typename ArrayExtents,
typename RecordDim,
bool SeparateBuffers,
bool AlignSubArrays,
typename LinearizeArrayDimsFunctor>
inline constexpr bool
isSoA<SoA<ArrayExtents, RecordDim, SeparateBuffers, AlignSubArrays, LinearizeArrayDimsFunctor>> = true;
} // namespace llama::mapping
10 changes: 6 additions & 4 deletions tests/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ namespace
llama::mapping::AoS<ArrayExtents, RecordDim, true, llama::mapping::LinearizeArrayDimsFortran>>;

using OtherMappings = boost::mp11::mp_list<
llama::mapping::SoA<ArrayExtents, RecordDim, false, llama::mapping::LinearizeArrayDimsCpp>,
// llama::mapping::SoA<ArrayExtents, RecordDim, false, llama::mapping::LinearizeArrayDimsFortran>,
// llama::mapping::SoA<ArrayExtents, RecordDim, true, llama::mapping::LinearizeArrayDimsCpp>,
llama::mapping::SoA<ArrayExtents, RecordDim, true, llama::mapping::LinearizeArrayDimsFortran>,
llama::mapping::SoA<ArrayExtents, RecordDim, false, false, llama::mapping::LinearizeArrayDimsCpp>,
// llama::mapping::SoA<ArrayExtents, RecordDim, false, false, llama::mapping::LinearizeArrayDimsFortran>,
// llama::mapping::SoA<ArrayExtents, RecordDim, true, false, llama::mapping::LinearizeArrayDimsCpp>,
llama::mapping::SoA<ArrayExtents, RecordDim, true, false, llama::mapping::LinearizeArrayDimsFortran>,
llama::mapping::SoA<ArrayExtents, RecordDim, false, true, llama::mapping::LinearizeArrayDimsCpp>,
// llama::mapping::SoA<ArrayExtents, RecordDim, false, true, llama::mapping::LinearizeArrayDimsFortran>,
llama::mapping::AoSoA<ArrayExtents, RecordDim, 4, llama::mapping::LinearizeArrayDimsCpp>,
// llama::mapping::AoSoA<ArrayExtents, RecordDim, 4, llama::mapping::LinearizeArrayDimsFortran>,
// llama::mapping::AoSoA<ArrayExtents, RecordDim, 8, llama::mapping::LinearizeArrayDimsCpp>,
Expand Down
28 changes: 19 additions & 9 deletions tests/dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,14 @@ TEST_CASE("dump.Particle.AoS_Packed")
dump(llama::mapping::PackedAoS<ArrayExtents, Particle>{extents});
}

TEST_CASE("dump.Particle.SoA_SB")
TEST_CASE("dump.Particle.SoA_SB_Packed")
{
dump(llama::mapping::SingleBlobSoA<ArrayExtents, Particle>{extents});
dump(llama::mapping::PackedSingleBlobSoA<ArrayExtents, Particle>{extents});
}

TEST_CASE("dump.Particle.SoA_SB_Aligned")
{
dump(llama::mapping::AlignedSingleBlobSoA<ArrayExtents, Particle>{extents});
}

TEST_CASE("dump.Particle.SoA_MB")
Expand All @@ -60,7 +65,7 @@ TEST_CASE("dump.Particle.Split.SoA.AoS.1Buffer")
ArrayExtents,
Particle,
llama::RecordCoord<2>,
llama::mapping::SingleBlobSoA,
llama::mapping::PackedSingleBlobSoA,
llama::mapping::PackedAoS>{extents});
}

Expand All @@ -71,7 +76,7 @@ TEST_CASE("dump.Particle.Split.SoA.AoS.2Buffer")
ArrayExtents,
Particle,
llama::RecordCoord<2>,
llama::mapping::SingleBlobSoA,
llama::mapping::PackedSingleBlobSoA,
llama::mapping::PackedAoS,
true>{extents});
}
Expand Down Expand Up @@ -102,7 +107,7 @@ TEST_CASE("dump.Particle.Split.AoSoA8.AoS.One.SoA")
llama::RecordCoord<1>,
llama::mapping::PackedOne,
llama::mapping::
BindSplit<llama::RecordCoord<0>, llama::mapping::PackedAoS, llama::mapping::SingleBlobSoA, true>::fn,
BindSplit<llama::RecordCoord<0>, llama::mapping::PackedAoS, llama::mapping::PackedSingleBlobSoA, true>::fn,
true>::fn,
true>{extents});
}
Expand Down Expand Up @@ -145,9 +150,14 @@ TEST_CASE("dump.ParticleUnaligned.AoS_Aligned_Min")
dump(llama::mapping::MinAlignedAoS<ArrayExtents, ParticleUnaligned>{extents});
}

TEST_CASE("dump.ParticleUnaligned.SoA_SB")
TEST_CASE("dump.ParticleUnaligned.SoA_SB_Packed")
{
dump(llama::mapping::PackedSingleBlobSoA<ArrayExtents, ParticleUnaligned>{extents});
}

TEST_CASE("dump.ParticleUnaligned.SoA_SB_Aligned")
{
dump(llama::mapping::SingleBlobSoA<ArrayExtents, ParticleUnaligned>{extents});
dump(llama::mapping::AlignedSingleBlobSoA<ArrayExtents, ParticleUnaligned>{extents});
}

TEST_CASE("dump.ParticleUnaligned.SoA_MB")
Expand Down Expand Up @@ -181,7 +191,7 @@ TEST_CASE("dump.ParticleUnaligned.Split.SoA.AoS.1Buffer")
ArrayExtents,
ParticleUnaligned,
llama::RecordCoord<1>,
llama::mapping::SingleBlobSoA,
llama::mapping::PackedSingleBlobSoA,
llama::mapping::PackedAoS>{extents});
}

Expand All @@ -191,7 +201,7 @@ TEST_CASE("dump.ParticleUnaligned.Split.SoA.AoS.2Buffer")
ArrayExtents,
ParticleUnaligned,
llama::RecordCoord<1>,
llama::mapping::SingleBlobSoA,
llama::mapping::PackedSingleBlobSoA,
llama::mapping::PackedAoS,
true>{extents});
}
Expand Down
6 changes: 3 additions & 3 deletions tests/mapping.HeatmapTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ TEST_CASE("Heatmap.nbody")
llama::mapping::AlignedAoS<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
run("SingleBlobSoA",
heatmapSingleBlobSoA,
llama::mapping::SingleBlobSoA<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
llama::mapping::PackedSingleBlobSoA<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
}

TEST_CASE("Trace.ctor")
Expand Down Expand Up @@ -107,7 +107,7 @@ Mass 10200
)");
};
run(llama::mapping::AlignedAoS<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
run(llama::mapping::SingleBlobSoA<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
run(llama::mapping::PackedSingleBlobSoA<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
}

TEMPLATE_LIST_TEST_CASE("Trace.nbody.reads_writes", "", SizeTypes)
Expand Down Expand Up @@ -148,7 +148,7 @@ Mass 10100 100
)");
};
run(llama::mapping::AlignedAoS<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
run(llama::mapping::SingleBlobSoA<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
run(llama::mapping::PackedSingleBlobSoA<llama::ArrayExtents<std::size_t, N>, ParticleHeatmap>{});
}

namespace
Expand Down
6 changes: 3 additions & 3 deletions tests/mapping.SoA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ TEST_CASE("mapping.SoA.SingleBlob.address")
{
auto test = [](auto arrayExtents)
{
using Mapping = llama::mapping::SingleBlobSoA<decltype(arrayExtents), Particle>;
using Mapping = llama::mapping::PackedSingleBlobSoA<decltype(arrayExtents), Particle>;
auto mapping = Mapping{arrayExtents};
using ArrayIndex = typename Mapping::ArrayIndex;

Expand Down Expand Up @@ -64,7 +64,7 @@ TEST_CASE("mapping.SoA.SingleBlob.fortran.address")
auto test = [](auto arrayExtents)
{
using Mapping = llama::mapping::
SingleBlobSoA<decltype(arrayExtents), Particle, llama::mapping::LinearizeArrayDimsFortran>;
PackedSingleBlobSoA<decltype(arrayExtents), Particle, llama::mapping::LinearizeArrayDimsFortran>;
auto mapping = Mapping{arrayExtents};
using ArrayIndex = typename Mapping::ArrayIndex;

Expand Down Expand Up @@ -128,7 +128,7 @@ TEST_CASE("mapping.SoA.SingleBlob.morton.address")
auto test = [](auto arrayExtents)
{
using Mapping = llama::mapping::
SingleBlobSoA<decltype(arrayExtents), Particle, llama::mapping::LinearizeArrayDimsMorton>;
PackedSingleBlobSoA<decltype(arrayExtents), Particle, llama::mapping::LinearizeArrayDimsMorton>;
auto mapping = Mapping{arrayExtents};
using ArrayIndex = typename Mapping::ArrayIndex;

Expand Down
11 changes: 7 additions & 4 deletions tests/mapping.Split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,12 @@ TEST_CASE("mapping.Split.SoA_SingleBlob.AoS_Packed.1Buffer")
auto extents = ArrayExtents{16, 16};

// we layout Pos as SoA, the rest as AoS
auto mapping = llama::mapping::
Split<ArrayExtents, Particle, llama::RecordCoord<0>, llama::mapping::SingleBlobSoA, llama::mapping::PackedAoS>{
extents};
auto mapping = llama::mapping::Split<
ArrayExtents,
Particle,
llama::RecordCoord<0>,
llama::mapping::PackedSingleBlobSoA,
llama::mapping::PackedAoS>{extents};

constexpr auto mapping1Size = 6120;
const auto ai = llama::ArrayIndex{0, 0};
Expand Down Expand Up @@ -88,7 +91,7 @@ TEST_CASE("mapping.Split.AoSoA8.AoS_Packed.One.SoA_SingleBlob.4Buffer")
llama::RecordCoord<1>,
llama::mapping::PackedOne,
llama::mapping::
BindSplit<llama::RecordCoord<0>, llama::mapping::PackedAoS, llama::mapping::SingleBlobSoA, true>::fn,
BindSplit<llama::RecordCoord<0>, llama::mapping::PackedAoS, llama::mapping::PackedSingleBlobSoA, true>::fn,
true>::fn,
true>{extents};

Expand Down
Loading

0 comments on commit 3a552a8

Please sign in to comment.