diff --git a/docs/pages/api.rst b/docs/pages/api.rst index 5c339fdc13..3c6f1b5ca3 100644 --- a/docs/pages/api.rst +++ b/docs/pages/api.rst @@ -138,6 +138,8 @@ Mappings .. doxygenstruct:: llama::mapping::AoSoA :members: .. doxygenvariable:: llama::mapping::maxLanes +.. doxygenstruct:: llama::mapping::BitPackedIntAoS + :members: .. doxygenstruct:: llama::mapping::BitPackedIntSoA :members: .. doxygenstruct:: llama::mapping::BitPackedFloatSoA diff --git a/docs/pages/mappings.rst b/docs/pages/mappings.rst index 7adc570bb2..c491c7f292 100644 --- a/docs/pages/mappings.rst +++ b/docs/pages/mappings.rst @@ -328,11 +328,11 @@ In this example, all fields of type :cpp:`double`, and the field at coordinate R The load/store functions are called on loading and storing through a proxy reference returned from the mapping. -BitPackedIntSoA ---------------- +BitPackedIntAoS/BitPackedIntSoA +------------------------------- -The BitPackedIntSoA mapping is a fully computed mapping that bitpacks integral values to reduce size and precision. -The bits are stored as struct of arrays. +The BitPackedIntSoA and BitPackedIntAoS mappings are fully computed mappings that bitpack integral values to reduce size and precision. +The bits are stored as struct of arrays and array of structs, respectively. The number of bits used per integral is configurable. All field types in the record dimension must be integral. 
diff --git a/include/llama/llama.hpp b/include/llama/llama.hpp index 07471233f3..e230069336 100644 --- a/include/llama/llama.hpp +++ b/include/llama/llama.hpp @@ -62,7 +62,7 @@ #include "mapping/AoS.hpp" #include "mapping/AoSoA.hpp" #include "mapping/BitPackedFloatSoA.hpp" -#include "mapping/BitPackedIntSoA.hpp" +#include "mapping/BitPackedInt.hpp" #include "mapping/Bytesplit.hpp" #include "mapping/Byteswap.hpp" #include "mapping/ChangeType.hpp" diff --git a/include/llama/mapping/BitPackedFloatSoA.hpp b/include/llama/mapping/BitPackedFloatSoA.hpp index e2e90a155f..9626ccac5a 100644 --- a/include/llama/mapping/BitPackedFloatSoA.hpp +++ b/include/llama/mapping/BitPackedFloatSoA.hpp @@ -3,7 +3,7 @@ #pragma once #include "../ProxyRefOpMixin.hpp" -#include "BitPackedIntSoA.hpp" +#include "BitPackedInt.hpp" #include "Common.hpp" #include diff --git a/include/llama/mapping/BitPackedIntSoA.hpp b/include/llama/mapping/BitPackedInt.hpp similarity index 55% rename from include/llama/mapping/BitPackedIntSoA.hpp rename to include/llama/mapping/BitPackedInt.hpp index cce7aea9bf..5e81de5be7 100644 --- a/include/llama/mapping/BitPackedIntSoA.hpp +++ b/include/llama/mapping/BitPackedInt.hpp @@ -219,6 +219,118 @@ namespace llama::mapping template using StoredUnsignedFor = std:: conditional_t<(sizeof(LargestIntegral) > sizeof(std::uint32_t)), std::uint64_t, std::uint32_t>; + + template< + typename TArrayExtents, + typename TRecordDim, + typename Bits, + SignBit SignBit, + typename TLinearizeArrayDimsFunctor, + typename TStoredIntegral> + struct BitPackedIntCommon + : MappingBase + , protected llama::internal::BoxedValue + { + using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; + using StoredIntegral = TStoredIntegral; + static constexpr std::size_t blobCount = mp_size>::value; + + static_assert(std::is_integral_v); + static_assert(std::is_unsigned_v); + + // We could allow more integer types as storage type, but that needs to be thought through carefully + static_assert( + 
std::is_same_v || std::is_same_v); + + protected: + using Base = MappingBase; + using VHBits = llama::internal::BoxedValue; + using size_type = typename TArrayExtents::value_type; + + template + using IsAllowedFieldType = mp_or, std::is_enum>; + + static_assert( + mp_all_of, IsAllowedFieldType>::value, + "All record dimension field types must be integral"); + + template + using IsFieldTypeSmallerOrEqualStorageIntegral = mp_bool; + + static_assert( + mp_all_of, IsFieldTypeSmallerOrEqualStorageIntegral>::value, + "The integral type used for storage must be at least as big as the type of the values to retrieve"); + + public: + LLAMA_FN_HOST_ACC_INLINE + constexpr auto bits() const -> size_type + { + return static_cast(VHBits::value()); + } + + template, int> = 0> + LLAMA_FN_HOST_ACC_INLINE constexpr explicit BitPackedIntCommon( + TArrayExtents extents = {}, + Bits bits = {}, + TRecordDim = {}) + : Base(extents) + , VHBits{bits} + { + static_assert(VHBits::value() > 0); + mp_for_each>>( + [&](auto t) + { + using FieldType = typename decltype(t)::type; + static_assert( + static_cast(VHBits::value()) <= sizeof(FieldType) * CHAR_BIT, + "Storage bits must not be greater than bits of field type"); + static_assert( + VHBits::value() >= 2 + || std::is_unsigned_v || SignBit == llama::mapping::SignBit::Discard, + "When keeping the sign bit, Bits must be at least 2 with signed integers in the record " + "dimension"); + }); + } + + template, int> = 0> + LLAMA_FN_HOST_ACC_INLINE constexpr explicit BitPackedIntCommon( + TArrayExtents extents, + Bits bits, + TRecordDim = {}) + : Base(extents) + , VHBits{bits} + { +#ifdef __CUDA_ARCH__ + assert(VHBits::value() > 0); +#else + if(VHBits::value() <= 0) + throw std::invalid_argument("BitPackedInt* Bits must not be zero"); +#endif + mp_for_each>>( + [&](auto t) + { + using FieldType = typename decltype(t)::type; +#ifdef __CUDA_ARCH__ + assert(VHBits::value() <= sizeof(FieldType) * CHAR_BIT); +#else + if(static_cast(VHBits::value()) > 
sizeof(FieldType) * CHAR_BIT) + throw std::invalid_argument( + "BitPackedInt* Bits must not be larger than any field type in the record dimension"); + if(!(VHBits::value() >= 2 + || std::is_unsigned_v || SignBit == llama::mapping::SignBit::Discard)) + throw std::invalid_argument("When keeping the sign bit, Bits must be at least 2 with " + "signed integers in the record " + "dimension"); +#endif + }); + } + + template + static constexpr auto isComputed(RecordCoord) + { + return true; + } + }; } // namespace internal /// Struct of array mapping using bit packing to reduce size/precision of integral data types. If your record @@ -240,113 +352,26 @@ namespace llama::mapping typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, typename TStoredIntegral = internal::StoredUnsignedFor> struct BitPackedIntSoA - : MappingBase - , private llama::internal::BoxedValue + : internal:: + BitPackedIntCommon { - using LinearizeArrayDimsFunctor = TLinearizeArrayDimsFunctor; - using StoredIntegral = TStoredIntegral; - static constexpr std::size_t blobCount = mp_size>::value; - - static_assert(std::is_integral_v); - static_assert(std::is_unsigned_v); - - // We could allow more integer types as storage type, but that needs to be thought through carefully - static_assert(std::is_same_v || std::is_same_v); - private: - using Base = MappingBase; - using VHBits = llama::internal::BoxedValue; - using size_type = typename TArrayExtents::value_type; - - template - using IsAllowedFieldType = mp_or, std::is_enum>; - - static_assert( - mp_all_of, IsAllowedFieldType>::value, - "All record dimension field types must be integral"); - - template - using IsFieldTypeSmallerOrEqualStorageIntegral = mp_bool; - - static_assert( - mp_all_of, IsFieldTypeSmallerOrEqualStorageIntegral>::value, - "The integral type used for storage must be at least as big as the type of the values to retrieve"); + using Base = internal:: + BitPackedIntCommon; public: - LLAMA_FN_HOST_ACC_INLINE - constexpr auto bits() 
const -> size_type - { - return static_cast(VHBits::value()); - } - - template, int> = 0> - LLAMA_FN_HOST_ACC_INLINE constexpr explicit BitPackedIntSoA( - TArrayExtents extents = {}, - Bits bits = {}, - TRecordDim = {}) - : Base(extents) - , VHBits{bits} - { - static_assert(VHBits::value() > 0); - mp_for_each>>( - [&](auto t) - { - using FieldType = typename decltype(t)::type; - static_assert( - static_cast(VHBits::value()) <= sizeof(FieldType) * CHAR_BIT, - "Storage bits must not be greater than bits of field type"); - static_assert( - VHBits::value() >= 2 - || std::is_unsigned_v || SignBit == llama::mapping::SignBit::Discard, - "When keeping the sign bit, Bits must be at least 2 with signed integers in the record " - "dimension"); - }); - } - - template, int> = 0> - LLAMA_FN_HOST_ACC_INLINE constexpr explicit BitPackedIntSoA(TArrayExtents extents, Bits bits, TRecordDim = {}) - : Base(extents) - , VHBits{bits} - { -#ifdef __CUDA_ARCH__ - assert(VHBits::value() > 0); -#else - if(VHBits::value() <= 0) - throw std::invalid_argument("BitPackedIntSoA Bits must not be zero"); -#endif - mp_for_each>>( - [&](auto t) - { - using FieldType = typename decltype(t)::type; -#ifdef __CUDA_ARCH__ - assert(VHBits::value() <= sizeof(FieldType) * CHAR_BIT); -#else - if(static_cast(VHBits::value()) > sizeof(FieldType) * CHAR_BIT) - throw std::invalid_argument( - "BitPackedIntSoA Bits must not be larger than any field type in the record dimension"); - if(!(VHBits::value() >= 2 - || std::is_unsigned_v || SignBit == llama::mapping::SignBit::Discard)) - throw std::invalid_argument( - "When keeping the sign bit, Bits must be at least 2 with signed integers in the record " - "dimension"); -#endif - }); - } + using Base::Base; + using typename Base::size_type; + using VHBits = typename Base::VHBits; // use plain using declaration with nvcc >= 11.8 LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type /*blobIndex*/) const -> size_type { - constexpr auto bitsPerStoredIntegral = 
static_cast(sizeof(StoredIntegral) * CHAR_BIT); - const auto bitsNeeded = LinearizeArrayDimsFunctor{}.size(Base::extents()) * VHBits::value(); + constexpr auto bitsPerStoredIntegral = static_cast(sizeof(TStoredIntegral) * CHAR_BIT); + const auto bitsNeeded = TLinearizeArrayDimsFunctor{}.size(Base::extents()) * VHBits::value(); return roundUpToMultiple(bitsNeeded, bitsPerStoredIntegral) / CHAR_BIT; } - template - static constexpr auto isComputed(RecordCoord) - { - return true; - } - template LLAMA_FN_HOST_ACC_INLINE constexpr auto compute( typename Base::ArrayIndex ai, @@ -354,9 +379,9 @@ namespace llama::mapping Blobs& blobs) const { constexpr auto blob = flatRecordCoord>; - const auto bitOffset = LinearizeArrayDimsFunctor{}(ai, Base::extents()) * VHBits::value(); + const auto bitOffset = TLinearizeArrayDimsFunctor{}(ai, Base::extents()) * VHBits::value(); - using QualifiedStoredIntegral = CopyConst; + using QualifiedStoredIntegral = CopyConst; using DstType = GetType>; LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING return internal::BitPackedIntRef{ @@ -401,4 +426,117 @@ namespace llama::mapping typename StoredIntegral> inline constexpr bool isBitPackedIntSoA< BitPackedIntSoA> = true; + + /// Array of struct mapping using bit packing to reduce size/precision of integral data types. If your record + /// dimension contains non-integral types, split them off using the \ref Split mapping first. + /// \tparam Bits If Bits is llama::Constant, the compile-time N specifies the number of bits to use. If Bits is + /// an integral type T, the number of bits is specified at runtime, passed to the constructor and stored as type T. + /// Must not be zero and must not be bigger than the bits of TStoredIntegral. + /// @tparam SignBit When set to SignBit::Discard, discards the sign bit when storing signed integers. All + /// numbers will be read back positive. 
+ /// \tparam TLinearizeArrayDimsFunctor Defines how the array dimensions should be mapped into linear numbers and + /// how big the linear domain gets. + /// \tparam FlattenRecordDim Defines how the record dimension's fields should be flattened. See \ref + /// FlattenRecordDimInOrder, \ref FlattenRecordDimIncreasingAlignment, \ref FlattenRecordDimDecreasingAlignment and + /// \ref FlattenRecordDimMinimizePadding. + /// \tparam TStoredIntegral Integral type used as storage of reduced precision integers. Must be std::uint32_t or + /// std::uint64_t. + template< + typename TArrayExtents, + typename TRecordDim, + typename Bits = typename TArrayExtents::value_type, + SignBit SignBit = SignBit::Keep, + typename TLinearizeArrayDimsFunctor = LinearizeArrayDimsCpp, + template typename FlattenRecordDim = FlattenRecordDimInOrder, + typename TStoredIntegral = internal::StoredUnsignedFor> + struct BitPackedIntAoS + : internal:: + BitPackedIntCommon + { + private: + using Base = internal:: + BitPackedIntCommon; + + public: + using Base::Base; + using typename Base::size_type; + using VHBits = typename Base::VHBits; // use plain using declaration with nvcc >= 11.8 + + using Flattener = FlattenRecordDim; + + LLAMA_FN_HOST_ACC_INLINE + constexpr auto blobSize(size_type /*blobIndex*/) const -> size_type + { + constexpr auto bitsPerStoredIntegral = static_cast(sizeof(TStoredIntegral) * CHAR_BIT); + const auto bitsNeeded + = TLinearizeArrayDimsFunctor{}.size(Base::extents()) * VHBits::value() * flatFieldCount; + return roundUpToMultiple(bitsNeeded, bitsPerStoredIntegral) / CHAR_BIT; + } + + template + LLAMA_FN_HOST_ACC_INLINE constexpr auto compute( + typename Base::ArrayIndex ai, + RecordCoord, + Blobs& blobs) const + { + constexpr auto flatFieldIndex = static_cast(Flattener::template flatIndex); + const auto bitOffset + = ((TLinearizeArrayDimsFunctor{}(ai, Base::extents()) * flatFieldCount) +flatFieldIndex) + * VHBits::value(); + + using QualifiedStoredIntegral = CopyConst; + using 
DstType = GetType>; + LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING + return internal::BitPackedIntRef{ + reinterpret_cast(&blobs[0][0]), + bitOffset, + static_cast(*this)}; + LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING + } + }; + + /// Binds parameters to a \ref BitPackedIntAoS mapping except for array and record dimension, producing a quoted + /// meta function accepting the latter two. Useful to prepare this mapping for a meta mapping. + template< + typename Bits = void, + SignBit SignBit = SignBit::Keep, + typename LinearizeArrayDimsFunctor = mapping::LinearizeArrayDimsCpp, + template typename FlattenRecordDim = FlattenRecordDimInOrder, + typename StoredIntegral = void> + struct BindBitPackedIntAoS + { + template + using fn = BitPackedIntAoS< + ArrayExtents, + RecordDim, + std::conditional_t, Bits, typename ArrayExtents::value_type>, + SignBit, + LinearizeArrayDimsFunctor, + FlattenRecordDim, + std::conditional_t< + !std::is_void_v, + StoredIntegral, + internal::StoredUnsignedFor>>; + }; + + template + inline constexpr bool isBitPackedIntAoS = false; + + template< + typename ArrayExtents, + typename RecordDim, + typename Bits, + SignBit SignBit, + typename LinearizeArrayDimsFunctor, + template + typename FlattenRecordDim, + typename StoredIntegral> + inline constexpr bool isBitPackedIntAoS> = true; } // namespace llama::mapping diff --git a/tests/mapping.BitPackedIntSoA.cpp b/tests/mapping.BitPackedInt.cpp similarity index 62% rename from tests/mapping.BitPackedIntSoA.cpp rename to tests/mapping.BitPackedInt.cpp index ab3ebf9880..1ff0992d48 100644 --- a/tests/mapping.BitPackedIntSoA.cpp +++ b/tests/mapping.BitPackedInt.cpp @@ -13,49 +13,65 @@ using UInts = llama::Record< llama::Field, llama::Field>; -TEST_CASE("mapping.BitPackedIntSoA.Constant.SInts") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.Constant.SInts", + "", + (llama::mapping::BitPackedIntSoA, SInts, llama::Constant<7>>), + (llama::mapping::BitPackedIntAoS, SInts, llama::Constant<7>>) ) { // 16 elements * 4 
fields = 64 integers, iota produces [0;63], which fits int8_t and into 7 bits - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, SInts, llama::Constant<7>>{{16}}); + auto view = llama::allocView(TestType{{16}}); CHECK(view.mapping().bits() == 7); iotaFillView(view); iotaCheckView(view); } -TEST_CASE("mapping.BitPackedIntSoA.Value.SInts") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.Value.SInts", + "", + (llama::mapping::BitPackedIntSoA, SInts>), + (llama::mapping::BitPackedIntAoS, SInts>) ) { // 16 elements * 4 fields = 64 integers, iota produces [0;63], which fits int8_t and into 7 bits - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, SInts>{{16}, 7}); + auto view = llama::allocView(TestType{{16}, 7}); CHECK(view.mapping().bits() == 7); iotaFillView(view); iotaCheckView(view); } -TEST_CASE("mapping.BitPackedIntSoA.Constant.UInts") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.Constant.UInts", + "", + (llama::mapping::BitPackedIntSoA, UInts, llama::Constant<7>>), + (llama::mapping::BitPackedIntAoS, UInts, llama::Constant<7>>) ) { // 32 elements * 4 fields = 128 integers, iota produces [0;127], which fits uint8_t and into 7 bits - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, UInts, llama::Constant<7>>{{32}}); + auto view = llama::allocView(TestType{{32}}); CHECK(view.mapping().bits() == 7); iotaFillView(view); iotaCheckView(view); } -TEST_CASE("mapping.BitPackedIntSoA.Value.UInts") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.Value.UInts", + "", + (llama::mapping::BitPackedIntSoA, UInts>), + (llama::mapping::BitPackedIntAoS, UInts>) ) { // 32 elements * 4 fields = 128 integers, iota produces [0;127], which fits uint8_t and into 7 bits - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, UInts>{{32}, 7}); + auto view = llama::allocView(TestType{{32}, 7}); CHECK(view.mapping().bits() == 7); iotaFillView(view); iotaCheckView(view); } -TEST_CASE("mapping.BitPackedIntSoA.UInts.Cutoff") 
+TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.UInts.Cutoff", + "", + (llama::mapping::BitPackedIntSoA, UInts, llama::Constant<3>>), + (llama::mapping::BitPackedIntAoS, UInts, llama::Constant<3>>) ) { - auto view = llama::allocView(llama::mapping::BitPackedIntSoA, UInts, llama::Constant<3>>{}); + auto view = llama::allocView(TestType{}); for(auto i = 0; i < 8; i++) { @@ -71,9 +87,13 @@ TEST_CASE("mapping.BitPackedIntSoA.UInts.Cutoff") } } -TEST_CASE("mapping.BitPackedIntSoA.SInts.Cutoff") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.SInts.Cutoff", + "", + (llama::mapping::BitPackedIntSoA, SInts, llama::Constant<4>>), + (llama::mapping::BitPackedIntAoS, SInts, llama::Constant<4>>) ) { - auto view = llama::allocView(llama::mapping::BitPackedIntSoA, SInts, llama::Constant<4>>{}); + auto view = llama::allocView(TestType{}); for(auto i = 0; i < 8; i++) { @@ -102,9 +122,13 @@ TEST_CASE("mapping.BitPackedIntSoA.SInts.Cutoff") } } -TEST_CASE("mapping.BitPackedIntSoA.SInts.Roundtrip") +constexpr auto n = 1000; +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.SInts.Roundtrip", + "", + (llama::mapping::BitPackedIntSoA, Vec3I, llama::Constant<12>>), + (llama::mapping::BitPackedIntAoS, Vec3I, llama::Constant<12>>) ) { - constexpr auto n = 1000; auto view = llama::allocView(llama::mapping::AoS, Vec3I>{}); std::default_random_engine engine; std::uniform_int_distribution dist{-2000, 2000}; // fits into 12 bits @@ -112,8 +136,7 @@ TEST_CASE("mapping.BitPackedIntSoA.SInts.Roundtrip") view(i) = dist(engine); // copy into packed representation - auto packedView = llama::allocView( - llama::mapping::BitPackedIntSoA, Vec3I, llama::Constant<12>>{}); + auto packedView = llama::allocView(TestType{}); llama::copy(view, packedView); // compute on packed representation @@ -132,12 +155,15 @@ TEST_CASE("mapping.BitPackedIntSoA.SInts.Roundtrip") CHECK(view(i) == view2(i)); } -TEST_CASE("mapping.BitPackedIntSoA.bool") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.bool", + "", + 
(llama::mapping::BitPackedIntSoA, bool, llama::Constant<1>>), + (llama::mapping::BitPackedIntAoS, bool, llama::Constant<1>>) ) { // pack 32 bools into 4 bytes const auto n = 32; - const auto mapping - = llama::mapping::BitPackedIntSoA, bool, llama::Constant<1>>{{n}}; + const auto mapping = TestType{{n}}; CHECK(mapping.blobSize(0) == n / CHAR_BIT); auto view = llama::allocView(mapping); for(auto i = 0; i < n; i++) @@ -169,12 +195,17 @@ namespace }; } // namespace -TEMPLATE_TEST_CASE("mapping.BitPackedIntSoA.Enum", "", Grades, GradesClass) +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.Enum", + "", + (llama::mapping::BitPackedIntSoA, Grades, llama::Constant<3>>), + (llama::mapping::BitPackedIntAoS, Grades, llama::Constant<3>>), + (llama::mapping::BitPackedIntSoA, GradesClass, llama::Constant<3>>), + (llama::mapping::BitPackedIntAoS, GradesClass, llama::Constant<3>>) ) { - using Enum = TestType; + using Enum = typename TestType::RecordDim; - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, Enum, llama::Constant<3>>{{6}}); + auto view = llama::allocView(TestType{{6}}); view(0) = Enum::A; view(1) = Enum::B; view(2) = Enum::C; @@ -190,76 +221,113 @@ TEMPLATE_TEST_CASE("mapping.BitPackedIntSoA.Enum", "", Grades, GradesClass) CHECK(view(5) == Enum::F); } -TEST_CASE("mapping.BitPackedIntSoA.Size") +TEST_CASE("mapping.BitPackedInt.Size") { STATIC_REQUIRE(std::is_empty_v< llama::mapping::BitPackedIntSoA, SInts, llama::Constant<7>>>); + STATIC_REQUIRE(std::is_empty_v< + llama::mapping::BitPackedIntAoS, SInts, llama::Constant<7>>>); + STATIC_REQUIRE( sizeof(llama::mapping::BitPackedIntSoA, SInts>{{}, 7}) == sizeof(unsigned)); + STATIC_REQUIRE( + sizeof(llama::mapping::BitPackedIntAoS, SInts>{{}, 7}) == sizeof(unsigned)); } -TEST_CASE("mapping.BitPackedIntSoA.FullBitWidth.16") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.FullBitWidth.16", + "", + (llama::mapping::BitPackedIntSoA, std::uint16_t, llama::Constant<16>>), + (llama::mapping::BitPackedIntAoS, 
std::uint16_t, llama::Constant<16>>) ) { // this could detect bugs when shifting integers by their bit-width - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, std::uint16_t, llama::Constant<16>>{}); + auto view = llama::allocView(TestType{}); constexpr std::uint16_t value = 0xAABB; view() = value; CHECK(view() == value); } -TEST_CASE("mapping.BitPackedIntSoA.FullBitWidth.32") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.FullBitWidth.32", + "", + (llama::mapping::BitPackedIntSoA, std::uint32_t, llama::Constant<32>>), + (llama::mapping::BitPackedIntAoS, std::uint32_t, llama::Constant<32>>) ) { // this could detect bugs when shifting integers by their bit-width - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, std::uint32_t, llama::Constant<32>>{}); + auto view = llama::allocView(TestType{}); constexpr std::uint32_t value = 0xAABBCCDD; view() = value; CHECK(view() == value); } -TEST_CASE("mapping.BitPackedIntSoA.FullBitWidth.64") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.FullBitWidth.64", + "", + (llama::mapping::BitPackedIntSoA, std::uint64_t, llama::Constant<64>>), + (llama::mapping::BitPackedIntAoS, std::uint64_t, llama::Constant<64>>) ) { // this could detect bugs when shifting integers by their bit-width - auto view = llama::allocView( - llama::mapping::BitPackedIntSoA, std::uint64_t, llama::Constant<64>>{}); + auto view = llama::allocView(TestType{}); constexpr std::uint64_t value = 0xAABBCCDDEEFF8899; view() = value; CHECK(view() == value); } -TEST_CASE("mapping.BitPackedIntSoA.ValidateBitsSmallerThanFieldType") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.ValidateBitsSmallerThanFieldType", + "", + (llama::mapping::BitPackedIntSoA, UInts, unsigned>), + (llama::mapping::BitPackedIntAoS, UInts, unsigned>) ) { // 11 bits are larger than the uint8_t field type - CHECK_THROWS(llama::mapping::BitPackedIntSoA, UInts, unsigned>{{}, 11}); + CHECK_THROWS(TestType{{}, 11}); } 
-TEST_CASE("mapping.BitPackedIntSoA.ValidateBitsSmallerThanStorageIntegral") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.ValidateBitsSmallerThanStorageIntegral", + "", + (llama::mapping::BitPackedIntSoA< + llama::ArrayExtents, + std::uint32_t, + unsigned, + llama::mapping::SignBit::Keep, + llama::mapping::LinearizeArrayDimsCpp, + std::uint32_t>), + (llama::mapping::BitPackedIntAoS< + llama::ArrayExtents, + std::uint32_t, + unsigned, + llama::mapping::SignBit::Keep, + llama::mapping::LinearizeArrayDimsCpp, + llama::mapping::FlattenRecordDimInOrder, + std::uint32_t>) ) { - CHECK_THROWS(llama::mapping::BitPackedIntSoA< - llama::ArrayExtents, - std::uint32_t, - unsigned, - llama::mapping::SignBit::Keep, - llama::mapping::LinearizeArrayDimsCpp, - std::uint32_t>{{}, 40}); + CHECK_THROWS(TestType{{}, 40}); } -TEST_CASE("mapping.BitPackedIntSoA.ValidateBitsNotZero") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.ValidateBitsNotZero", + "", + (llama::mapping::BitPackedIntSoA, UInts, unsigned>), + (llama::mapping::BitPackedIntAoS, UInts, unsigned>) ) { - CHECK_THROWS(llama::mapping::BitPackedIntSoA, UInts, unsigned>{{}, 0}); + CHECK_THROWS(TestType{{}, 0}); } -TEST_CASE("mapping.BitPackedIntSoA.ValidateBitsAtLeast2WithSignBit") +TEMPLATE_TEST_CASE( + "mapping.BitPackedInt.ValidateBitsAtLeast2WithSignBit", + "", + (llama::mapping::BitPackedIntSoA, SInts, unsigned>), + (llama::mapping::BitPackedIntAoS, SInts, unsigned>) ) { - CHECK_THROWS(llama::mapping::BitPackedIntSoA, SInts, unsigned>{{}, 1}); + CHECK_THROWS(TestType{{}, 1}); } TEMPLATE_TEST_CASE( - "mapping.BitPackedIntSoA.bitpack", + "mapping.bitpack", "", std::int8_t, std::int16_t, @@ -324,7 +392,7 @@ TEMPLATE_TEST_CASE( } TEMPLATE_TEST_CASE( - "mapping.BitPackedIntSoA.bitpack.1bit", + "mapping.bitpack.1bit", "", std::int8_t, std::int16_t, @@ -365,7 +433,7 @@ TEMPLATE_TEST_CASE( } TEMPLATE_TEST_CASE( - "mapping.BitPackedIntSoA.bitpack.1bit.fastpath", + "mapping.bitpack1", "", std::int8_t, std::int16_t,