diff --git a/examples/root/lhcb_analysis/CMakeLists.txt b/examples/root/lhcb_analysis/CMakeLists.txt
index 731278ecdc..f4ca4396de 100644
--- a/examples/root/lhcb_analysis/CMakeLists.txt
+++ b/examples/root/lhcb_analysis/CMakeLists.txt
@@ -10,7 +10,7 @@ if (NOT TARGET llama::llama)
 	find_package(llama REQUIRED)
 endif()
 add_executable(${PROJECT_NAME} lhcb.cpp)
-#target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_20)
+target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_20)
 target_link_libraries(${PROJECT_NAME} PRIVATE
 		ROOT::Hist ROOT::Graf ROOT::Gpad ROOT::ROOTNTuple llama::llama OpenMP::OpenMP_CXX)
 if (MSVC)
diff --git a/examples/root/lhcb_analysis/README.md b/examples/root/lhcb_analysis/README.md
index 4226cf97b3..31a6caba22 100644
--- a/examples/root/lhcb_analysis/README.md
+++ b/examples/root/lhcb_analysis/README.md
@@ -2,9 +2,8 @@ This example is a LLAMA version from the IO benchmark found here:
 https://github.com/jblomer/iotools/blob/master/lhcb.cxx
 
 The lhcb analysis example requires an input file, which can be downloaded here:
-https://root.cern/files/RNTuple/
-
-The file is typically called B2HHH~zstd.ntuple
-
+https://root.cern/files/RNTuple/.
+The file is typically called B2HHH~zstd.ntuple, so you can download it with:
+`curl https://root.cern/files/RNTuple/B2HHH~zstd.ntuple -o B2HHH~zstd.ntuple`
 If you get an error due to a version incompatibility of the file, try this workaround:
 https://github.com/jblomer/iotools/issues/9
diff --git a/examples/root/lhcb_analysis/lhcb.cpp b/examples/root/lhcb_analysis/lhcb.cpp
index c70df18d62..e279ed1bab 100644
--- a/examples/root/lhcb_analysis/lhcb.cpp
+++ b/examples/root/lhcb_analysis/lhcb.cpp
@@ -33,48 +33,65 @@ namespace
     constexpr auto analysisRepetitionsInstrumentation = 1; // costly, so run less often
 
     // clang-format off
-    struct H1isMuon{};
-    struct H2isMuon{};
-    struct H3isMuon{};
+    struct BFlightDistance{};
+    struct BVertexChi2{};
 
+    struct H1Charge{};
+    struct H1IpChi2{};
     struct H1PX{};
     struct H1PY{};
     struct H1PZ{};
     struct H1ProbK{};
     struct H1ProbPi{};
+    struct H1isMuon{};
 
+    struct H2Charge{};
+    struct H2IpChi2{};
     struct H2PX{};
     struct H2PY{};
     struct H2PZ{};
     struct H2ProbK{};
     struct H2ProbPi{};
+    struct H2isMuon{};
 
+    struct H3Charge{};
+    struct H3IpChi2{};
     struct H3PX{};
     struct H3PY{};
     struct H3PZ{};
     struct H3ProbK{};
     struct H3ProbPi{};
+    struct H3isMuon{};
     // clang-format on
 
+    // TODO(bgruber): only needed data is loaded. Should we load the entire data set?
     using RecordDim = llama::Record<
-        llama::Field<H1isMuon, int>,
-        llama::Field<H2isMuon, int>,
-        llama::Field<H3isMuon, int>,
+        // llama::Field<BFlightDistance, double>,
+        // llama::Field<BVertexChi2, double>,
+        // llama::Field<H1Charge, int>,
+        // llama::Field<H1IpChi2, double>,
         llama::Field<H1PX, double>,
         llama::Field<H1PY, double>,
         llama::Field<H1PZ, double>,
         llama::Field<H1ProbK, double>,
         llama::Field<H1ProbPi, double>,
+        llama::Field<H1isMuon, int>,
+        // llama::Field<H2Charge, int>,
+        // llama::Field<H2IpChi2, double>,
         llama::Field<H2PX, double>,
         llama::Field<H2PY, double>,
         llama::Field<H2PZ, double>,
         llama::Field<H2ProbK, double>,
         llama::Field<H2ProbPi, double>,
+        llama::Field<H2isMuon, int>,
+        // llama::Field<H3Charge, int>,
+        // llama::Field<H3IpChi2, double>,
         llama::Field<H3PX, double>,
         llama::Field<H3PY, double>,
         llama::Field<H3PZ, double>,
         llama::Field<H3ProbK, double>,
-        llama::Field<H3ProbPi, double>>;
+        llama::Field<H3ProbPi, double>,
+        llama::Field<H3isMuon, int>>;
 
     namespace RE = ROOT::Experimental;
 
@@ -95,59 +112,17 @@ namespace
 
         auto view = llama::allocViewUninitialized(Mapping{typename Mapping::ArrayExtents{ntuple->GetNEntries()}});
 
-        auto viewH1IsMuon = ntuple->GetView<int>("H1_isMuon");
-        auto viewH2IsMuon = ntuple->GetView<int>("H2_isMuon");
-        auto viewH3IsMuon = ntuple->GetView<int>("H3_isMuon");
-
-        auto viewH1PX = ntuple->GetView<double>("H1_PX");
-        auto viewH1PY = ntuple->GetView<double>("H1_PY");
-        auto viewH1PZ = ntuple->GetView<double>("H1_PZ");
-        auto viewH1ProbK = ntuple->GetView<double>("H1_ProbK");
-        auto viewH1ProbPi = ntuple->GetView<double>("H1_ProbPi");
-
-        auto viewH2PX = ntuple->GetView<double>("H2_PX");
-        auto viewH2PY = ntuple->GetView<double>("H2_PY");
-        auto viewH2PZ = ntuple->GetView<double>("H2_PZ");
-        auto viewH2ProbK = ntuple->GetView<double>("H2_ProbK");
-        auto viewH2ProbPi = ntuple->GetView<double>("H2_ProbPi");
-
-        auto viewH3PX = ntuple->GetView<double>("H3_PX");
-        auto viewH3PY = ntuple->GetView<double>("H3_PY");
-        auto viewH3PZ = ntuple->GetView<double>("H3_PZ");
-        auto viewH3ProbK = ntuple->GetView<double>("H3_ProbK");
-        auto viewH3ProbPi = ntuple->GetView<double>("H3_ProbPi");
-
-        for(auto i : ntuple->GetEntryRange())
-        {
-            auto&& event = view(i);
-            event(H1isMuon{}) = viewH1IsMuon(i);
-            event(H2isMuon{}) = viewH2IsMuon(i);
-            event(H3isMuon{}) = viewH3IsMuon(i);
-
-            // a few sanity checks in case we mess up with the bitpacking
-            assert(event(H1isMuon{}) != viewH1IsMuon(i));
-            assert(event(H2isMuon{}) != viewH2IsMuon(i));
-            assert(event(H3isMuon{}) != viewH3IsMuon(i));
-
-            event(H1PX{}) = viewH1PX(i);
-            event(H1PY{}) = viewH1PY(i);
-            event(H1PZ{}) = viewH1PZ(i);
-            event(H1ProbK{}) = viewH1ProbK(i);
-            event(H1ProbPi{}) = viewH1ProbPi(i);
-
-            event(H2PX{}) = viewH2PX(i);
-            event(H2PY{}) = viewH2PY(i);
-            event(H2PZ{}) = viewH2PZ(i);
-            event(H2ProbK{}) = viewH2ProbK(i);
-            event(H2ProbPi{}) = viewH2ProbPi(i);
-
-            event(H3PX{}) = viewH3PX(i);
-            event(H3PY{}) = viewH3PY(i);
-            event(H3PZ{}) = viewH3PZ(i);
-            event(H3ProbK{}) = viewH3ProbK(i);
-            event(H3ProbPi{}) = viewH3ProbPi(i);
-        }
-
+        llama::forEachLeafCoord<RecordDim>(
+            [&]<typename RecordCoord>(RecordCoord)
+            {
+                using Type = llama::GetType<RecordDim, RecordCoord>;
+                using Tag = llama::GetTag<RecordDim, RecordCoord>;
+                auto columnName = std::string(llama::structName<Tag>());
+                columnName.insert(columnName.begin() + 1 + (columnName[0] == 'H'), '_');
+                auto columnView = ntuple->GetView<Type>(columnName);
+                for(auto i : ntuple->GetEntryRange())
+                    view(i)(Tag{}) = columnView(i);
+            });
         const auto duration
             = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - begin).count();
 
@@ -233,18 +208,18 @@ namespace
         return std::tuple{hists[0], duration};
     }
 
-    const auto histogramFolder = std::string("lhcb/histograms");
-    const auto layoutsFolder = std::string("lhcb/layouts");
-    const auto heatmapFolder = std::string("lhcb/heatmaps");
+    const auto histogramFolder = std::filesystem::path("lhcb/histograms");
+    const auto layoutsFolder = std::filesystem::path("lhcb/layouts");
+    const auto heatmapFolder = std::filesystem::path("lhcb/heatmaps");
 
-    void save(TH1D& h, const std::string& mappingName)
+    void saveHist(TH1D& h, const std::string& mappingName)
     {
-        const auto file = std::filesystem::path(histogramFolder + "/" + mappingName + ".png");
-        std::filesystem::create_directories(file.parent_path());
+        std::filesystem::create_directories(histogramFolder);
         auto c = TCanvas("c", "", 800, 700);
         h.GetXaxis()->SetTitle("m_{KKK} [MeV/c^{2}]");
-        h.DrawCopy();
-        c.Print(file.c_str());
+        h.DrawCopy("", "");
+        c.Print((histogramFolder / (mappingName + ".png")).c_str());
+        c.Print((histogramFolder / (mappingName + ".pdf")).c_str());
         // c.Modified();
         // c.Update();
         // auto app = TApplication("", nullptr, nullptr);
@@ -327,6 +302,15 @@ namespace
             true>::fn,
         true>;
 
+    using Custom1_3_H1ProbK_float = llama::mapping::Split<
+        llama::ArrayExtentsDynamic<RE::NTupleSize_t, 1>,
+        RecordDim,
+        mp_list<mp_list<H1isMuon>, mp_list<H2isMuon>, mp_list<H3isMuon>, mp_list<H1ProbK>>,
+        llama::mapping::BindChangeType<llama::mapping::BindAoS<>::fn, mp_list<mp_list<double, float>>>::fn,
+        llama::mapping::
+            BindSplit<mp_list<mp_list<H2ProbK>>, llama::mapping::AlignedAoS, llama::mapping::AlignedAoS, true>::fn,
+        true>;
+
     using Custom4Heatmap = llama::mapping::Heatmap<Custom4>;
 
     using Custom5 = llama::mapping::Split<
@@ -341,6 +325,7 @@ namespace
             true>::fn,
         true>;
 
+    template<std::size_t ManBits = 16>
     using Custom6 = llama::mapping::Split<
         llama::ArrayExtentsDynamic<RE::NTupleSize_t, 1>,
         RecordDim,
@@ -348,9 +333,9 @@ namespace
         llama::mapping::BindBitPackedIntAoS<llama::Constant<1>, llama::mapping::SignBit::Discard>::fn,
         llama::mapping::BindSplit<
             mp_list<mp_list<H1ProbK>, mp_list<H2ProbK>>,
-            llama::mapping::BindBitPackedFloatAoS<llama::Constant<6>, llama::Constant<16>>::template fn,
-            llama::mapping::BindBitPackedFloatAoS<llama::Constant<6>, llama::Constant<16>>::template fn,
-            true>::fn,
+            llama::mapping::BindBitPackedFloatAoS<llama::Constant<6>, llama::Constant<ManBits>>::template fn,
+            llama::mapping::BindBitPackedFloatAoS<llama::Constant<6>, llama::Constant<ManBits>>::template fn,
+            true>::template fn,
         true>;
 
     using Custom7 = llama::mapping::Split<
@@ -365,6 +350,7 @@ namespace
             true>::fn,
         true>;
 
+    template<std::size_t ManBits = 16>
     using Custom8 = llama::mapping::Split<
         llama::ArrayExtentsDynamic<RE::NTupleSize_t, 1>,
         RecordDim,
@@ -373,8 +359,8 @@ namespace
         llama::mapping::BindSplit<
             mp_list<mp_list<H1ProbK>, mp_list<H2ProbK>>,
             llama::mapping::BindChangeType<llama::mapping::BindAoS<>::fn, mp_list<mp_list<double, float>>>::fn,
-            llama::mapping::BindBitPackedFloatAoS<llama::Constant<6>, llama::Constant<16>>::template fn,
-            true>::fn,
+            llama::mapping::BindBitPackedFloatAoS<llama::Constant<6>, llama::Constant<ManBits>>::template fn,
+            true>::template fn,
         true>;
 
     using Custom9 = llama::mapping::Split<
@@ -410,17 +396,17 @@ namespace
     template<typename Mapping>
     void saveLayout(const std::filesystem::path& layoutFile)
     {
-        std::filesystem::create_directories(layoutFile.parent_path());
-        std::ofstream{layoutFile} << llama::toSvg(Mapping{typename Mapping::ArrayExtents{10}});
+        std::filesystem::create_directories(layoutsFolder);
+        std::ofstream{layoutsFolder / layoutFile} << llama::toSvg(Mapping{typename Mapping::ArrayExtents{3}}, 32);
     }
 
     template<typename View>
     void saveHeatmap(const View& v, const std::filesystem::path& heatmapFile)
     {
-        std::filesystem::create_directories(heatmapFile.parent_path());
+        std::filesystem::create_directories(heatmapFolder);
         const auto& m = v.mapping();
-        m.writeGnuplotDataFileBinary(v.blobs(), std::ofstream{heatmapFile});
-        std::ofstream{heatmapFile.parent_path() / "plot.sh"} << View::Mapping::gnuplotScriptBinary;
+        m.writeGnuplotDataFileBinary(v.blobs(), std::ofstream{heatmapFolder / heatmapFile});
+        std::ofstream{heatmapFolder / "plot.sh"} << View::Mapping::gnuplotScriptBinary;
     }
 
     template<typename View>
@@ -480,7 +466,7 @@ namespace
     template<typename Mapping, bool Sort = false>
     void testAnalysis(const std::string& inputFile, const std::string& mappingName)
     {
-        saveLayout<Mapping>(layoutsFolder + "/" + mappingName + ".svg");
+        saveLayout<Mapping>(mappingName + ".svg");
 
         auto [view, conversionTime] = convertRNTupleToLLAMA<Mapping>(inputFile);
         if constexpr(llama::mapping::isFieldAccessCount<Mapping>)
@@ -490,7 +476,7 @@ namespace
         }
         if constexpr(llama::mapping::isHeatmap<Mapping>)
         {
-            saveHeatmap(view, heatmapFolder + "/" + mappingName + "_conversion.bin");
+            saveHeatmap(view, mappingName + "_conversion.bin");
             clearHeatmap(view);
         }
 
@@ -513,8 +499,8 @@ namespace
         if constexpr(llama::mapping::isFieldAccessCount<Mapping>)
             view.mapping().printFieldHits(view.blobs());
         if constexpr(llama::mapping::isHeatmap<Mapping>)
-            saveHeatmap(view, heatmapFolder + "/" + mappingName + "_analysis.bin");
-        save(hist, mappingName);
+            saveHeatmap(view, mappingName + "_analysis.bin");
+        saveHist(hist, mappingName);
         std::size_t cachlinesLoaded = 0;
         if constexpr(
             !llama::mapping::isHeatmap<Mapping> && !llama::mapping::isFieldAccessCount<Mapping>
@@ -531,11 +517,11 @@ namespace
         const auto mean = hist.GetMean();
         const auto absError = std::abs(mean - expectedMean);
         fmt::print(
-            "{:16} {:>15.3f} {:>10.3f} {:>12.3f} {:>4} {:>10.1f} {:>7} {:>6.1f} {:>6.1f} {:>6.1f} {:>6.3f} {:>8}\n",
-            "\"" + mappingName + "\"",
+            "{:13} {:>9.3f} {:>9.3f} {:>9.3f} {:>4} {:>10.1f} {:>7} {:>6.1f} {:>6.1f} {:>6.1f} {:>6.3f} {:>8}\n",
+            mappingName,
             conversionTime / 1000.0,
             sortTime.count() / 1000.0,
-            totalAnalysisTime.count() / repetitions / 1000.0,
+            static_cast<double>(totalAnalysisTime.count()) / repetitions / 1000.0,
             repetitions,
             totalBlobSizes(view.mapping()) / 1024.0 / 1024.0,
             hist.GetEntries(),
@@ -561,11 +547,11 @@ auto main(int argc, const char* argv[]) -> int
                                       // format. Remove this once RNTuple hits production.
 
     fmt::print(
-        "{:16} {:>15} {:>10} {:>12} {:>4} {:>10} {:>7} {:>6} {:>6} {:>6} {:>6} {:>8}\n",
+        "{:13} {:>9} {:>9} {:>9} {:>4} {:>10} {:>7} {:>6} {:>6} {:>6} {:>6} {:>8}\n",
         "Mapping",
-        "RNT->LLAMA(ms)",
+        "Read(ms)",
         "Sort(ms)",
-        "Analysis(ms)",
+        "Anly(ms)",
         "Rep",
         "Size(MiB)",
         "Entries",
@@ -577,55 +563,70 @@ auto main(int argc, const char* argv[]) -> int
 
     testAnalysis<AoS>(inputFile, "AoS");
     // testAnalysis<AoS, true>(inputFile, "AoS");
-    testAnalysis<AoSFieldAccessCount>(inputFile, "AoS FAC"); // also shows how many bytes were needed,
+    testAnalysis<AoSFieldAccessCount>(inputFile, "AoS_FAC"); // also shows how many bytes were needed,
                                                              // which is actually the same for all analyses
-    testAnalysis<AoSHeatmap>(inputFile, "AoS Heatmap");
+    testAnalysis<AoSHeatmap>(inputFile, "AoS_HM");
     testAnalysis<AoSoA8>(inputFile, "AoSoA8");
     testAnalysis<AoSoA16>(inputFile, "AoSoA16");
-    testAnalysis<SoAASB>(inputFile, "SoA SB A");
-    testAnalysis<SoAMB>(inputFile, "SoA MB");
+    testAnalysis<SoAASB>(inputFile, "SoA_SB_A");
+    testAnalysis<SoAMB>(inputFile, "SoA_MB");
     // testAnalysis<SoAMB, true>(inputFile, "SoA MB S");
 
-    testAnalysis<AoS_Floats>(inputFile, "AoS float");
-    testAnalysis<SoAMB_Floats>(inputFile, "SoA MB float");
+    testAnalysis<AoS_Floats>(inputFile, "AoS_F");
+    testAnalysis<SoAMB_Floats>(inputFile, "SoA_MB_F");
     // testAnalysis<SoAMB_Floats, true>(inputFile, "SoA MB S float");
 
     testAnalysis<Custom1>(inputFile, "Custom1");
     testAnalysis<Custom2>(inputFile, "Custom2");
     testAnalysis<Custom3>(inputFile, "Custom3");
     testAnalysis<Custom4>(inputFile, "Custom4");
-    testAnalysis<Custom4Heatmap>(inputFile, "Custom4 Heatmap");
+    testAnalysis<Custom4Heatmap>(inputFile, "Custom4_HM");
     testAnalysis<Custom5>(inputFile, "Custom5");
-    testAnalysis<Custom5, true>(inputFile, "Custom5 S");
-    testAnalysis<Custom6>(inputFile, "Custom6");
-    testAnalysis<Custom6, true>(inputFile, "Custom6 S");
+    testAnalysis<Custom5, true>(inputFile, "Custom5_S");
+    testAnalysis<Custom6<>>(inputFile, "Custom6");
+    testAnalysis<Custom6<>, true>(inputFile, "Custom6_S");
     testAnalysis<Custom7>(inputFile, "Custom7");
-    testAnalysis<Custom7, true>(inputFile, "Custom7 S");
-    testAnalysis<Custom8>(inputFile, "Custom8");
-    testAnalysis<Custom8, true>(inputFile, "Custom8 S");
+    testAnalysis<Custom7, true>(inputFile, "Custom7_S");
+    testAnalysis<Custom8<>>(inputFile, "Custom8");
+    testAnalysis<Custom8<>, true>(inputFile, "Custom8_S");
     testAnalysis<Custom9>(inputFile, "Custom9");
-    testAnalysis<Custom9, true>(inputFile, "Custom9 S");
+    testAnalysis<Custom9, true>(inputFile, "Custom9_S");
+    testAnalysis<Custom1_3_H1ProbK_float>(inputFile, "Custom1_3_F");
 
     constexpr auto fullExp = 11;
     constexpr auto fullMan = 52;
-    testAnalysis<MakeBitpacked<fullExp, fullMan>>(inputFile, fmt::format("BP SoA {}e{}", fullMan, fullExp));
+    testAnalysis<MakeBitpacked<fullExp, fullMan>>(inputFile, fmt::format("BP_SoA_{}e{}", fullMan, fullExp));
 
     // using namespace boost::mp11;
-    // mp_for_each<mp_reverse<mp_drop_c<mp_iota_c<fullExp>, 1>>>(
+    // mp_for_each<mp_reverse<mp_iota_c<fullExp>>>(
     //     [&](auto ic)
     //     {
     //         constexpr auto exp = decltype(ic)::value;
-    //         testAnalysis<MakeBitpacked<exp, fullMan>>(inputFile, fmt::format("BP SoA {}e{}", fullMan, exp));
+    //         testAnalysis<MakeBitpacked<exp, fullMan>>(inputFile, fmt::format("BP_SoA_{}e{}", fullMan, exp));
     //     });
-    // mp_for_each<mp_reverse<mp_drop_c<mp_iota_c<fullMan>, 1>>>(
+    // mp_for_each<mp_reverse<mp_iota_c<fullMan>>>(
     //     [&](auto ic)
     //     {
     //         constexpr auto man = decltype(ic)::value;
-    //         testAnalysis<MakeBitpacked<fullExp, man>>(inputFile, fmt::format("BP SoA {}e{}", man, fullExp));
+    //         testAnalysis<MakeBitpacked<fullExp, man>>(inputFile, fmt::format("BP_SoA_{}e{}", man, fullExp));
     //     });
 
     // we typically observe wrong results at exp < 6, and man < 16
-    testAnalysis<MakeBitpacked<6, 16>>(inputFile, "BP SoA 16e6");
+    testAnalysis<MakeBitpacked<6, 16>>(inputFile, "BP_SoA_16e6");
+
+    // mp_for_each<mp_reverse<mp_iota_c<fullMan + 1>>>(
+    //     [&](auto ic)
+    //     {
+    //         constexpr auto man = decltype(ic)::value;
+    //         testAnalysis<Custom8<man>>(inputFile, fmt::format("Custom8_{}e6", man));
+    //     });
+    //
+    // mp_for_each<mp_reverse<mp_iota_c<fullMan + 1>>>(
+    //     [&](auto ic)
+    //     {
+    //         constexpr auto man = decltype(ic)::value;
+    //         testAnalysis<Custom6<man>>(inputFile, fmt::format("Custom6_{}e6", man));
+    //     });
 
     return 0;
 }