diff --git a/examples/root/lhcb_analysis/CMakeLists.txt b/examples/root/lhcb_analysis/CMakeLists.txt index 731278ecdc..f4ca4396de 100644 --- a/examples/root/lhcb_analysis/CMakeLists.txt +++ b/examples/root/lhcb_analysis/CMakeLists.txt @@ -10,7 +10,7 @@ if (NOT TARGET llama::llama) find_package(llama REQUIRED) endif() add_executable(${PROJECT_NAME} lhcb.cpp) -#target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_20) +target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_20) target_link_libraries(${PROJECT_NAME} PRIVATE ROOT::Hist ROOT::Graf ROOT::Gpad ROOT::ROOTNTuple llama::llama OpenMP::OpenMP_CXX) if (MSVC) diff --git a/examples/root/lhcb_analysis/README.md b/examples/root/lhcb_analysis/README.md index 4226cf97b3..31a6caba22 100644 --- a/examples/root/lhcb_analysis/README.md +++ b/examples/root/lhcb_analysis/README.md @@ -2,9 +2,8 @@ This example is a LLAMA version from the IO benchmark found here: https://github.com/jblomer/iotools/blob/master/lhcb.cxx The lhcb analysis example requires an input file, which can be downloaded here: -https://root.cern/files/RNTuple/ - -The file is typically called B2HHH~zstd.ntuple - +https://root.cern/files/RNTuple/. 
+The file is typically called B2HHH~zstd.ntuple, so you can run: +`curl https://root.cern/files/RNTuple/B2HHH~zstd.ntuple -o B2HHH~zstd.ntuple` If you get an error due to a version incompatibility of the file, try this workaround: https://github.com/jblomer/iotools/issues/9 diff --git a/examples/root/lhcb_analysis/lhcb.cpp b/examples/root/lhcb_analysis/lhcb.cpp index c70df18d62..e279ed1bab 100644 --- a/examples/root/lhcb_analysis/lhcb.cpp +++ b/examples/root/lhcb_analysis/lhcb.cpp @@ -33,48 +33,65 @@ namespace constexpr auto analysisRepetitionsInstrumentation = 1; // costly, so run less often // clang-format off - struct H1isMuon{}; - struct H2isMuon{}; - struct H3isMuon{}; + struct BFlightDistance{}; + struct BVertexChi2{}; + struct H1Charge{}; + struct H1IpChi2{}; struct H1PX{}; struct H1PY{}; struct H1PZ{}; struct H1ProbK{}; struct H1ProbPi{}; + struct H1isMuon{}; + struct H2Charge{}; + struct H2IpChi2{}; struct H2PX{}; struct H2PY{}; struct H2PZ{}; struct H2ProbK{}; struct H2ProbPi{}; + struct H2isMuon{}; + struct H3Charge{}; + struct H3IpChi2{}; struct H3PX{}; struct H3PY{}; struct H3PZ{}; struct H3ProbK{}; struct H3ProbPi{}; + struct H3isMuon{}; // clang-format on + // TODO(bgruber): only needed data is loaded. Should we load the entire data set? 
using RecordDim = llama::Record< - llama::Field, - llama::Field, - llama::Field, + // llama::Field, + // llama::Field, + // llama::Field, + // llama::Field, llama::Field, llama::Field, llama::Field, llama::Field, llama::Field, + llama::Field, + // llama::Field, + // llama::Field, llama::Field, llama::Field, llama::Field, llama::Field, llama::Field, + llama::Field, + // llama::Field, + // llama::Field, llama::Field, llama::Field, llama::Field, llama::Field, - llama::Field>; + llama::Field, + llama::Field>; namespace RE = ROOT::Experimental; @@ -95,59 +112,17 @@ namespace auto view = llama::allocViewUninitialized(Mapping{typename Mapping::ArrayExtents{ntuple->GetNEntries()}}); - auto viewH1IsMuon = ntuple->GetView("H1_isMuon"); - auto viewH2IsMuon = ntuple->GetView("H2_isMuon"); - auto viewH3IsMuon = ntuple->GetView("H3_isMuon"); - - auto viewH1PX = ntuple->GetView("H1_PX"); - auto viewH1PY = ntuple->GetView("H1_PY"); - auto viewH1PZ = ntuple->GetView("H1_PZ"); - auto viewH1ProbK = ntuple->GetView("H1_ProbK"); - auto viewH1ProbPi = ntuple->GetView("H1_ProbPi"); - - auto viewH2PX = ntuple->GetView("H2_PX"); - auto viewH2PY = ntuple->GetView("H2_PY"); - auto viewH2PZ = ntuple->GetView("H2_PZ"); - auto viewH2ProbK = ntuple->GetView("H2_ProbK"); - auto viewH2ProbPi = ntuple->GetView("H2_ProbPi"); - - auto viewH3PX = ntuple->GetView("H3_PX"); - auto viewH3PY = ntuple->GetView("H3_PY"); - auto viewH3PZ = ntuple->GetView("H3_PZ"); - auto viewH3ProbK = ntuple->GetView("H3_ProbK"); - auto viewH3ProbPi = ntuple->GetView("H3_ProbPi"); - - for(auto i : ntuple->GetEntryRange()) - { - auto&& event = view(i); - event(H1isMuon{}) = viewH1IsMuon(i); - event(H2isMuon{}) = viewH2IsMuon(i); - event(H3isMuon{}) = viewH3IsMuon(i); - - // a few sanity checks in case we mess up with the bitpacking - assert(event(H1isMuon{}) == viewH1IsMuon(i)); - assert(event(H2isMuon{}) == viewH2IsMuon(i)); - assert(event(H3isMuon{}) == viewH3IsMuon(i)); - - event(H1PX{}) = viewH1PX(i); - event(H1PY{}) = 
viewH1PY(i); - event(H1PZ{}) = viewH1PZ(i); - event(H1ProbK{}) = viewH1ProbK(i); - event(H1ProbPi{}) = viewH1ProbPi(i); - - event(H2PX{}) = viewH2PX(i); - event(H2PY{}) = viewH2PY(i); - event(H2PZ{}) = viewH2PZ(i); - event(H2ProbK{}) = viewH2ProbK(i); - event(H2ProbPi{}) = viewH2ProbPi(i); - - event(H3PX{}) = viewH3PX(i); - event(H3PY{}) = viewH3PY(i); - event(H3PZ{}) = viewH3PZ(i); - event(H3ProbK{}) = viewH3ProbK(i); - event(H3ProbPi{}) = viewH3ProbPi(i); - } - + llama::forEachLeafCoord( + [&](RecordCoord) + { + using Type = llama::GetType; + using Tag = llama::GetTag; + auto columnName = std::string(llama::structName()); + columnName.insert(columnName.begin() + 1 + (columnName[0] == 'H'), '_'); + auto columnView = ntuple->GetView(columnName); + for(auto i : ntuple->GetEntryRange()) + view(i)(Tag{}) = columnView(i); + }); const auto duration = std::chrono::duration_cast(std::chrono::steady_clock::now() - begin).count(); @@ -233,18 +208,18 @@ namespace return std::tuple{hists[0], duration}; } - const auto histogramFolder = std::string("lhcb/histograms"); - const auto layoutsFolder = std::string("lhcb/layouts"); - const auto heatmapFolder = std::string("lhcb/heatmaps"); + const auto histogramFolder = std::filesystem::path("lhcb/histograms"); + const auto layoutsFolder = std::filesystem::path("lhcb/layouts"); + const auto heatmapFolder = std::filesystem::path("lhcb/heatmaps"); - void save(TH1D& h, const std::string& mappingName) + void saveHist(TH1D& h, const std::string& mappingName) { - const auto file = std::filesystem::path(histogramFolder + "/" + mappingName + ".png"); - std::filesystem::create_directories(file.parent_path()); + std::filesystem::create_directories(histogramFolder); auto c = TCanvas("c", "", 800, 700); h.GetXaxis()->SetTitle("m_{KKK} [MeV/c^{2}]"); - h.DrawCopy(); - c.Print(file.c_str()); + h.DrawCopy("", ""); + c.Print((histogramFolder / (mappingName + ".png")).c_str()); + c.Print((histogramFolder / (mappingName + ".pdf")).c_str()); // 
c.Modified(); // c.Update(); // auto app = TApplication("", nullptr, nullptr); @@ -327,6 +302,15 @@ namespace true>::fn, true>; + using Custom1_3_H1ProbK_float = llama::mapping::Split< + llama::ArrayExtentsDynamic, + RecordDim, + mp_list, mp_list, mp_list, mp_list>, + llama::mapping::BindChangeType::fn, mp_list>>::fn, + llama::mapping:: + BindSplit>, llama::mapping::AlignedAoS, llama::mapping::AlignedAoS, true>::fn, + true>; + using Custom4Heatmap = llama::mapping::Heatmap; using Custom5 = llama::mapping::Split< @@ -341,6 +325,7 @@ namespace true>::fn, true>; + template using Custom6 = llama::mapping::Split< llama::ArrayExtentsDynamic, RecordDim, @@ -348,9 +333,9 @@ namespace llama::mapping::BindBitPackedIntAoS, llama::mapping::SignBit::Discard>::fn, llama::mapping::BindSplit< mp_list, mp_list>, - llama::mapping::BindBitPackedFloatAoS, llama::Constant<16>>::template fn, - llama::mapping::BindBitPackedFloatAoS, llama::Constant<16>>::template fn, - true>::fn, + llama::mapping::BindBitPackedFloatAoS, llama::Constant>::template fn, + llama::mapping::BindBitPackedFloatAoS, llama::Constant>::template fn, + true>::template fn, true>; using Custom7 = llama::mapping::Split< @@ -365,6 +350,7 @@ namespace true>::fn, true>; + template using Custom8 = llama::mapping::Split< llama::ArrayExtentsDynamic, RecordDim, @@ -373,8 +359,8 @@ namespace llama::mapping::BindSplit< mp_list, mp_list>, llama::mapping::BindChangeType::fn, mp_list>>::fn, - llama::mapping::BindBitPackedFloatAoS, llama::Constant<16>>::template fn, - true>::fn, + llama::mapping::BindBitPackedFloatAoS, llama::Constant>::template fn, + true>::template fn, true>; using Custom9 = llama::mapping::Split< @@ -410,17 +396,17 @@ namespace template void saveLayout(const std::filesystem::path& layoutFile) { - std::filesystem::create_directories(layoutFile.parent_path()); - std::ofstream{layoutFile} << llama::toSvg(Mapping{typename Mapping::ArrayExtents{10}}); + std::filesystem::create_directories(layoutsFolder); + 
std::ofstream{layoutsFolder / layoutFile} << llama::toSvg(Mapping{typename Mapping::ArrayExtents{3}}, 32); } template void saveHeatmap(const View& v, const std::filesystem::path& heatmapFile) { - std::filesystem::create_directories(heatmapFile.parent_path()); + std::filesystem::create_directories(heatmapFolder); const auto& m = v.mapping(); - m.writeGnuplotDataFileBinary(v.blobs(), std::ofstream{heatmapFile}); - std::ofstream{heatmapFile.parent_path() / "plot.sh"} << View::Mapping::gnuplotScriptBinary; + m.writeGnuplotDataFileBinary(v.blobs(), std::ofstream{heatmapFolder / heatmapFile}); + std::ofstream{heatmapFolder / "plot.sh"} << View::Mapping::gnuplotScriptBinary; } template @@ -480,7 +466,7 @@ namespace template void testAnalysis(const std::string& inputFile, const std::string& mappingName) { - saveLayout(layoutsFolder + "/" + mappingName + ".svg"); + saveLayout(mappingName + ".svg"); auto [view, conversionTime] = convertRNTupleToLLAMA(inputFile); if constexpr(llama::mapping::isFieldAccessCount) @@ -490,7 +476,7 @@ namespace } if constexpr(llama::mapping::isHeatmap) { - saveHeatmap(view, heatmapFolder + "/" + mappingName + "_conversion.bin"); + saveHeatmap(view, mappingName + "_conversion.bin"); clearHeatmap(view); } @@ -513,8 +499,8 @@ namespace if constexpr(llama::mapping::isFieldAccessCount) view.mapping().printFieldHits(view.blobs()); if constexpr(llama::mapping::isHeatmap) - saveHeatmap(view, heatmapFolder + "/" + mappingName + "_analysis.bin"); - save(hist, mappingName); + saveHeatmap(view, mappingName + "_analysis.bin"); + saveHist(hist, mappingName); std::size_t cachlinesLoaded = 0; if constexpr( !llama::mapping::isHeatmap && !llama::mapping::isFieldAccessCount @@ -531,11 +517,11 @@ namespace const auto mean = hist.GetMean(); const auto absError = std::abs(mean - expectedMean); fmt::print( - "{:16} {:>15.3f} {:>10.3f} {:>12.3f} {:>4} {:>10.1f} {:>7} {:>6.1f} {:>6.1f} {:>6.1f} {:>6.3f} {:>8}\n", - "\"" + mappingName + "\"", + "{:13} 
{:>9.3f} {:>9.3f} {:>9.3f} {:>4} {:>10.1f} {:>7} {:>6.1f} {:>6.1f} {:>6.1f} {:>6.3f} {:>8}\n", + mappingName, conversionTime / 1000.0, sortTime.count() / 1000.0, - totalAnalysisTime.count() / repetitions / 1000.0, + static_cast(totalAnalysisTime.count()) / repetitions / 1000.0, repetitions, totalBlobSizes(view.mapping()) / 1024.0 / 1024.0, hist.GetEntries(), @@ -561,11 +547,11 @@ auto main(int argc, const char* argv[]) -> int // format. Remove this once RNTuple hits production. fmt::print( - "{:16} {:>15} {:>10} {:>12} {:>4} {:>10} {:>7} {:>6} {:>6} {:>6} {:>6} {:>8}\n", + "{:13} {:>9} {:>9} {:>9} {:>4} {:>10} {:>7} {:>6} {:>6} {:>6} {:>6} {:>8}\n", "Mapping", - "RNT->LLAMA(ms)", + "Read(ms)", "Sort(ms)", - "Analysis(ms)", + "Anly(ms)", "Rep", "Size(MiB)", "Entries", @@ -577,55 +563,70 @@ auto main(int argc, const char* argv[]) -> int testAnalysis(inputFile, "AoS"); // testAnalysis(inputFile, "AoS"); - testAnalysis(inputFile, "AoS FAC"); // also shows how many bytes were needed, + testAnalysis(inputFile, "AoS_FAC"); // also shows how many bytes were needed, // which is actually the same for all analyses - testAnalysis(inputFile, "AoS Heatmap"); + testAnalysis(inputFile, "AoS_HM"); testAnalysis(inputFile, "AoSoA8"); testAnalysis(inputFile, "AoSoA16"); - testAnalysis(inputFile, "SoA SB A"); - testAnalysis(inputFile, "SoA MB"); + testAnalysis(inputFile, "SoA_SB_A"); + testAnalysis(inputFile, "SoA_MB"); // testAnalysis(inputFile, "SoA MB S"); - testAnalysis(inputFile, "AoS float"); - testAnalysis(inputFile, "SoA MB float"); + testAnalysis(inputFile, "AoS_F"); + testAnalysis(inputFile, "SoA_MB_F"); // testAnalysis(inputFile, "SoA MB S float"); testAnalysis(inputFile, "Custom1"); testAnalysis(inputFile, "Custom2"); testAnalysis(inputFile, "Custom3"); testAnalysis(inputFile, "Custom4"); - testAnalysis(inputFile, "Custom4 Heatmap"); + testAnalysis(inputFile, "Custom4_HM"); testAnalysis(inputFile, "Custom5"); - testAnalysis(inputFile, "Custom5 S"); - testAnalysis(inputFile, 
"Custom6"); - testAnalysis(inputFile, "Custom6 S"); + testAnalysis(inputFile, "Custom5_S"); + testAnalysis>(inputFile, "Custom6"); + testAnalysis, true>(inputFile, "Custom6_S"); testAnalysis(inputFile, "Custom7"); - testAnalysis(inputFile, "Custom7 S"); - testAnalysis(inputFile, "Custom8"); - testAnalysis(inputFile, "Custom8 S"); + testAnalysis(inputFile, "Custom7_S"); + testAnalysis>(inputFile, "Custom8"); + testAnalysis, true>(inputFile, "Custom8_S"); testAnalysis(inputFile, "Custom9"); - testAnalysis(inputFile, "Custom9 S"); + testAnalysis(inputFile, "Custom9_S"); + testAnalysis(inputFile, "Custom1_3_F"); constexpr auto fullExp = 11; constexpr auto fullMan = 52; - testAnalysis>(inputFile, fmt::format("BP SoA {}e{}", fullMan, fullExp)); + testAnalysis>(inputFile, fmt::format("BP_SoA_{}e{}", fullMan, fullExp)); // using namespace boost::mp11; - // mp_for_each, 1>>>( + // mp_for_each>>( // [&](auto ic) // { // constexpr auto exp = decltype(ic)::value; - // testAnalysis>(inputFile, fmt::format("BP SoA {}e{}", fullMan, exp)); + // testAnalysis>(inputFile, fmt::format("BP_SoA_{}e{}", fullMan, exp)); // }); - // mp_for_each, 1>>>( + // mp_for_each>>( // [&](auto ic) // { // constexpr auto man = decltype(ic)::value; - // testAnalysis>(inputFile, fmt::format("BP SoA {}e{}", man, fullExp)); + // testAnalysis>(inputFile, fmt::format("BP_SoA_{}e{}", man, fullExp)); // }); // we typically observe wrong results at exp < 6, and man < 16 - testAnalysis>(inputFile, "BP SoA 16e6"); + testAnalysis>(inputFile, "BP_SoA_16e6"); + + // mp_for_each>>( + // [&](auto ic) + // { + // constexpr auto man = decltype(ic)::value; + // testAnalysis>(inputFile, fmt::format("Custom8_16e{}", man)); + // }); + // + // mp_for_each>>( + // [&](auto ic) + // { + // constexpr auto man = decltype(ic)::value; + // testAnalysis>(inputFile, fmt::format("Custom6_16e{}", man)); + // }); return 0; }