From 0df47040b73a4bfccca08d3ded0b486c110a96be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 4 May 2023 12:22:24 +0200 Subject: [PATCH 01/13] Extract verifyChunk --- include/openPMD/RecordComponent.hpp | 5 ++++ include/openPMD/RecordComponent.tpp | 38 +++++------------------------ src/RecordComponent.cpp | 27 ++++++++++++-------- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index 3a924dfcc9..deec871d9c 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -537,6 +537,11 @@ OPENPMD_protected } void readBase(bool require_unit_si); + + template + void verifyChunk(Offset const &, Extent const &) const; + + void verifyChunk(Datatype, Offset const &, Extent const &) const; }; // RecordComponent } // namespace openPMD diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp index e8ba6006ab..d28279c028 100644 --- a/include/openPMD/RecordComponent.tpp +++ b/include/openPMD/RecordComponent.tpp @@ -278,38 +278,7 @@ template inline DynamicMemoryView RecordComponent::storeChunk(Offset o, Extent e, F &&createBuffer) { - if (constant()) - throw std::runtime_error( - "Chunks cannot be written for a constant RecordComponent."); - if (empty()) - throw std::runtime_error( - "Chunks cannot be written for an empty RecordComponent."); - Datatype dtype = determineDatatype(); - if (dtype != getDatatype()) - { - std::ostringstream oss; - oss << "Datatypes of chunk data (" << dtype - << ") and record component (" << getDatatype() << ") do not match."; - throw std::runtime_error(oss.str()); - } - uint8_t dim = getDimensionality(); - if (e.size() != dim || o.size() != dim) - { - std::ostringstream oss; - oss << "Dimensionality of chunk (" - << "offset=" << o.size() << "D, " - << "extent=" << e.size() << "D) " - << "and record component (" << int(dim) << "D) " - << "do not match."; - throw std::runtime_error(oss.str()); - } - Extent dse = getExtent(); - for (uint8_t i = 0; i < dim; ++i) - if (dse[i] < o[i] + e[i]) - throw std::runtime_error( - "Chunk does not reside inside dataset (Dimension on index " + - std::to_string(i) + ". DS: " + std::to_string(dse[i]) + - " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); + verifyChunk(o, e); /* * The openPMD backend might not yet know about this dataset. 
@@ -407,4 +376,9 @@ auto RecordComponent::visit(Args &&...args) getDatatype(), *this, std::forward(args)...); } +template +void RecordComponent::verifyChunk(Offset const &o, Extent const &e) const +{ + verifyChunk(determineDatatype(), o, e); +} } // namespace openPMD diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index 3cca47106a..cbae8b8014 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -452,6 +452,21 @@ bool RecordComponent::dirtyRecursive() const void RecordComponent::storeChunk( auxiliary::WriteBuffer buffer, Datatype dtype, Offset o, Extent e) +{ + verifyChunk(dtype, o, e); + + Parameter dWrite; + dWrite.offset = std::move(o); + dWrite.extent = std::move(e); + dWrite.dtype = dtype; + /* std::static_pointer_cast correctly reference-counts the pointer */ + dWrite.data = std::move(buffer); + auto &rc = get(); + rc.m_chunks.push(IOTask(this, std::move(dWrite))); +} + +void RecordComponent::verifyChunk( + Datatype dtype, Offset const &o, Extent const &e) const { if (constant()) throw std::runtime_error( @@ -467,6 +482,8 @@ void RecordComponent::storeChunk( throw std::runtime_error(oss.str()); } uint8_t dim = getDimensionality(); + Extent dse = getExtent(); + if (e.size() != dim || o.size() != dim) { std::ostringstream oss; @@ -477,22 +494,12 @@ void RecordComponent::storeChunk( << "do not match."; throw std::runtime_error(oss.str()); } - Extent dse = getExtent(); for (uint8_t i = 0; i < dim; ++i) if (dse[i] < o[i] + e[i]) throw std::runtime_error( "Chunk does not reside inside dataset (Dimension on index " + std::to_string(i) + ". DS: " + std::to_string(dse[i]) + " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); - - Parameter dWrite; - dWrite.offset = o; - dWrite.extent = e; - dWrite.dtype = dtype; - /* std::static_pointer_cast correctly reference-counts the pointer */ - dWrite.data = std::move(buffer); - auto &rc = get(); - rc.m_chunks.push(IOTask(this, std::move(dWrite))); } namespace From af6eaad36e1d97959f5af5d42ad380655189ddfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 25 Apr 2023 11:13:32 +0200 Subject: [PATCH 02/13] Main implementation Use magic number instead of API call (impl) --- include/openPMD/Dataset.hpp | 11 +++ include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 13 +++- include/openPMD/IO/IOTask.hpp | 1 + include/openPMD/RecordComponent.tpp | 14 +++- .../openPMD/backend/BaseRecordComponent.hpp | 2 + .../openPMD/backend/PatchRecordComponent.hpp | 35 +++++++++- src/Dataset.cpp | 41 +++++++++++ src/IO/ADIOS/ADIOS2IOHandler.cpp | 19 ++++- src/RecordComponent.cpp | 69 ++++++++++++++----- src/backend/BaseRecordComponent.cpp | 13 ++++ src/backend/PatchRecordComponent.cpp | 7 +- src/binding/python/PatchRecordComponent.cpp | 6 +- 12 files changed, 200 insertions(+), 31 deletions(-) diff --git a/include/openPMD/Dataset.hpp b/include/openPMD/Dataset.hpp index 8757a3cf0a..0032888541 100644 --- a/include/openPMD/Dataset.hpp +++ b/include/openPMD/Dataset.hpp @@ -22,7 +22,9 @@ #include "openPMD/Datatype.hpp" +#include #include +#include #include #include #include @@ -37,6 +39,11 @@ class Dataset friend class RecordComponent; public: + enum : std::uint64_t + { + JOINED_DIMENSION = std::numeric_limits::max() + }; + Dataset(Datatype, Extent, std::string options = "{}"); /** @@ -53,5 +60,9 @@ class Dataset Datatype dtype; uint8_t rank; std::string options = "{}"; //!< backend-dependent JSON configuration + + bool empty() const; + + std::optional joinedDimension() const; }; } // namespace openPMD diff --git 
a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index 66c8c7a466..24454fa8dc 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -60,6 +60,8 @@ namespace openPMD { #if openPMD_HAVE_ADIOS2 +std::optional joinedDimension(adios2::Dims const &dims); + class ADIOS2IOHandler; namespace detail @@ -443,15 +445,24 @@ class ADIOS2IOHandlerImpl std::to_string(actualDim) + ")"); } } + auto joinedDim = joinedDimension(shape); for (unsigned int i = 0; i < actualDim; i++) { - if (offset[i] + extent[i] > shape[i]) + if (!(joinedDim.has_value() && *joinedDim == i) && + offset[i] + extent[i] > shape[i]) { throw std::runtime_error( "[ADIOS2] Dataset access out of bounds."); } } + if (joinedDim.has_value() && !offset.empty()) + { + throw std::runtime_error( + "[ADIOS2] Offset must be an empty vector in case of joined " + "array."); + } + var.SetSelection( {adios2::Dims(offset.begin(), offset.end()), adios2::Dims(extent.begin(), extent.end())}); diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index d2fc05f379..8334c67314 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -326,6 +326,7 @@ struct OPENPMDAPI_EXPORT Parameter Extent extent = {}; Datatype dtype = Datatype::UNDEFINED; std::string options = "{}"; + std::optional joinedDimension; /** Warn about unused JSON paramters * diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp index d28279c028..91498b05eb 100644 --- a/include/openPMD/RecordComponent.tpp +++ b/include/openPMD/RecordComponent.tpp @@ -259,8 +259,17 @@ RecordComponent::storeChunk(T_ContiguousContainer &data, Offset o, Extent e) // default arguments // offset = {0u}: expand to right dim {0u, 0u, ...} Offset offset = o; - if (o.size() == 1u && o.at(0) == 0u && dim > 1u) - offset = Offset(dim, 0u); + if (o.size() == 1u && o.at(0) == 0u) + { + if (joinedDimension().has_value()) + { + offset.clear(); + } + else if (dim > 1u) + { + offset = Offset(dim, 0u); + } + } // extent = {-1u}: take full size Extent extent(dim, 1u); @@ -303,6 +312,7 @@ RecordComponent::storeChunk(Offset o, Extent e, F &&createBuffer) dCreate.name = rc.m_name; dCreate.extent = getExtent(); dCreate.dtype = getDatatype(); + dCreate.joinedDimension = joinedDimension(); if (!rc.m_dataset.has_value()) { throw error::WrongAPIUsage( diff --git a/include/openPMD/backend/BaseRecordComponent.hpp b/include/openPMD/backend/BaseRecordComponent.hpp index 0288e9bb9a..fe4490830d 100644 --- a/include/openPMD/backend/BaseRecordComponent.hpp +++ b/include/openPMD/backend/BaseRecordComponent.hpp @@ -143,6 +143,8 @@ class BaseRecordComponent : virtual public Attributable */ bool constant() const; + std::optional joinedDimension() const; + /** * Get data chunks that are available to be loaded from the backend. 
* Note that this is backend-dependent information and the returned diff --git a/include/openPMD/backend/PatchRecordComponent.hpp b/include/openPMD/backend/PatchRecordComponent.hpp index ff8b330c2f..c95df8aed9 100644 --- a/include/openPMD/backend/PatchRecordComponent.hpp +++ b/include/openPMD/backend/PatchRecordComponent.hpp @@ -20,9 +20,10 @@ */ #pragma once -#include "openPMD/RecordComponent.hpp" #include "openPMD/auxiliary/ShareRawInternal.hpp" #include "openPMD/backend/BaseRecordComponent.hpp" +#include "openPMD/Error.hpp" +#include "openPMD/RecordComponent.hpp" #include #include @@ -85,6 +86,9 @@ class PatchRecordComponent : public RecordComponent template void store(uint64_t idx, T); + template + void store(T); + // clang-format off OPENPMD_private // clang-format on @@ -180,4 +184,33 @@ inline void PatchRecordComponent::store(uint64_t idx, T data) auto &rc = get(); rc.m_chunks.push(IOTask(this, std::move(dWrite))); } + +template +inline void PatchRecordComponent::store(T data) +{ + Datatype dtype = determineDatatype(); + if (dtype != getDatatype()) + { + std::ostringstream oss; + oss << "Datatypes of patch data (" << dtype << ") and dataset (" + << getDatatype() << ") do not match."; + throw std::runtime_error(oss.str()); + } + + if (!joinedDimension().has_value()) + { + throw error::WrongAPIUsage( + "[PatchRecordComponent::store] API call without explicit " + "specification of index only allowed when a joined dimension is " + "specified."); + } + + Parameter dWrite; + dWrite.offset = {}; + dWrite.extent = {1}; + dWrite.dtype = dtype; + dWrite.data = std::make_shared(data); + auto &rc = get(); + rc.m_chunks.push(IOTask(this, std::move(dWrite))); +} } // namespace openPMD diff --git a/src/Dataset.cpp b/src/Dataset.cpp index 662bd2d29f..c1546e9ef0 100644 --- a/src/Dataset.cpp +++ b/src/Dataset.cpp @@ -19,6 +19,7 @@ * If not, see . 
*/ #include "openPMD/Dataset.hpp" +#include "openPMD/Error.hpp" #include #include @@ -30,6 +31,9 @@ Dataset::Dataset(Datatype d, Extent e, std::string options_in) { // avoid initialization order issues rank = static_cast(extent.size()); + // Call this in order to have early error message in case of wrong + // specification of joined dimensions + joinedDimension(); } Dataset::Dataset(Extent e) : Dataset(Datatype::UNDEFINED, std::move(e)) @@ -49,4 +53,41 @@ Dataset &Dataset::extend(Extent newExtents) extent = newExtents; return *this; } + +bool Dataset::empty() const +{ + auto jd = joinedDimension(); + for (size_t i = 0; i < extent.size(); ++i) + { + if (extent[i] == 0 && (!jd.has_value() || jd.value() != i)) + { + return true; + } + } + return false; +} + +std::optional Dataset::joinedDimension() const +{ + std::optional res; + for (size_t i = 0; i < extent.size(); ++i) + { + if (extent[i] == JOINED_DIMENSION) + { + if (res.has_value()) + { + throw error::WrongAPIUsage( + "Must specify JOINED_DIMENSION at most once (found at " + "indices " + + std::to_string(res.value()) + " and " + std::to_string(i) + + ")"); + } + else + { + res = i; + } + } + } + return res; +} } // namespace openPMD diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 08fac073cf..8d9ea653ad 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -68,6 +68,18 @@ namespace openPMD #if openPMD_HAVE_ADIOS2 + std::optional joinedDimension(adios2::Dims const &dims) + { + for (size_t i = 0; i < dims.size(); ++i) + { + if (dims[i] == adios2::JoinedDim) + { + return i; + } + } + return std::nullopt; + } + #if openPMD_HAVE_MPI ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( @@ -741,8 +753,11 @@ void ADIOS2IOHandlerImpl::createDataset( varName + "' remain unused:\n"); // cast from openPMD::Extent to adios2::Dims - adios2::Dims const shape( - parameters.extent.begin(), parameters.extent.end()); + adios2::Dims shape(parameters.extent.begin(), parameters.extent.end()); + if (auto jd = parameters.joinedDimension; jd.has_value()) + { + shape[jd.value()] = adios2::JoinedDim; + } auto &fileData = getFileData(file, IfFileNotOpen::ThrowError); diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index cbae8b8014..a8f7d734ba 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -95,10 +95,7 @@ RecordComponent &RecordComponent::resetDataset(Dataset d) } // if( d.extent.empty() ) // throw std::runtime_error("Dataset extent must be at least 1D."); - if (std::any_of( - d.extent.begin(), d.extent.end(), [](Extent::value_type const &i) { - return i == 0u; - })) + if (d.empty()) return makeEmpty(std::move(d)); rc.m_isEmpty = false; @@ -299,6 +296,7 @@ void RecordComponent::flush( dCreate.extent = getExtent(); dCreate.dtype = getDatatype(); dCreate.options = rc.m_dataset.value().options; + dCreate.joinedDimension = joinedDimension(); IOHandler()->enqueue(IOTask(this, dCreate)); } } @@ -484,22 +482,57 @@ void RecordComponent::verifyChunk( uint8_t dim = getDimensionality(); Extent dse = getExtent(); - if (e.size() != dim || o.size() != dim) + if (auto jd = joinedDimension(); jd.has_value()) { - std::ostringstream oss; - oss << "Dimensionality of chunk (" - << "offset=" << o.size() << "D, " - << "extent=" << e.size() << "D) " - << "and record component (" << int(dim) << "D) " - << "do not match."; - throw std::runtime_error(oss.str()); + if (o.size() != 0) + { + std::ostringstream oss; + oss << "Joined array: Must specify an empty offset (given: " + << "offset=" << 
o.size() << "D, " + << "extent=" << e.size() << "D)."; + throw std::runtime_error(oss.str()); + } + if (e.size() != dim) + { + std::ostringstream oss; + oss << "Joined array: Dimensionalities of chunk extent and dataset " + "extent must be equivalent (given: " + << "offset=" << o.size() << "D, " + << "extent=" << e.size() << "D)."; + throw std::runtime_error(oss.str()); + } + for (size_t i = 0; i < dim; ++i) + { + if (i != jd.value() && e[i] != dse[i]) + { + throw std::runtime_error( + "Joined array: Chunk extent on non-joined dimensions must " + "be equivalent to dataset extents (Dimension on index " + + std::to_string(i) + ". DS: " + std::to_string(dse[i]) + + " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); + } + } + } + else + { + if (e.size() != dim || o.size() != dim) + { + std::ostringstream oss; + oss << "Dimensionality of chunk (" + << "offset=" << o.size() << "D, " + << "extent=" << e.size() << "D) " + << "and record component (" << int(dim) << "D) " + << "do not match."; + throw std::runtime_error(oss.str()); + } + for (uint8_t i = 0; i < dim; ++i) + if (dse[i] < o[i] + e[i]) + throw std::runtime_error( + "Chunk does not reside inside dataset (Dimension on " + "index " + + std::to_string(i) + ". DS: " + std::to_string(dse[i]) + + " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); } - for (uint8_t i = 0; i < dim; ++i) - if (dse[i] < o[i] + e[i]) - throw std::runtime_error( - "Chunk does not reside inside dataset (Dimension on index " + - std::to_string(i) + ". DS: " + std::to_string(dse[i]) + - " - Chunk: " + std::to_string(o[i] + e[i]) + ")"); } namespace diff --git a/src/backend/BaseRecordComponent.cpp b/src/backend/BaseRecordComponent.cpp index 96b38beed5..839cdc55a6 100644 --- a/src/backend/BaseRecordComponent.cpp +++ b/src/backend/BaseRecordComponent.cpp @@ -65,6 +65,19 @@ bool BaseRecordComponent::constant() const return get().m_isConstant; } +std::optional BaseRecordComponent::joinedDimension() const +{ + auto &rc = get(); + if (rc.m_dataset.has_value()) + { + return rc.m_dataset.value().joinedDimension(); + } + else + { + return false; + } +} + ChunkTable BaseRecordComponent::availableChunks() { auto &rc = get(); diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index 748f1e1bd9..9252eb19ad 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -42,14 +42,11 @@ PatchRecordComponent &PatchRecordComponent::resetDataset(Dataset d) "written."); if (d.extent.empty()) throw std::runtime_error("Dataset extent must be at least 1D."); - if (std::any_of( - d.extent.begin(), d.extent.end(), [](Extent::value_type const &i) { - return i == 0u; - })) + if (d.empty()) throw std::runtime_error( "Dataset extent must not be zero in any dimension."); - get().m_dataset = d; + get().m_dataset = std::move(d); dirty() = true; return *this; } diff --git a/src/binding/python/PatchRecordComponent.cpp b/src/binding/python/PatchRecordComponent.cpp index 3454d76222..311272f5b6 100644 --- a/src/binding/python/PatchRecordComponent.cpp +++ b/src/binding/python/PatchRecordComponent.cpp @@ -189,12 +189,14 @@ void init_PatchRecordComponent(py::module &m) // allowed python intrinsics, after (!) 
buffer matching .def( "store", - &PatchRecordComponent::store, + py::overload_cast( + &PatchRecordComponent::store), py::arg("idx"), py::arg("data")) .def( "store", - &PatchRecordComponent::store, + py::overload_cast( + &PatchRecordComponent::store), py::arg("idx"), py::arg("data")) From 07b0331c77ea62fc7ae11fd6379e6cb4082a93a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 25 Apr 2023 13:38:05 +0200 Subject: [PATCH 03/13] Python bindings --- src/binding/python/Dataset.cpp | 108 ++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 50 deletions(-) diff --git a/src/binding/python/Dataset.cpp b/src/binding/python/Dataset.cpp index 656cd59ea8..70d85721f2 100644 --- a/src/binding/python/Dataset.cpp +++ b/src/binding/python/Dataset.cpp @@ -27,58 +27,66 @@ void init_Dataset(py::module &m) { - py::class_(m, "Dataset") + auto pyDataset = + py::class_(m, "Dataset") + .def( + py::init(), + py::arg("dtype"), + py::arg("extent")) + .def(py::init(), py::arg("extent")) + .def( + py::init([](py::dtype dt, Extent const &e) { + auto const d = dtype_from_numpy(std::move(dt)); + return new Dataset{d, e}; + }), + py::arg("dtype"), + py::arg("extent")) + .def( + py::init(), + py::arg("dtype"), + py::arg("extent"), + py::arg("options")) + .def( + py::init([](py::dtype dt, Extent e, std::string options) { + auto const d = dtype_from_numpy(std::move(dt)); + return new Dataset{d, std::move(e), std::move(options)}; + }), + py::arg("dtype"), + py::arg("extent"), + py::arg("options")) - .def(py::init(), py::arg("dtype"), py::arg("extent")) - .def(py::init(), py::arg("extent")) - .def( - py::init([](py::dtype dt, Extent const &e) { - auto const d = dtype_from_numpy(std::move(dt)); - return new Dataset{d, e}; - }), - py::arg("dtype"), - py::arg("extent")) - .def( - py::init(), - py::arg("dtype"), - py::arg("extent"), - py::arg("options")) - .def( - py::init([](py::dtype dt, Extent const &e, std::string options) { - auto const d = dtype_from_numpy(std::move(dt)); - return new Dataset{d, e, std::move(options)}; - }), - py::arg("dtype"), - py::arg("extent"), - py::arg("options")) - - .def( - "__repr__", - [](const Dataset &d) { - std::stringstream stream; - stream << ""; - } - else - { - auto begin = d.extent.begin(); - stream << '[' << *begin++; - for (; begin != d.extent.end(); ++begin) + .def( + "__repr__", + [](const Dataset &d) { + std::stringstream stream; + stream << ""; + } + else { - stream << ", " << *begin; + auto begin = d.extent.begin(); + stream << '[' << *begin++; + for (; begin != d.extent.end(); ++begin) + { + stream << ", " << *begin; + } + stream << "]>"; } - stream << "]>"; - } - return stream.str(); - }) + return stream.str(); + }) - .def_readonly("extent", &Dataset::extent) - .def("extend", &Dataset::extend) - .def_readonly("rank", &Dataset::rank) - .def_property_readonly( - "dtype", [](const Dataset &d) { return dtype_to_numpy(d.dtype); }) - .def_readwrite("options", &Dataset::options); + .def_property_readonly( + "joined_dimension", &Dataset::joinedDimension) + .def_readonly("extent", &Dataset::extent) + .def("extend", &Dataset::extend) + .def_readonly("rank", &Dataset::rank) + .def_property_readonly( + "dtype", + [](const Dataset &d) { return dtype_to_numpy(d.dtype); }) + .def_readwrite("options", &Dataset::options); + pyDataset.attr("JOINED_DIMENSION") = + py::int_(uint64_t(Dataset::JOINED_DIMENSION)); } From 3d5dcf700072e4a51fb02af3c6dd08ab61a802b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Apr 2023 12:08:54 +0200 
Subject: [PATCH 04/13] Throw errors if unsupported --- src/IO/ADIOS/ADIOS2IOHandler.cpp | 7 +++++++ src/IO/HDF5/HDF5IOHandler.cpp | 6 ++++++ src/IO/JSON/JSONIOHandlerImpl.cpp | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 8d9ea653ad..97ade042a2 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -720,6 +720,13 @@ void ADIOS2IOHandlerImpl::createDataset( "[ADIOS2] Creating a dataset in a file opened as read " "only is not possible."); } +#if !openPMD_HAS_ADIOS_2_9 + if (parameters.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "ADIOS2", "Joined Arrays require ADIOS2 >= v2.9"); + } +#endif if (!writable->written) { /* Sanitize name */ diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index ac4fcaddf9..7caac73776 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -446,6 +446,12 @@ void HDF5IOHandlerImpl::createDataset( "[HDF5] Creating a dataset in a file opened as read only is not " "possible."); + if (parameters.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "HDF5", "Joined Arrays currently only supported in ADIOS2"); + } + if (!writable->written) { /* Sanitize name */ diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index a4e1bb39ab..36d153de6a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -260,6 +260,12 @@ void JSONIOHandlerImpl::createDataset( "[JSON] Creating a dataset in a file opened as read only is not " "possible."); } + if (parameter.joinedDimension.has_value()) + { + error::throwOperationUnsupportedInBackend( + "JSON", "Joined Arrays currently only supported in ADIOS2"); + } + if (!writable->written) { /* Sanitize name */ From b79af4fcd6efd721e8481c91a02a0a8b7ab52599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3%B6schel?= Date: Tue, 25 Apr 2023 11:13:42 +0200 Subject: [PATCH 05/13] Testing Only test if the ADIOS2 version is at least 2.9 --- test/ParallelIOTest.cpp | 156 ++++++++++++++++++++++++++++++++++ test/SerialIOTest.cpp | 144 +++++++++++++++++++++++++++++++++ 2 files changed, 300 insertions(+) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 0b7f3e672a..84f868a059 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -1785,4 +1785,160 @@ TEST_CASE("unavailable_backend", "[core][parallel]") } #endif } + +void joined_dim(std::string const &ext) +{ + using type = float; + using patchType = uint64_t; + constexpr size_t patches_per_rank = 5; + constexpr size_t length_of_patch = 10; + + int size{-1}; + int rank{-1}; + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + { + Series s( + "../samples/joinedDimParallel." 
+ ext, + Access::CREATE, + MPI_COMM_WORLD); + std::vector> writeFrom(patches_per_rank); + + auto it = s.writeIterations()[100]; + + Dataset numParticlesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + auto numParticles = + it.particles["e"] + .particlePatches["numParticles"][RecordComponent::SCALAR]; + auto numParticlesOffset = + it.particles["e"] + .particlePatches["numParticlesOffset"][RecordComponent::SCALAR]; + numParticles.resetDataset(numParticlesDS); + numParticlesOffset.resetDataset(numParticlesDS); + + auto patchOffset = it.particles["e"].particlePatches["offset"]["x"]; + auto patchExtent = it.particles["e"].particlePatches["extent"]["x"]; + Dataset particlePatchesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + patchOffset.resetDataset(particlePatchesDS); + patchExtent.resetDataset(particlePatchesDS); + + float start_value = rank * patches_per_rank * length_of_patch; + for (size_t i = 0; i < 5; ++i) + { + writeFrom[i] = UniquePtrWithLambda( + new type[length_of_patch], + [](auto const *ptr) { delete[] ptr; }); + std::iota( + writeFrom[i].get(), + writeFrom[i].get() + 10, + start_value + length_of_patch * i); + patchOffset.store(start_value + length_of_patch * i); + } + + auto epx = it.particles["e"]["position"]["x"]; + Dataset ds(determineDatatype(), {Dataset::JOINED_DIMENSION}); + epx.resetDataset(ds); + + size_t counter = 0; + for (auto &chunk : writeFrom) + { + epx.storeChunk(std::move(chunk), {}, {length_of_patch}); + numParticles.store(length_of_patch); + /* + * For the sake of the test case, we know that the + * numParticlesOffset has this value. In general, the purpose of the + * joined array is that we don't need to know these values, so the + * specification of particle patches is somewhat difficult. + */ + numParticlesOffset.store( + start_value + counter++ * length_of_patch); + patchExtent.store(10); + } + writeFrom.clear(); + it.close(); + s.close(); + } + + { + Series s( + "../samples/joinedDimParallel." 
+ ext, + Access::READ_ONLY, + MPI_COMM_WORLD); + auto it = s.iterations[100]; + auto e = it.particles["e"]; + + auto particleData = e["position"]["x"].loadChunk(); + auto numParticles = + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .load(); + auto numParticlesOffset = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .load(); + auto patchOffset = e.particlePatches["offset"]["x"].load(); + auto patchExtent = e.particlePatches["extent"]["x"].load(); + + it.close(); + + // check validity of particle patches + auto numPatches = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .getExtent()[0]; + REQUIRE( + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .getExtent()[0] == numPatches); + for (size_t i = 0; i < numPatches; ++i) + { + for (size_t j = 0; j < numParticles.get()[i]; ++j) + { + REQUIRE( + patchOffset.get()[i] <= + particleData.get()[numParticlesOffset.get()[i] + j]); + REQUIRE( + particleData.get()[numParticlesOffset.get()[i] + j] < + patchOffset.get()[i] + patchExtent.get()[i]); + } + } + + /* + * Check that joined array joins early writes before later writes from + * the same rank + */ + for (size_t i = 0; i < size * length_of_patch * patches_per_rank; ++i) + { + REQUIRE(float(i) == particleData.get()[i]); + } + for (size_t i = 0; i < size * patches_per_rank; ++i) + { + REQUIRE(length_of_patch * i == numParticlesOffset.get()[i]); + REQUIRE(type(length_of_patch * i) == patchOffset.get()[i]); + } + } +} + +TEST_CASE("joined_dim", "[parallel]") +{ +#if 100000000 * ADIOS2_VERSION_MAJOR + 1000000 * ADIOS2_VERSION_MINOR + \ + 10000 * ADIOS2_VERSION_PATCH + 100 * ADIOS2_VERSION_TWEAK >= \ + 209000000 + constexpr char const *supportsJoinedDims[] = {"bp", "bp4", "bp5"}; +#else + // no zero-size arrays + std::vector supportsJoinedDims; +#endif + for (auto const &t : testedFileExtensions()) + { + for (auto const supported : supportsJoinedDims) + { + if (t == supported) + { + joined_dim(t); + break; + } + } + } +} + #endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index e0d4c75348..f74a81a490 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -7371,3 +7371,147 @@ TEST_CASE("groupbased_read_write", "[serial]") groupbased_read_write("toml"); } } + +void joined_dim(std::string const &ext) +{ + using type = float; + using patchType = uint64_t; + constexpr size_t patches_per_rank = 5; + constexpr size_t length_of_patch = 10; + + { + Series s("../samples/joinedDimParallel." 
+ ext, Access::CREATE); + std::vector> writeFrom(patches_per_rank); + + auto it = s.writeIterations()[100]; + + Dataset numParticlesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + auto numParticles = + it.particles["e"] + .particlePatches["numParticles"][RecordComponent::SCALAR]; + auto numParticlesOffset = + it.particles["e"] + .particlePatches["numParticlesOffset"][RecordComponent::SCALAR]; + numParticles.resetDataset(numParticlesDS); + numParticlesOffset.resetDataset(numParticlesDS); + + auto patchOffset = it.particles["e"].particlePatches["offset"]["x"]; + auto patchExtent = it.particles["e"].particlePatches["extent"]["x"]; + Dataset particlePatchesDS( + determineDatatype(), {Dataset::JOINED_DIMENSION}); + patchOffset.resetDataset(particlePatchesDS); + patchExtent.resetDataset(particlePatchesDS); + + for (size_t i = 0; i < 5; ++i) + { + writeFrom[i] = UniquePtrWithLambda( + new type[length_of_patch], + [](auto const *ptr) { delete[] ptr; }); + std::iota( + writeFrom[i].get(), + writeFrom[i].get() + 10, + length_of_patch * i); + patchOffset.store(length_of_patch * i); + } + + auto epx = it.particles["e"]["position"]["x"]; + Dataset ds(determineDatatype(), {Dataset::JOINED_DIMENSION}); + epx.resetDataset(ds); + + size_t counter = 0; + for (auto &chunk : writeFrom) + { + epx.storeChunk(std::move(chunk), {}, {length_of_patch}); + numParticles.store(length_of_patch); + /* + * For the sake of the test case, we know that the + * numParticlesOffset has this value. In general, the purpose of the + * joined array is that we don't need to know these values, so the + * specification of particle patches is somewhat difficult. + */ + numParticlesOffset.store(counter++ * length_of_patch); + patchExtent.store(10); + } + writeFrom.clear(); + it.close(); + s.close(); + } + + { + Series s("../samples/joinedDimParallel." + ext, Access::READ_ONLY); + auto it = s.iterations[100]; + auto e = it.particles["e"]; + + auto particleData = e["position"]["x"].loadChunk(); + auto numParticles = + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .load(); + auto numParticlesOffset = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .load(); + auto patchOffset = e.particlePatches["offset"]["x"].load(); + auto patchExtent = e.particlePatches["extent"]["x"].load(); + + it.close(); + + // check validity of particle patches + auto numPatches = + e.particlePatches["numParticlesOffset"][RecordComponent::SCALAR] + .getExtent()[0]; + REQUIRE( + e.particlePatches["numParticles"][RecordComponent::SCALAR] + .getExtent()[0] == numPatches); + for (size_t i = 0; i < numPatches; ++i) + { + for (size_t j = 0; j < numParticles.get()[i]; ++j) + { + REQUIRE( + patchOffset.get()[i] <= + particleData.get()[numParticlesOffset.get()[i] + j]); + REQUIRE( + particleData.get()[numParticlesOffset.get()[i] + j] < + patchOffset.get()[i] + patchExtent.get()[i]); + } + } + + /* + * Check that: + * 1. Joined array joins writes from lower ranks before higher ranks + * 2. 
Joined array joins early writes before later writes from the same + * rank + */ + for (size_t i = 0; i < length_of_patch * patches_per_rank; ++i) + { + REQUIRE(float(i) == particleData.get()[i]); + } + for (size_t i = 0; i < patches_per_rank; ++i) + { + REQUIRE(length_of_patch * i == numParticlesOffset.get()[i]); + REQUIRE(type(length_of_patch * i) == patchOffset.get()[i]); + } + } +} + +TEST_CASE("joined_dim", "[serial]") +{ +#if 100000000 * ADIOS2_VERSION_MAJOR + 1000000 * ADIOS2_VERSION_MINOR + \ + 10000 * ADIOS2_VERSION_PATCH + 100 * ADIOS2_VERSION_TWEAK >= \ + 209000000 + constexpr char const *supportsJoinedDims[] = {"bp", "bp4", "bp5"}; +#else + // no zero-size arrays + std::vector<char const *> supportsJoinedDims; +#endif + for (auto const &t : testedFileExtensions()) + { + for (auto const supported : supportsJoinedDims) + { + if (t == supported) + { + joined_dim(t); + break; + } + } + } +} From e55233e2f20c3bb081a5f1880ac8b59284876921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3%B6schel?= Date: Tue, 25 Apr 2023 17:19:01 +0200 Subject: [PATCH 06/13] Documentation --- docs/source/usage/workflow.rst | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/docs/source/usage/workflow.rst b/docs/source/usage/workflow.rst index 61ef593a2e..1a537dda89 100644 --- a/docs/source/usage/workflow.rst +++ b/docs/source/usage/workflow.rst @@ -3,6 +3,48 @@ Workflow ======== +Storing and reading chunks +-------------------------- + +1. **Chunks within an n-dimensional dataset** + + Most commonly, chunks within an n-dimensional dataset are identified by their offset and extent. + The extent is the size of the chunk in each dimension, NOT the absolute coordinate of the chunk's upper end within the entire dataset. + + In the Python API, this is modeled to conform to the conventional ``__setitem__``/``__getitem__`` protocol. + +2. **Joined arrays (write only)** + + Currently only supported in ADIOS2 v2.9.0 and newer, under the conditions listed in the `ADIOS2 documentation on joined arrays `_. + + In some cases, the concrete chunk within a dataset does not matter and the computation of indices is needless computational and mental overhead. + This commonly occurs for particle data, which the openPMD-standard models as a list of particles. + The order of particles does not matter greatly, and making different parallel processes agree on indexing is error-prone boilerplate. + + In such a case, at most one *joined dimension* can be specified in the Dataset, e.g. ``{Dataset::JOINED_DIMENSION, 128, 128}`` (3D for the sake of explanation, particle data would normally be 1D). + The chunk is then stored by specifying an empty offset vector ``{}``. + The chunk extent vector must be equivalent to the global extent in all non-joined dimensions (i.e. joined arrays allow no further sub-chunking other than concatenation along the joined dimension). + The joined dimension of the extent vector specifies the extent that this piece should have along the joined dimension. + The global extent of the dataset along the joined dimension will then be the sum of all local chunk extents along the joined dimension. + + Since openPMD follows a struct-of-array layout of data, it is important not to lose correlation of data between components. E.g., joining an array must take care that ``particles/e/position/x`` and ``particles/e/position/y`` are joined in a uniform way. + + The openPMD-api makes the **following guarantee**: + + Consider a Series written from ``N`` parallel processes between two (collective) flush points. 
For each parallel process ``n`` and dataset ``D``, let: + + * ``chunk(D, n, i)`` be the ``i``'th chunk written to dataset ``D`` on process ``n`` + * ``num_chunks(D, n)`` be the count of chunks written by ``n`` to ``D`` + * ``joined_index(D, c)`` be the index of chunk ``c`` in the joining order of ``D`` + + Then for any two datasets ``x`` and ``y``: + + * If for any parallel process ``n`` the condition holds that ``num_chunks(x, n) = num_chunks(y, n)`` (between the two flush points!)... + * ...then for any parallel process ``n`` and chunk index ``i`` less than ``num_chunks(x, n)``: ``joined_index(x, chunk(x, n, i)) = joined_index(y, chunk(y, n, i))``. + + **TLDR:** Writing chunks to two joined arrays in a synchronous way (**1.** same order of store operations and **2.** between the same flush operations) will result in the same joining order in both arrays. + + Access modes ------------ From 403c0a98358737ef4ccd27ed03d60e8aff64a209 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 14:28:29 +0000 Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/IO/ADIOS/ADIOS2IOHandler.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 97ade042a2..839f59dd03 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -68,17 +68,17 @@ namespace openPMD #if openPMD_HAVE_ADIOS2 - std::optional<size_t> joinedDimension(adios2::Dims const &dims) +std::optional<size_t> joinedDimension(adios2::Dims const &dims) +{ + for (size_t i = 0; i < dims.size(); ++i) { - for (size_t i = 0; i < dims.size(); ++i) + if (dims[i] == adios2::JoinedDim) { - if (dims[i] == adios2::JoinedDim) - { - return i; - } + return i; } - } + return std::nullopt; } +return std::nullopt; +} #if openPMD_HAVE_MPI From ea09b718bd630db73bbbea527d3238318a8a6f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3%B6schel?= Date: Tue, 6 Feb 2024 12:26:48 +0100 Subject: [PATCH 08/13] Fix ADIOS2 checks for multidimensional joined arrays --- include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 32 ++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index 24454fa8dc..c23fb81d49 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -446,21 +446,35 @@ class ADIOS2IOHandlerImpl } } auto joinedDim = joinedDimension(shape); - for (unsigned int i = 0; i < actualDim; i++) + if (joinedDim.has_value()) { - if (!(joinedDim.has_value() && *joinedDim == i) && - offset[i] + extent[i] > shape[i]) + if (!offset.empty()) { throw std::runtime_error( - "[ADIOS2] Dataset access out of bounds."); + "[ADIOS2] Offset must be an empty vector in case of joined " + "array."); + } + for (unsigned int i = 0; i < actualDim; i++) + { + if (*joinedDim != i && extent[i] != shape[i]) + { + throw std::runtime_error( + "[ADIOS2] store_chunk extent of non-joined dimensions " + "must be equivalent to the total extent."); + } } } - - if (joinedDim.has_value() && !offset.empty()) + else { - throw std::runtime_error( - "[ADIOS2] Offset must be an empty vector in case of joined " - "array."); + for (unsigned int i = 0; i < actualDim; i++) + { + if (!(joinedDim.has_value() && *joinedDim == i) && + offset[i] + extent[i] > shape[i]) + { + throw 
std::runtime_error( "[ADIOS2] Dataset access out of bounds."); + } + } } var.SetSelection( {adios2::Dims(offset.begin(), offset.end()), adios2::Dims(extent.begin(), extent.end())}); From c45ccd277474424c94404e6826b4297f9c319e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3%B6schel?= Date: Tue, 6 Feb 2024 12:27:23 +0100 Subject: [PATCH 09/13] Python test --- examples/5_write_parallel.py | 21 +++++++-- src/binding/python/RecordComponent.cpp | 61 +++++++++++++++++++------- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/examples/5_write_parallel.py b/examples/5_write_parallel.py index ace0cd6e63..9ad07e8554 100644 --- a/examples/5_write_parallel.py +++ b/examples/5_write_parallel.py @@ -14,6 +14,13 @@ import numpy as np import openpmd_api as io +try: + import adios2 + from packaging import version + USE_JOINED_DIMENSION = \ + version.parse(adios2.__version__) >= version.parse('2.9.0') +except ImportError: + USE_JOINED_DIMENSION = False if __name__ == "__main__": # also works with any other MPI communicator comm = MPI.COMM_WORLD @@ -29,7 +36,9 @@ # open file for writing series = io.Series( - "../samples/5_parallel_write_py.h5", + "../samples/5_parallel_write_py.bp" + if USE_JOINED_DIMENSION + else "../samples/5_parallel_write_py.bp", io.Access.create, comm ) @@ -51,7 +60,9 @@ meshes["mymesh"] + # example 1D domain decomposition in first index - global_extent = [comm.size * 10, 300] + global_extent = [io.Dataset.JOINED_DIMENSION, 300] \ + if USE_JOINED_DIMENSION else [comm.size * 10, 300] + dataset = io.Dataset(local_data.dtype, global_extent) if 0 == comm.rank: @@ -64,7 +75,11 @@ "mymesh in iteration 1") # example shows a 1D domain decomposition in first index - mymesh[comm.rank*10:(comm.rank+1)*10, :] = local_data + + if USE_JOINED_DIMENSION: + mymesh.store_chunk(local_data, [], [10, 300]) + else: + mymesh[comm.rank*10:(comm.rank+1)*10, :] = local_data if 0 == comm.rank: print("Registered a single chunk per MPI rank containing its " "contribution, ready to write content to disk") diff --git a/src/binding/python/RecordComponent.cpp b/src/binding/python/RecordComponent.cpp index 37ad9a7cff..7399184383 100644 --- a/src/binding/python/RecordComponent.cpp +++ b/src/binding/python/RecordComponent.cpp @@ -265,23 +265,52 @@ inline void store_chunk( "in record component (") + std::to_string(r_shape.size()) + std::string("D)")); - for (auto d = 0; d < a.ndim(); ++d) + if (auto joined_dim = r.joinedDimension(); joined_dim.has_value()) { - // selection causes overflow of r - if (offset.at(d) + extent.at(d) > r_shape.at(d)) - throw py::index_error( - std::string("slice ") + std::to_string(offset.at(d)) + - std::string(":") + std::to_string(extent.at(d)) + - std::string(" is out of bounds for axis ") + std::to_string(d) + - std::string(" with size ") + std::to_string(r_shape.at(d))); - // underflow of selection in r for given a - if (s_shape.at(d) != std::uint64_t(a.shape()[d])) - throw py::index_error( - std::string("size of chunk (") + std::to_string(a.shape()[d]) + - std::string(") for axis ") + std::to_string(d) + - std::string(" does not match selection ") + - std::string("size in record component (") + - std::to_string(s_extent.at(d)) + std::string(")")); + for (py::ssize_t d = 0; d < a.ndim(); ++d) + { + // selection causes overflow of r + if (d != py::ssize_t(*joined_dim) && extent.at(d) != r_shape.at(d)) + throw py::index_error( + std::string("selection for axis ") + std::to_string(d) + + " of record component with joined dimension " + + std::to_string(*joined_dim) + + " must be equivalent to its global extent " + + std::to_string(r_shape.at(d)) + ", but was " + + std::to_string(extent.at(d)) + "."); + // underflow of selection in r for given a + if (s_shape.at(d) != std::uint64_t(a.shape()[d])) + throw py::index_error( + std::string("size of chunk (") + + std::to_string(a.shape()[d]) + std::string(") for axis ") + + std::to_string(d) + + std::string(" does not match selection ") + + std::string("size in record component (") + + std::to_string(s_extent.at(d)) + std::string(")")); + } + } + else + { + for (auto d = 0; d < a.ndim(); ++d) + { + // selection causes overflow of r + if (offset.at(d) + extent.at(d) > r_shape.at(d)) + throw py::index_error( + std::string("slice ") + std::to_string(offset.at(d)) + + std::string(":") + std::to_string(extent.at(d)) + + std::string(" is out of bounds for axis ") + + std::to_string(d) + std::string(" with size ") + + std::to_string(r_shape.at(d))); + // underflow of selection in r for given a + if (s_shape.at(d) != std::uint64_t(a.shape()[d])) + throw py::index_error( + std::string("size of chunk (") + + std::to_string(a.shape()[d]) + std::string(") for axis ") + + std::to_string(d) + + std::string(" does not match selection ") + + std::string("size in record component (") + + std::to_string(s_extent.at(d)) + std::string(")")); + } } check_buffer_is_contiguous(a); From 70d6f3f9b34e03773037f0a41ae2a19d2b4657cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3%B6schel?= Date: Tue, 6 Feb 2024 15:31:01 +0100 Subject: [PATCH 10/13] Expose this to the sliced Python API --- docs/source/usage/workflow.rst | 1 + examples/5_write_parallel.py | 9 +- src/binding/python/RecordComponent.cpp | 119 +++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/docs/source/usage/workflow.rst b/docs/source/usage/workflow.rst index 1a537dda89..ec44e2e70f 100644 --- a/docs/source/usage/workflow.rst +++ b/docs/source/usage/workflow.rst @@ -25,6 +25,7 @@ Storing and reading chunks The chunk is then stored by specifying an empty offset vector ``{}``. The chunk extent vector must be equivalent to the global extent in all non-joined dimensions (i.e. joined arrays allow no further sub-chunking other than concatenation along the joined dimension). The joined dimension of the extent vector specifies the extent that this piece should have along the joined dimension. + In the Python API, the slice-based setter syntax can be used as an abbreviation since the necessary information is determined from the passed array, e.g. ``record_component[()] = local_data``. The global extent of the dataset along the joined dimension will then be the sum of all local chunk extents along the joined dimension. Since openPMD follows a struct-of-array layout of data, it is important not to lose correlation of data between components. E.g., joining an array must take care that ``particles/e/position/x`` and ``particles/e/position/y`` are joined in a uniform way. 
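For illustration, a minimal sketch of the two equivalent joined-array write paths this patch exposes to Python (assuming an openPMD-api build against ADIOS2 >= v2.9; the file and mesh names are illustrative, mirroring the example above)::

    import numpy as np
    import openpmd_api as io

    series = io.Series("joined_example.bp", io.Access.create)
    mymesh = series.write_iterations()[0].meshes["mymesh"]

    local_data = np.ones((10, 300), dtype=np.double)
    # first dimension is joined (concatenated across writers), second is fixed
    mymesh.reset_dataset(
        io.Dataset(local_data.dtype, [io.Dataset.JOINED_DIMENSION, 300]))

    # explicit API: empty offset, the extent carries this writer's share
    # of the joined dimension
    mymesh.store_chunk(local_data, [], [10, 300])
    # equivalent slice-based shorthand (extent inferred from the array):
    # mymesh[()] = local_data

    series.close()

The slice form works because the chunk extent is fully determined by the passed array, so no offset bookkeeping is needed on the caller's side.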
diff --git a/examples/5_write_parallel.py b/examples/5_write_parallel.py index 9ad07e8554..2ee046547c 100644 --- a/examples/5_write_parallel.py +++ b/examples/5_write_parallel.py @@ -21,13 +21,14 @@ version.parse(adios2.__version__) >= version.parse('2.9.0') except ImportError: USE_JOINED_DIMENSION = False + if __name__ == "__main__": # also works with any other MPI communicator comm = MPI.COMM_WORLD # global data set to write: [MPI_Size * 10, 300] # each rank writes a 10x300 slice with its MPI rank as values - local_value = comm.size + local_value = comm.rank local_data = np.ones(10 * 300, dtype=np.double).reshape(10, 300) * local_value if 0 == comm.rank: @@ -77,7 +78,11 @@ # example shows a 1D domain decomposition in first index if USE_JOINED_DIMENSION: - mymesh.store_chunk(local_data, [], [10, 300]) + # explicit API + # mymesh.store_chunk(local_data, [], [10, 300]) + mymesh[:, :] = local_data + # or short: + # mymesh[()] = local_data else: mymesh[comm.rank*10:(comm.rank+1)*10, :] = local_data if 0 == comm.rank: diff --git a/src/binding/python/RecordComponent.cpp b/src/binding/python/RecordComponent.cpp index 7399184383..af58f0d2c8 100644 --- a/src/binding/python/RecordComponent.cpp +++ b/src/binding/python/RecordComponent.cpp @@ -18,12 +18,15 @@ * and the GNU Lesser General Public License along with openPMD-api. * If not, see . */ +#include +#include #include #include #include #include "openPMD/DatatypeHelpers.hpp" #include "openPMD/Error.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/Series.hpp" #include "openPMD/backend/BaseRecordComponent.hpp" @@ -40,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -111,14 +115,48 @@ inline std::tuple> parseTupleSlices( py::slice slice = py::cast(slices[i]); size_t start, stop, step, slicelength; + auto mocked_extent = full_extent.at(curAxis); + // py::ssize_t is a signed type, so we will need to use another + // magic number for JOINED_DIMENSION in this computation, since the + // C++ API's JOINED_DIMENSION would be interpreted as a negative + // index + bool undo_mocked_extent = false; + constexpr auto PYTHON_JOINED_DIMENSION = + std::numeric_limits::max() - 1; + if (mocked_extent == Dataset::JOINED_DIMENSION) + { + undo_mocked_extent = true; + mocked_extent = PYTHON_JOINED_DIMENSION; + } if (!slice.compute( - full_extent.at(curAxis), - &start, - &stop, - &step, - &slicelength)) + mocked_extent, &start, &stop, &step, &slicelength)) throw py::error_already_set(); + if (undo_mocked_extent) + { + // do the same calculation again, but with another global extent + // (that is not smaller than the previous in order to avoid + // cutting off the range) + // this is to avoid the unlikely case + // that the mocked alternative value is actually the intended + // one + size_t start2, stop2, step2, slicelength2; + if (!slice.compute( + mocked_extent + 1, + &start2, + &stop2, + &step2, + &slicelength2)) + throw py::error_already_set(); + if (slicelength == slicelength2) + { + // slicelength was given as an absolute value and + // accidentally hit our mocked value + // --> keep that value + undo_mocked_extent = false; + } + } + // TODO PySlice_AdjustIndices: Python 3.6.1+ // Adjust start/end slice indices assuming a sequence of the // specified length. 
Out of bounds indices are clipped in a @@ -132,7 +170,10 @@ inline std::tuple> parseTupleSlices( // verified for size later in C++ API offset.at(curAxis) = start; - extent.at(curAxis) = slicelength; // stop - start; + extent.at(curAxis) = + undo_mocked_extent && slicelength == PYTHON_JOINED_DIMENSION + ? Dataset::JOINED_DIMENSION + : slicelength; // stop - start; continue; } @@ -187,6 +228,59 @@ inline std::tuple> parseTupleSlices( return std::make_tuple(offset, extent, flatten); } +inline std::tuple> parseJoinedTupleSlices( + uint8_t const ndim, + Extent const &full_extent, + py::tuple const &slices, + size_t joined_dim, + py::array const &a) +{ + + std::vector flatten; + Offset offset; + Extent extent; + std::tie(offset, extent, flatten) = + parseTupleSlices(ndim, full_extent, slices); + for (size_t i = 0; i < ndim; ++i) + { + if (offset.at(i) != 0) + { + throw std::runtime_error( + "Joined array: Cannot use non-zero offset in store_chunk " + "(offset[" + + std::to_string(i) + "] = " + std::to_string(offset[i]) + ")."); + } + if (flatten.at(i)) + { + throw std::runtime_error( + "Flattened slices unimplemented for joined arrays."); + } + + if (i == joined_dim) + { + if (extent.at(i) == 0 || extent.at(i) == Dataset::JOINED_DIMENSION) + { + extent[i] = a.shape()[i]; + } + } + else + { + if (extent.at(i) != full_extent.at(i)) + { + throw std::runtime_error( + "Joined array: Must use full extent in store_chunk for " + "non-joined dimension " + "(local_extent[" + + std::to_string(i) + "] = " + std::to_string(extent[i]) + + " != global_extent[" + std::to_string(i) + + "] = " + std::to_string(full_extent[i]) + ")."); + } + } + } + offset.clear(); + return std::make_tuple(offset, extent, flatten); +} + /** Check an array is a contiguous buffer * * Required are contiguous buffers for store and load @@ -388,8 +482,17 @@ store_chunk(RecordComponent &r, py::array &a, py::tuple const &slices) Offset offset; Extent extent; std::vector flatten; - std::tie(offset, extent, flatten) = - parseTupleSlices(ndim, full_extent, slices); + if (auto joined_dimension = r.joinedDimension(); + joined_dimension.has_value()) + { + std::tie(offset, extent, flatten) = parseJoinedTupleSlices( + ndim, full_extent, slices, *joined_dimension, a); + } + else + { + std::tie(offset, extent, flatten) = + parseTupleSlices(ndim, full_extent, slices); + } store_chunk(r, a, offset, extent, flatten); } From 5af59b10b83c474a3b1f56064a2e5a50ec95e42b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 6 Feb 2024 17:13:12 +0100 Subject: [PATCH 11/13] Fix --- examples/5_write_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/5_write_parallel.py b/examples/5_write_parallel.py index 2ee046547c..5a5df097b2 100644 --- a/examples/5_write_parallel.py +++ b/examples/5_write_parallel.py @@ -39,7 +39,7 @@ series = io.Series( "../samples/5_parallel_write_py.bp" if USE_JOINED_DIMENSION - else "../samples/5_parallel_write_py.bp", + else "../samples/5_parallel_write_py.h5", io.Access.create, comm ) From 51eca2a2937a0c3f33dd9a9499213ec74e1625f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Feb 2024 10:45:46 +0100 Subject: [PATCH 12/13] Fix formatting --- examples/5_write_parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/5_write_parallel.py b/examples/5_write_parallel.py index 5a5df097b2..8574c1d66e 100644 --- a/examples/5_write_parallel.py +++ b/examples/5_write_parallel.py @@ -38,8 +38,8 @@ # open file for writing series 
= io.Series( "../samples/5_parallel_write_py.bp" - if USE_JOINED_DIMENSION - else "../samples/5_parallel_write_py.h5", + if USE_JOINED_DIMENSION + else "../samples/5_parallel_write_py.h5", io.Access.create, comm ) From 32dbc6082d395b7492de8410e5e0b146e402ea01 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Feb 2024 19:10:54 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- include/openPMD/backend/PatchRecordComponent.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/openPMD/backend/PatchRecordComponent.hpp b/include/openPMD/backend/PatchRecordComponent.hpp index c95df8aed9..4620f9be0b 100644 --- a/include/openPMD/backend/PatchRecordComponent.hpp +++ b/include/openPMD/backend/PatchRecordComponent.hpp @@ -20,10 +20,10 @@ */ #pragma once -#include "openPMD/auxiliary/ShareRawInternal.hpp" -#include "openPMD/backend/BaseRecordComponent.hpp" #include "openPMD/Error.hpp" #include "openPMD/RecordComponent.hpp" +#include "openPMD/auxiliary/ShareRawInternal.hpp" +#include "openPMD/backend/BaseRecordComponent.hpp" #include #include
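As a closing sketch of the ordering guarantee documented in the workflow patch above (again assuming ADIOS2 >= v2.9; the species and file names are made up): issuing one store per component per chunk between the same flush points keeps ``num_chunks`` equal for both components, so both datasets are joined in the same order and rows stay paired::

    import numpy as np
    import openpmd_api as io

    series = io.Series("joined_particles.bp", io.Access.create)
    position = series.write_iterations()[0].particles["e"]["position"]

    ds = io.Dataset(np.dtype("double"), [io.Dataset.JOINED_DIMENSION])
    position["x"].reset_dataset(ds)
    position["y"].reset_dataset(ds)

    rng = np.random.default_rng(seed=42)
    for _ in range(3):  # several chunks; their global offsets are never computed
        n = int(rng.integers(1, 10))
        x = rng.random(n)
        # one store per component per chunk keeps the joining order of
        # x and y identical, preserving the struct-of-array correlation
        position["x"].store_chunk(x, [], [n])
        position["y"].store_chunk(x + 1.0, [], [n])

    series.close()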