Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ntuple] Add support for std::map fields #13904

Merged
merged 5 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tree/ntuple/v7/doc/specifications.md
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,10 @@ This means that they have the same on-disk representation as `std::vector<T>`, u
- Child field of type `T`, which must be a type with RNTuple I/O support.
The name of the child field is `_0`.

#### std::map\<K, V\>

A map is stored using a collection mother field whose principal column is of type `(Split)Index[64|32]`, together with a single child field of type `std::pair<K, V>` named `_0`.

### std::atomic\<T\>

Atomic types are stored as a leaf field with a single subfield named `_0`.
Expand Down
72 changes: 67 additions & 5 deletions tree/ntuple/v7/inc/ROOT/RField.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <set>
Expand Down Expand Up @@ -859,8 +860,8 @@ protected:
void GenerateValue(void *where) const override;
void DestroyValue(void *objPtr, bool dtorOnly = false) const override;

std::size_t AppendImpl(const void *from) final;
void ReadGlobalImpl(NTupleSize_t globalIndex, void *to) final;
std::size_t AppendImpl(const void *from) override;
void ReadGlobalImpl(NTupleSize_t globalIndex, void *to) override;

void CommitClusterImpl() final { fNWritten = 0; }

Expand All @@ -871,10 +872,10 @@ public:
~RProxiedCollectionField() override = default;

using Detail::RFieldBase::GenerateValue;
std::vector<RValue> SplitValue(const RValue &value) const final;
std::vector<RValue> SplitValue(const RValue &value) const override;
size_t GetValueSize() const override { return fProxy->Sizeof(); }
size_t GetAlignment() const override { return alignof(std::max_align_t); }
void AcceptVisitor(Detail::RFieldVisitor &visitor) const final;
void AcceptVisitor(Detail::RFieldVisitor &visitor) const override;
void GetCollectionInfo(NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *size) const
{
fPrincipalColumn->GetCollectionInfo(globalIndex, collectionStart, size);
Expand Down Expand Up @@ -1155,6 +1156,28 @@ public:
size_t GetAlignment() const override { return std::alignment_of<std::set<std::max_align_t>>(); }
};

/// The generic field for a std::map<KeyType, ValueType>
class RMapField : public RProxiedCollectionField {
private:
TClass *fItemClass;

protected:
std::unique_ptr<Detail::RFieldBase> CloneImpl(std::string_view newName) const final;

std::size_t AppendImpl(const void *from) final;
void ReadGlobalImpl(NTupleSize_t globalIndex, void *to) final;

public:
RMapField(std::string_view fieldName, std::string_view typeName, std::unique_ptr<Detail::RFieldBase> itemField);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a PairField?

Suggested change
RMapField(std::string_view fieldName, std::string_view typeName, std::unique_ptr<Detail::RFieldBase> itemField);
RMapField(std::string_view fieldName, std::string_view typeName, std::unique_ptr<RPairField> itemField);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this causes trouble with CloneImpl because when we call Clone on the subfield, a RFieldBase is returned (unless there's a way to cast it to an RPairField that I'm unaware of?)

Copy link
Contributor

@jblomer jblomer Oct 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I think we can do a dynamic_cast to a RPairField. We know it must be a RPairField.

RMapField(RMapField &&other) = default;
RMapField &operator=(RMapField &&other) = default;
~RMapField() override = default;

std::vector<RValue> SplitValue(const RValue &value) const final;

size_t GetAlignment() const override { return std::alignment_of<std::map<std::max_align_t, std::max_align_t>>(); }
};

/// The field for values that may or may not be present in an entry. Parent class for unique pointer field and
/// optional field. A nullable field cannot be instantiated itself but only its descendants.
/// The RNullableField takes care of the on-disk representation. Child classes are responsible for the in-memory
Expand Down Expand Up @@ -2263,6 +2286,37 @@ public:
size_t GetAlignment() const final { return std::alignment_of<ContainerT>(); }
};

/// Template specialization of RField for std::map<KeyT, ValueT>.
///
/// Builds on RMapField and supplies the compile-time knowledge of the concrete container type:
/// in-place construction and destruction, the type name, and the size and alignment of the map.
/// The single sub-field is an RField<std::pair<KeyT, ValueT>> named "_0".
template <typename KeyT, typename ValueT>
class RField<std::map<KeyT, ValueT>> : public RMapField {
   using ContainerT = std::map<KeyT, ValueT>;

protected:
   /// Default-constructs an empty map in the storage pointed to by `where`.
   void GenerateValue(void *where) const final { new (where) ContainerT(); }

   /// Runs the map destructor on `objPtr` and then forwards to the base-class DestroyValue
   /// with the same arguments.
   void DestroyValue(void *objPtr, bool dtorOnly = false) const final
   {
      auto *mapPtr = static_cast<ContainerT *>(objPtr);
      std::destroy_at(mapPtr);
      Detail::RFieldBase::DestroyValue(objPtr, dtorOnly);
   }

public:
   /// Returns "std::map<K,V>", where K and V are the type names reported by the key and value fields.
   static std::string TypeName()
   {
      std::string name{"std::map<"};
      name += RField<KeyT>::TypeName();
      name += ",";
      name += RField<ValueT>::TypeName();
      name += ">";
      return name;
   }

   /// Creates the map field called `name` together with its pair sub-field "_0".
   explicit RField(std::string_view name)
      : RMapField(name, TypeName(), std::make_unique<RField<std::pair<KeyT, ValueT>>>("_0"))
   {
   }
   RField(RField &&other) = default;
   RField &operator=(RField &&other) = default;
   ~RField() override = default;

   // Keep the base-class GenerateValue overloads visible next to the override above.
   using Detail::RFieldBase::GenerateValue;
   size_t GetValueSize() const final { return sizeof(ContainerT); }
   size_t GetAlignment() const final { return alignof(ContainerT); }
};

template <typename... ItemTs>
class RField<std::variant<ItemTs...>> : public RVariantField {
using ContainerT = typename std::variant<ItemTs...>;
Expand Down Expand Up @@ -2437,6 +2491,14 @@ private:
return {std::make_unique<RField<Ty1>>("_0"), std::make_unique<RField<Ty2>>("_1")};
}

static std::array<std::size_t, 2> BuildItemOffsets()
{
auto pair = ContainerT();
auto offsetFirst = reinterpret_cast<std::uintptr_t>(&(pair.first)) - reinterpret_cast<std::uintptr_t>(&pair);
auto offsetSecond = reinterpret_cast<std::uintptr_t>(&(pair.second)) - reinterpret_cast<std::uintptr_t>(&pair);
return {offsetFirst, offsetSecond};
}

protected:
std::unique_ptr<Detail::RFieldBase> CloneImpl(std::string_view newName) const final
{
Expand All @@ -2457,7 +2519,7 @@ public:
return "std::pair<" + RField<T1>::TypeName() + "," + RField<T2>::TypeName() + ">";
}
explicit RField(std::string_view name, std::array<std::unique_ptr<Detail::RFieldBase>, 2> &&itemFields)
: RPairField(name, std::move(itemFields), {offsetof(ContainerT, first), offsetof(ContainerT, second)})
: RPairField(name, std::move(itemFields), BuildItemOffsets())
{
fMaxAlignment = std::max(alignof(T1), alignof(T2));
fSize = sizeof(ContainerT);
Expand Down
84 changes: 82 additions & 2 deletions tree/ntuple/v7/src/RField.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ std::string GetNormalizedTypeName(const std::string &typeName)
normalizedType = "std::" + normalizedType;
if (normalizedType.substr(0, 14) == "unordered_set<")
normalizedType = "std::" + normalizedType;
if (normalizedType.substr(0, 4) == "map<")
normalizedType = "std::" + normalizedType;
if (normalizedType.substr(0, 7) == "atomic<")
normalizedType = "std::" + normalizedType;
if (normalizedType == "byte")
Expand Down Expand Up @@ -464,6 +466,18 @@ ROOT::Experimental::Detail::RFieldBase::Create(const std::string &fieldName, con
auto normalizedInnerTypeName = itemField->GetType();
result = std::make_unique<RSetField>(fieldName, "std::unordered_set<" + normalizedInnerTypeName + ">",
std::move(itemField));
} else if (canonicalType.substr(0, 9) == "std::map<") {
auto innerTypes = TokenizeTypeList(canonicalType.substr(9, canonicalType.length() - 10));
if (innerTypes.size() != 2)
return R__FAIL("the type list for std::map must have exactly two elements");

auto normalizedKeyTypeName = GetNormalizedTypeName(innerTypes[0]);
auto normalizedValueTypeName = GetNormalizedTypeName(innerTypes[1]);

auto itemField =
Create("_0", "std::pair<" + normalizedKeyTypeName + "," + normalizedValueTypeName + ">").Unwrap();
result = std::make_unique<RMapField>(
fieldName, "std::map<" + normalizedKeyTypeName + "," + normalizedValueTypeName + ">", std::move(itemField));
} else if (canonicalType.substr(0, 12) == "std::atomic<") {
std::string itemTypeName = canonicalType.substr(12, canonicalType.length() - 13);
auto itemField = Create("_0", itemTypeName).Unwrap();
Expand Down Expand Up @@ -1591,8 +1605,7 @@ ROOT::Experimental::RProxiedCollectionField::RProxiedCollectionField(std::string
std::string_view typeName)
: RProxiedCollectionField(fieldName, typeName, TClass::GetClass(std::string(typeName).c_str()))
{
// TODO(jalopezg, fdegeus) Full support for associative collections (both custom and STL) will be handled in a
// follow-up PR.
// NOTE (fdegeus): std::map is supported, custom associative might be supported in the future if the need arises.
if (fProperties & TVirtualCollectionProxy::kIsAssociative)
throw RException(R__FAIL("custom associative collection proxies not supported"));

Expand Down Expand Up @@ -2665,6 +2678,73 @@ ROOT::Experimental::RSetField::CloneImpl(std::string_view newName) const

//------------------------------------------------------------------------------

/// Creates a map field whose elements are read and written through the collection proxy of `typeName`.
///
/// \param fieldName Name of the new field.
/// \param typeName  Type name of the map; it is passed to TClass::GetClass to obtain the class
///                  handed to the RProxiedCollectionField base.
/// \param itemField Sub-field describing one map element; it must be an RPairField. Ownership is
///                  transferred to this field via Attach().
/// \throws RException if `itemField` is not an RPairField (a null `itemField` is rejected by the
///                    same check), or if the collection proxy reports no value class.
ROOT::Experimental::RMapField::RMapField(std::string_view fieldName, std::string_view typeName,
                                         std::unique_ptr<Detail::RFieldBase> itemField)
   : RProxiedCollectionField(fieldName, typeName, TClass::GetClass(std::string(typeName).c_str()))
{
   if (!dynamic_cast<RPairField *>(itemField.get()))
      throw RException(R__FAIL("RMapField inner field type must be of RPairField"));

   fItemClass = fProxy->GetValueClass();
   // Fail with a clear error instead of dereferencing a null class pointer below.
   // NOTE(review): presumably this happens when no dictionary is available for the pair type -- confirm.
   if (!fItemClass)
      throw RException(R__FAIL("RMapField collection proxy does not provide a value class"));
   fItemSize = fItemClass->GetClassSize();

   Attach(std::move(itemField));
}

/// Appends every element of the map at `from` through the item sub-field and records the
/// updated running element count in the first column.
///
/// \param from Pointer to the map object to be written.
/// \return Sum of the bytes reported by the sub-field for each element plus the packed size of
///         the first column's element.
std::size_t ROOT::Experimental::RMapField::AppendImpl(const void *from)
{
   // The proxy and iteration helpers take a non-const pointer.
   auto *mapPtr = const_cast<void *>(from);
   TVirtualCollectionProxy::TPushPop proxyGuard(fProxy.get(), mapPtr);

   std::size_t nbytes = 0;
   unsigned nItems = 0;
   for (auto itemPtr : RCollectionIterableOnce{mapPtr, fIFuncsWrite, fProxy.get(), 0U}) {
      nbytes += CallAppendOn(*fSubFields[0], itemPtr);
      ++nItems;
   }

   // fNWritten accumulates across calls; CommitClusterImpl resets it.
   fNWritten += nItems;
   fColumns[0]->Append(&fNWritten);
   return nbytes + fColumns[0]->GetElement()->GetPackedSize();
}

/// Reads the map stored at entry `globalIndex` into the object at `to`.
///
/// The principal column yields the index of the first element and the number of elements. The
/// proxy then allocates room for that many elements, and each one is filled through the item
/// sub-field.
///
/// \param globalIndex Entry to read.
/// \param to          Pointer to the destination map object.
void ROOT::Experimental::RMapField::ReadGlobalImpl(NTupleSize_t globalIndex, void *to)
{
   RClusterIndex firstItem;
   ClusterSize_t nItems;
   fPrincipalColumn->GetCollectionInfo(globalIndex, &firstItem, &nItems);

   TVirtualCollectionProxy::TPushPop proxyGuard(fProxy.get(), to);
   void *staging =
      fProxy->Allocate(static_cast<std::uint32_t>(nItems), (fProperties & TVirtualCollectionProxy::kNeedDelete));

   unsigned itemIdx = 0;
   for (auto itemPtr : RCollectionIterableOnce{staging, fIFuncsRead, fProxy.get(), fItemSize}) {
      CallReadOn(*fSubFields[0], firstItem + itemIdx, itemPtr);
      ++itemIdx;
   }

   // Allocate() may return an object other than `to`; in that case it has to be committed.
   // NOTE(review): presumably a staging area whose contents Commit() inserts into the map -- confirm.
   if (staging != to)
      fProxy->Commit(staging);
}

/// Breaks a map value up into one RValue per element.
///
/// \param value Value wrapping the map object.
/// \return One RValue per element, each bound through the item sub-field, in the order in which
///         the collection proxy iterates over the map.
std::vector<ROOT::Experimental::Detail::RFieldBase::RValue>
ROOT::Experimental::RMapField::SplitValue(const RValue &value) const
{
   auto *mapPtr = value.GetRawPtr();
   TVirtualCollectionProxy::TPushPop proxyGuard(fProxy.get(), mapPtr);

   std::vector<RValue> items;
   for (auto itemPtr : RCollectionIterableOnce{mapPtr, fIFuncsWrite, fProxy.get(), 0U})
      items.emplace_back(fSubFields[0]->BindValue(itemPtr));
   return items;
}

std::unique_ptr<ROOT::Experimental::Detail::RFieldBase>
ROOT::Experimental::RMapField::CloneImpl(std::string_view newName) const
{
auto newItemField = fSubFields[0]->Clone(fSubFields[0]->GetName());
return std::unique_ptr<RMapField>(new RMapField(newName, GetType(), std::move(newItemField)));
}

//------------------------------------------------------------------------------

ROOT::Experimental::RNullableField::RNullableField(std::string_view fieldName, std::string_view typeName,
std::unique_ptr<Detail::RFieldBase> itemField)
: ROOT::Experimental::Detail::RFieldBase(fieldName, typeName, ENTupleStructure::kCollection, false /* isSimple */)
Expand Down
1 change: 1 addition & 0 deletions tree/ntuple/v7/test/CustomStruct.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <cstddef>
#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <unordered_set>
Expand Down
16 changes: 16 additions & 0 deletions tree/ntuple/v7/test/ProxiedSTLContainerLinkDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,20 @@
#pragma link C++ class std::unordered_set<CustomStruct>+;
#pragma link C++ class std::unordered_set<std::vector<bool>>+;

#pragma link C++ class std::map<char, long>+;
#pragma link C++ class std::map<char, std::int64_t>+;
#pragma link C++ class std::map<char, std::string>+;
#pragma link C++ class std::map<int, std::vector<CustomStruct>>+;
#pragma link C++ class std::map<std::string, float>+;
#pragma link C++ class std::map<char, std::map<int, CustomStruct>>+;
#pragma link C++ class std::map<float, std::map<char, std::int32_t>>+;

#pragma link C++ class std::pair<char, long>+;
#pragma link C++ class std::pair<char, std::int64_t>+;
#pragma link C++ class std::pair<char, std::string>+;
#pragma link C++ class std::pair<int, CustomStruct>+;
#pragma link C++ class std::pair<int, std::vector<CustomStruct>>+;
#pragma link C++ class std::pair<float, std::map<char, std::int32_t>>+;
#pragma link C++ class std::pair<char, std::int32_t>+;

#endif
26 changes: 26 additions & 0 deletions tree/ntuple/v7/test/ntuple_show.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,32 @@ TEST(RNTupleShow, CollectionProxy)
}
}

// Writes one entry containing a std::map<std::string, float> and checks the text that
// RNTupleReader::Show prints for it.
TEST(RNTupleShow, Map)
{
   FileRaii fileGuard("test_ntuple_show_map.ntuple");

   // Writing happens in its own scope so that the writer is destroyed before the file is opened
   // for reading.
   {
      auto model = RNTupleModel::Create();
      auto mapF = model->MakeField<std::map<std::string, float>>("mapF");
      auto writer = RNTupleWriter::Recreate(std::move(model), "f", fileGuard.GetPath());

      *mapF = {{"foo", 3.14}, {"bar", 2.72}};
      writer->Fill();
   }

   auto reader = RNTupleReader::Open("f", fileGuard.GetPath());
   EXPECT_EQ(1U, reader->GetNEntries());

   std::ostringstream shown;
   reader->Show(0, shown);

   // The expected output lists "bar" before "foo", although "foo" was inserted first.
   // clang-format off
   const std::string expected =
      "{\n"
      " \"mapF\": [{\"_0\": \"bar\", \"_1\": 2.72}, {\"_0\": \"foo\", \"_1\": 3.14}]\n"
      "}\n";
   // clang-format on
   EXPECT_EQ(shown.str(), expected);
}

TEST(RNTupleShow, Enum)
{

Expand Down
Loading