Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libnixt: add Serialize #276

Merged
merged 1 commit into from
Jan 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions libnixt/include/nixt/ArrayRef.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/// \file
/// \brief `ArrayRef`, `BytesRef`, and related functions.
#pragma once

#include <string_view>

namespace nixt {

/// \brief Weak reference to an array, with begin and end pointers.
///
/// The struct only stores the two pointers; it neither owns nor copies the
/// elements they delimit.
/// \note Please always pass/return by value and don't add member functions.
template <class T> struct ArrayRef {
/// Pointer to the first element of the referenced range.
const T *Begin;
/// Pointer marking the end of the range; `End - Begin` is the element count.
const T *End;
};

using BytesRef = ArrayRef<char>;

/// \brief Iterator begin. Used for `range-based-for`
template <class T> inline const T *begin(ArrayRef<T> B) { return B.Begin; }

/// \brief Iterator end.
template <class T> inline const T *end(ArrayRef<T> B) { return B.End; }

/// \brief View the bytes referenced by \p B as a `std::string_view`.
///
/// The view covers exactly the range from `B.Begin` up to (not including)
/// `B.End`; no bytes are copied.
inline std::string_view view(BytesRef B) {
  const auto Length = static_cast<std::size_t>(B.End - B.Begin);
  return std::string_view(B.Begin, Length);
}

/// \brief Move the begin pointer of \p B by \p Offset elements.
/// \returns A reference with the shifted begin pointer and the same end
/// pointer as \p B. The argument itself is passed by value and not modified
/// for the caller.
template <class T> inline ArrayRef<T> advance(ArrayRef<T> B, long Offset) {
  B.Begin += Offset;
  return B;
}
/// \brief Number of elements between the begin and end pointers of \p B.
template <class T> inline std::size_t lengthof(ArrayRef<T> B) {
  const auto Distance = B.End - B.Begin;
  return static_cast<std::size_t>(Distance);
}

} // namespace nixt
41 changes: 41 additions & 0 deletions libnixt/include/nixt/PtrPool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/// \file
/// \brief Pointer pool, for RAII memory management.

// TODO: This file is trivial and shared among many libraries; consider moving
// it into a standalone public header.

#pragma once

#include <memory>
#include <vector>

namespace nixt {

/// \brief A simple pointer pool, a vector of `unique_ptr`s.
///
/// It is used for "owning" nodes. Other classes can use weak/raw pointers to
/// the nodes, to avoid cyclic references.
///
/// Also in nix AST, the nodes are not owned by their parents because in the
/// bison algorithm nodes should be copyable while performing shift-reduce. So
/// in our implementation nodes are owned in this structure.
template <class T> struct PtrPool {
  /// Owned nodes, in insertion order. They are released when the pool (or
  /// this vector) is destroyed.
  std::vector<std::unique_ptr<T>> Nodes;

  /// \brief Takes ownership of a node, add it to the pool.
  /// \returns The pointer that \p Node was holding, with its original static
  /// type `U` (null if \p Node was empty).
  template <class U> U *add(std::unique_ptr<U> Node) {
    // Remember the typed pointer before ownership is handed over. This avoids
    // a `dynamic_cast` back from `T *`, so it also works for non-polymorphic
    // types and does not depend on RTTI.
    U *Raw = Node.get();
    Nodes.push_back(std::move(Node));
    return Raw;
  }

  /// \brief Takes ownership from a raw pointer.
  ///
  /// \note \p Node must have been allocated with `new`, because the pool
  /// releases it through `std::unique_ptr` (i.e. with `delete`). It must not
  /// be owned by any other object (otherwise it will cause double free).
  /// \returns \p Node, unchanged.
  template <class U> U *record(U *Node) {
    Nodes.emplace_back(std::unique_ptr<U>(Node));
    return Node;
  }
};

} // namespace nixt
104 changes: 104 additions & 0 deletions libnixt/include/nixt/Serialize.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/// \file
/// \brief Serialize nix::Expr to bytes & deserialize from bytes.

#pragma once

#include "nixt/ArrayRef.h"
#include "nixt/PtrPool.h"

#include <nix/nixexpr.hh>

namespace nixt::serialize {

//===----------------------------------------------------------------------===//
// Shared type definitions & constants
//===----------------------------------------------------------------------===//

/// \brief Tag identifying which kind of record an encoded entry holds.
///
/// The underlying type is `uint32_t`. One enumerator is generated for every
/// `NIX_EXPR(...)` entry of "Nodes.inc", followed by the extra tags declared
/// explicitly below.
enum class EncodeKind : uint32_t {
#define NIX_EXPR(EXPR) EXPR,
#include "Nodes.inc"
#undef NIX_EXPR

// Special discriminator for nix::AttrName.
// struct AttrName
// {
// Symbol symbol;
// Expr * expr;
// AttrName(Symbol s) : symbol(s) {};
// AttrName(Expr * e) : expr(e) {};
// };
// NOTE(review): presumably marks the `Symbol` alternative of `AttrName`,
// while the `Expr *` alternative reuses the expression tags. Confirm against
// the encoder.
AttrNameSymbol,
};

using nix::NixFloat;
using nix::NixInt;

// Check the actual underlying types, to detect ABI incompatibilities when nix
// is updated. Because the nix language is currently very "stable", this
// should not break easily.
static_assert(std::is_same_v<nix::NixFloat, double>);
static_assert(std::is_same_v<nix::NixInt, std::int64_t>);

/// Integer type of `nix::Pos::line`; asserted below to also be the type of
/// `nix::Pos::column`.
using PosInt = decltype(nix::Pos::line);
static_assert(std::is_same_v<PosInt, decltype(nix::Pos::column)>);
static_assert(std::is_same_v<PosInt, uint32_t>); // check for ABI breakage.

/// \brief Header of serialized AST.
///
/// `consumeAST` reads this structure first and asserts on both fields.
struct ASTHeader {
/// Magic bytes identifying the format; the decoder expects "\x7FNixAST\0".
char Magic[8];
/// Format version; the decoder accepts only version 1.
uint32_t Version;
};

//===----------------------------------------------------------------------===//
// Encoder
//===----------------------------------------------------------------------===//

/// \brief Encode a trivial, standard-layout value by writing its object
/// representation (`sizeof(T)` raw bytes) to \p OS.
/// \returns The stream's put position before the write, i.e. the offset at
/// which the data begins.
template <class T>
  requires std::is_standard_layout_v<T> && std::is_trivial_v<T>
std::size_t encode(std::ostream &OS, const T &Data) {
  const std::size_t Start = OS.tellp();
  const auto *Raw = reinterpret_cast<const char *>(&Data);
  OS.write(Raw, sizeof(T));
  return Start;
}

/// \brief Encode string to bytes.
/// \note The matching decoder reads a `std::size_t` length followed by that
/// many bytes. Presumably this overload writes the same layout and returns the
/// beginning offset like the primitive overload; the definition is not shown
/// here, TODO confirm.
std::size_t encode(std::ostream &OS, const std::string &Data);

/// \brief Encode a position origin (`nix::Pos::Origin`) to bytes.
/// \note The matching decoder reads a `std::size_t` index first and, for
/// index 3 only, a string payload. NOTE(review): confirm that the encoder
/// emits exactly that.
std::size_t encode(std::ostream &OS, const nix::Pos::Origin &Origin);

/// \brief Encode an AST. \p E is the root of the AST.
/// \note The matching decoder (`consumeAST`) expects an `ASTHeader`, then the
/// origin, then the root expression. Presumably this function writes them in
/// that order; verify against the implementation.
void encodeAST(std::ostream &OS, const nix::SymbolTable &STable,
const nix::PosTable &PTable, const nix::Pos::Origin &Origin,
const nix::Expr *E);

//===----------------------------------------------------------------------===//
// Decoder
//===----------------------------------------------------------------------===//

/// \brief Decode a trivial, standard-layout value by copying `sizeof(T)`
/// bytes from the front of \p Data into \p Obj with `memcpy`.
///
/// The available length is only checked with `assert`.
/// \returns Size of bytes consumed, which is always `sizeof(T)`.
template <class T>
  requires std::is_standard_layout_v<T> && std::is_trivial_v<T>
std::size_t decode(BytesRef Data, T &Obj) {
  constexpr std::size_t Size = sizeof(T);
  assert(lengthof(Data) >= Size);
  std::memcpy(&Obj, Data.Begin, Size);
  return Size;
}

/// \brief Decode string from bytes.
///
/// Reads a `std::size_t` length from the front of \p Data, then copies that
/// many following bytes into \p Str.
/// \returns Size of bytes consumed: the length prefix plus the string bytes.
std::size_t decode(BytesRef Data, std::string &Str);

/// \brief Decode one object of type \p T from the front of \p Data and move
/// \p Data past the bytes that were used.
/// \returns The decoded object.
template <class T> T consume(BytesRef &Data) {
  T Result;
  const std::size_t Consumed = decode(Data, Result);
  Data = advance(Data, Consumed);
  return Result;
}

/// \brief Consume a serialized AST from the front of \p Data and rebuild the
/// expression it describes.
///
/// Reads the `ASTHeader` (checked with `assert`), the position origin and
/// then the root expression. On return \p Data has been advanced past the
/// consumed bytes.
/// \param Pool Takes ownership of every node that is created.
/// \param PTable Receives the decoded positions.
/// \param STable Receives the decoded symbols.
/// \returns The decoded root expression, owned by \p Pool.
nix::Expr *consumeAST(BytesRef &Data, PtrPool<nix::Expr> &Pool,
nix::PosTable &PTable, nix::SymbolTable &STable);

} // namespace nixt::serialize
134 changes: 134 additions & 0 deletions libnixt/lib/Deserialize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include "nixt/ArrayRef.h"
#include "nixt/Serialize.h"

namespace nixt::serialize {
namespace {

/// \brief Stateful decoder that turns encoded bytes back into `nix::Expr`
/// nodes.
///
/// The decoder keeps a cursor into the byte buffer it was constructed with.
/// Every `decode*` member consumes bytes at the cursor; created nodes are
/// handed to the `PtrPool`, which owns them.
class ASTDecoder {
  /// Owns every node created while decoding.
  PtrPool<nix::Expr> &Pool;
  /// Start of the buffer passed to the constructor; base for `offset()`.
  const char *Begin;
  /// Bytes that have not been consumed yet.
  BytesRef Cur;
  /// Table that decoded positions are added to.
  nix::PosTable &PTable;
  /// Table that decoded symbols are created in.
  nix::SymbolTable &STable;
  /// Origin attached to every decoded position. Held by reference, so the
  /// referenced object must outlive the decoder.
  const nix::Pos::Origin &Origin;
  /// Created nodes, keyed by the value of `offset()` when they were created.
  std::map<std::size_t, const nix::Expr *> ExprMap;

  /// \brief Consume one value of type \p T at the cursor.
  template <typename T> T eat() { return consume<T>(Cur); }

  /// \brief Number of bytes consumed so far, relative to `Begin`.
  [[nodiscard]] std::size_t offset() const { return Cur.Begin - Begin; }

  /// \brief Allocate a node of type \p T, register it in the pool and in
  /// `ExprMap`.
  ///
  /// Callers evaluate their `eat` calls before this body runs, so the recorded
  /// offset is the one reached after the node's payload has been consumed.
  template <typename T, typename... ArgTs> T *create(ArgTs &&...Args) {
    auto Ptr = Pool.record(new T(std::forward<ArgTs>(Args)...));
    ExprMap[offset()] = Ptr;
    return Ptr;
  }

  /// \brief Decode a position.
  ///
  /// A line value of `PosInt(-1)` stands for `nix::noPos`; in that case no
  /// column follows in the byte stream.
  nix::PosIdx decodePosIdx() {
    auto Line = eat<PosInt>();
    if (Line == PosInt(-1))
      return nix::noPos;

    auto Column = eat<PosInt>();
    return PTable.add(Origin, Line, Column);
  }

  /// \brief Decode a string and intern it in the symbol table.
  nix::Symbol decodeSymbol() { return STable.create(eat<std::string>()); }

  /// \brief Decode an integer literal node.
  nix::ExprInt *decodeExprInt() {
    return create<nix::ExprInt>(eat<nix::NixInt>());
  }

  /// \brief Decode a floating-point literal node.
  nix::ExprFloat *decodeExprFloat() {
    return create<nix::ExprFloat>(eat<nix::NixFloat>());
  }

  /// \brief Decode a string literal node.
  nix::ExprString *decodeExprString() {
    return create<nix::ExprString>(eat<std::string>());
  }

  /// \brief Decode a path node.
  nix::ExprPath *decodeExprPath() {
    return create<nix::ExprPath>(eat<nix::Path>());
  }

  /// \brief Decode a variable node.
  ///
  /// Fields are read in this order: position, name, `fromWith`, `level`,
  /// `displ`.
  nix::ExprVar *decodeExprVar() {
    auto Pos = decodePosIdx();
    auto Name = decodeSymbol();
    nix::ExprVar E(Pos, Name);
    E.fromWith = eat<bool>();
    E.level = eat<nix::Level>();
    E.displ = eat<nix::Displacement>();

    return create<nix::ExprVar>(std::move(E));
  }

public:
  /// \brief Create a decoder reading from \p Data.
  /// \param Pool Receives ownership of created nodes.
  /// \param Data Bytes to decode; its begin pointer is the base of `offset()`.
  /// \param PTable Table that decoded positions are added to.
  /// \param STable Table that decoded symbols are created in.
  /// \param Origin Origin used for decoded positions; stored by reference.
  ASTDecoder(PtrPool<nix::Expr> &Pool, BytesRef Data, nix::PosTable &PTable,
             nix::SymbolTable &STable, const nix::Pos::Origin &Origin)
      : Pool(Pool), Begin(Data.Begin), Cur(Data), PTable(PTable),
        STable(STable), Origin(Origin) {}

  /// \brief Decode one expression: read its `EncodeKind` tag, then dispatch
  /// to the matching `decode*` member.
  /// \returns The created node. For an unknown tag this asserts; when
  /// assertions are compiled out it returns `nullptr`.
  nix::Expr *decodeExpr() {
    const auto Kind = eat<EncodeKind>();
    switch (Kind) {
    case EncodeKind::ExprInt:
      return decodeExprInt();
    case EncodeKind::ExprFloat:
      return decodeExprFloat();
    case EncodeKind::ExprString:
      return decodeExprString();
    case EncodeKind::ExprPath:
      return decodeExprPath();
    case EncodeKind::ExprVar:
      return decodeExprVar();
    default:
      break;
    }
    // Only reached for a tag this decoder does not handle. Return explicitly
    // so that control never flows off the end of a non-void function (which
    // would be undefined behaviour) when `assert` is disabled.
    assert(false && "Unknown kind");
    return nullptr;
  }

  /// \brief The bytes that have not been consumed yet.
  [[nodiscard]] BytesRef getBytesRef() const { return Cur; }
};

} // namespace

/// \brief Decode a length-prefixed string from the front of \p Data.
///
/// Layout: a `std::size_t` length, immediately followed by that many bytes.
/// Both the prefix and the payload are bounds-checked with `assert`, matching
/// the other decoders.
/// \param Data Bytes to read from (passed by value, not advanced).
/// \param Str Receives the decoded string.
/// \returns Size of bytes consumed: `sizeof(std::size_t)` plus the length.
std::size_t decode(BytesRef Data, std::string &Str) {
  constexpr std::size_t PrefixSize = sizeof(std::size_t);
  assert(lengthof(Data) >= PrefixSize);
  std::size_t Length = 0;
  decode(Data, Length);
  // The payload must fit into what is left after the prefix; otherwise the
  // copy below would read past the end of the buffer.
  assert(lengthof(Data) - PrefixSize >= Length);
  Str.assign(Data.Begin + PrefixSize, Length);
  return PrefixSize + Length;
}

/// \brief Consume a `nix::Pos::Origin` from the front of \p Data.
///
/// Reads a `std::size_t` index and builds the origin selected by it:
///   - 0: no origin (`none_tag`);
///   - 1, 2: an stdin origin with empty contents (no payload is read);
///   - 3: a path origin, read as a string.
/// NOTE(review): the index presumably mirrors the alternative index of the
/// origin variant written by the encoder. Confirm against the encoder.
/// \returns The decoded origin. For an unknown index this asserts; when
/// assertions are compiled out it falls back to `none_tag`.
template <> nix::Pos::Origin consume<nix::Pos::Origin>(BytesRef &Data) {
  const auto Index = consume<std::size_t>(Data);
  switch (Index) {
  case 0:
    return nix::Pos::none_tag{};
  case 1:
  case 2:
    return nix::Pos::Stdin{nix::make_ref<std::string>("")};
  case 3:
    return nix::CanonPath(consume<std::string>(Data));
  default:
    break;
  }
  // Only reached for an index this decoder does not know. Return explicitly
  // so that control never flows off the end of a non-void function (which
  // would be undefined behaviour) when `assert` is disabled.
  assert(false && "Unknown origin");
  return nix::Pos::none_tag{};
}

/// \brief Decode a whole serialized AST from the front of \p Data.
///
/// The header is read and checked (with `assert`) for the expected magic
/// bytes and version 1, then the position origin and the root expression are
/// decoded. On return \p Data points past everything that was consumed.
/// \returns The root expression; it is owned by \p Pool.
nix::Expr *consumeAST(BytesRef &Data, PtrPool<nix::Expr> &Pool,
                      nix::PosTable &PTable, nix::SymbolTable &STable) {
  // Fixed-size header first.
  const ASTHeader Hdr = consume<ASTHeader>(Data);
  assert(std::memcmp(Hdr.Magic, "\x7FNixAST\0", 8) == 0);
  assert(Hdr.Version == 1);

  // The origin lives in this frame and outlives the decoder that refers to it.
  const nix::Pos::Origin Org = consume<nix::Pos::Origin>(Data);
  ASTDecoder Dec(Pool, Data, PTable, STable, Org);
  nix::Expr *const Root = Dec.decodeExpr();

  // Publish how far the decoder got.
  Data = Dec.getBytesRef();
  return Root;
}

} // namespace nixt::serialize
Loading