Skip to content

Commit

Permalink
Add basic output of where memory is stored after a compile. (carbon-l…
Browse files Browse the repository at this point in the history
…anguage#4136)

The output is really basic, I'm just adding this to help track how
memory is allocated.

```
---
filename:        'check/testdata/expr_category/in_place_tuple_init.carbon'
source_:
  used_bytes:      8057
  reserved_bytes:  8057
tokens_.allocator_:
  used_bytes:      0
  reserved_bytes:  0
tokens_.token_infos_:
  used_bytes:      1040
  reserved_bytes:  2032

(eliding)

value_stores_.string_literals_.set_:
  used_bytes:      320
  reserved_bytes:  320
Total:
  used_bytes:      20609
  reserved_bytes:  29437
...
```

---------

Co-authored-by: josh11b <15258583+josh11b@users.noreply.github.com>
  • Loading branch information
2 people authored and brymer-meneses committed Aug 15, 2024
1 parent ccf1275 commit d2c1ae1
Show file tree
Hide file tree
Showing 20 changed files with 354 additions and 2 deletions.
12 changes: 12 additions & 0 deletions toolchain/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,23 @@ cc_library(
],
)

# Header-only library (no `srcs`) exposing `mem_usage.h`, which defines the
# `MemUsage` helper. Depends on `:yaml` for YAML output, on the common
# `Map`/`Set` containers that `MemUsage::Add` has overloads for, and on LLVM
# Support for the LLVM types used in the header.
cc_library(
name = "mem_usage",
hdrs = ["mem_usage.h"],
deps = [
":yaml",
"//common:map",
"//common:set",
"@llvm-project//llvm:Support",
],
)

cc_library(
name = "value_store",
hdrs = ["value_store.h"],
deps = [
":index_base",
":mem_usage",
":yaml",
"//common:check",
"//common:hashing",
Expand Down
145 changes: 145 additions & 0 deletions toolchain/base/mem_usage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef CARBON_TOOLCHAIN_BASE_MEM_USAGE_H_
#define CARBON_TOOLCHAIN_BASE_MEM_USAGE_H_

#include <cstdint>
#include <string>
#include <utility>

#include "common/map.h"
#include "common/set.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/FormatVariadic.h"
#include "toolchain/base/yaml.h"

namespace Carbon {

// Helps track memory usage for a compile.
//
// Users will mix `Add` and `Collect` calls, using `ConcatLabel` to label
// allocation sources. Typically we'll collect stats for growable, potentially
// large data types (such as `SmallVector`), ignoring small fixed-size members
// (such as pointers or `int32_t`).
//
// For example:
//
// auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
// -> void {
// // Explicit tracking.
// mem_usage.Add(MemUsage::ConcatLabel(label, "data_"), data_.used_bytes(),
// data_.reserved_bytes());
// // Common library types like `Map` and `llvm::SmallVector` have
// // type-specific support.
// mem_usage.Add(MemUsage::Concat(label, "array_"), array_);
// // Implementing `CollectMemUsage` allows use with the same interface.
// mem_usage.Collect(MemUsage::Concat(label, "obj_"), obj_);
// }
class MemUsage {
public:
// Adds tracking for used and reserved bytes, paired with the given label.
auto Add(std::string label, int64_t used_bytes, int64_t reserved_bytes)
-> void {
mem_usage_.push_back({.label = std::move(label),
.used_bytes = used_bytes,
.reserved_bytes = reserved_bytes});
}

// Adds usage tracking for an allocator.
auto Add(std::string label, const llvm::BumpPtrAllocator& allocator) -> void {
Add(std::move(label), allocator.getBytesAllocated(),
allocator.getTotalMemory());
}

// Adds usage tracking for a map.
template <typename KeyT, typename ValueT, ssize_t SmallSize,
typename KeyContextT>
auto Add(std::string label, Map<KeyT, ValueT, SmallSize, KeyContextT> map,
KeyContextT key_context = KeyContextT()) -> void {
// These don't track used bytes, so we set the same value for used and
// reserved bytes.
auto bytes = map.ComputeMetrics(key_context).storage_bytes;
Add(std::move(label), bytes, bytes);
}

// Adds usage tracking for a set.
template <typename KeyT, ssize_t SmallSize, typename KeyContextT>
auto Add(std::string label, Set<KeyT, SmallSize, KeyContextT> set,
KeyContextT key_context = KeyContextT()) -> void {
// These don't track used bytes, so we set the same value for used and
// reserved bytes.
auto bytes = set.ComputeMetrics(key_context).storage_bytes;
Add(std::move(label), bytes, bytes);
}

// Adds memory usage of an array's data. This ignores the possible overhead of
// a SmallVector's in-place storage; if it's used, it's going to be tiny
// relative to scaling memory costs.
//
// This uses SmallVector in order to get proper inference for T, which
// ArrayRef misses.
template <typename T, unsigned N>
auto Add(std::string label, const llvm::SmallVector<T, N>& array) -> void {
Add(std::move(label), array.size_in_bytes(), array.capacity_in_bytes());
}

// Adds memory usage for an object that provides `CollectMemUsage`.
//
// The expected signature of `CollectMemUsage` is above, in MemUsage class
// comments.
template <typename T>
auto Collect(llvm::StringRef label, const T& arg) -> void {
arg.CollectMemUsage(*this, label);
}

// Constructs a label for memory usage, handling the `.` concatenation.
// We don't expect much depth in labels per-call.
static auto ConcatLabel(llvm::StringRef label, llvm::StringRef child_label)
-> std::string {
return llvm::formatv("{0}.{1}", label, child_label);
}
static auto ConcatLabel(llvm::StringRef label, llvm::StringRef child_label1,
llvm::StringRef child_label2) -> std::string {
return llvm::formatv("{0}.{1}.{2}", label, child_label1, child_label2);
}

auto OutputYaml(llvm::StringRef filename) const -> Yaml::OutputMapping {
// Explicitly copy the filename.
return Yaml::OutputMapping([&, filename](Yaml::OutputMapping::Map map) {
map.Add("filename", filename);
int64_t total_used = 0;
int64_t total_reserved = 0;
for (const auto& entry : mem_usage_) {
total_used += entry.used_bytes;
total_reserved += entry.reserved_bytes;
map.Add(entry.label,
Yaml::OutputMapping([&](Yaml::OutputMapping::Map byte_map) {
byte_map.Add("used_bytes", entry.used_bytes);
byte_map.Add("reserved_bytes", entry.reserved_bytes);
}));
}
map.Add("Total",
Yaml::OutputMapping([&](Yaml::OutputMapping::Map byte_map) {
byte_map.Add("used_bytes", total_used);
byte_map.Add("reserved_bytes", total_reserved);
}));
});
}

private:
// Memory usage for a specific label.
struct Entry {
std::string label;
int64_t used_bytes;
int64_t reserved_bytes;
};

// The accumulated data on memory usage.
llvm::SmallVector<Entry> mem_usage_;
};

} // namespace Carbon

#endif // CARBON_TOOLCHAIN_BASE_MEM_USAGE_H_
28 changes: 28 additions & 0 deletions toolchain/base/value_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/YAMLParser.h"
#include "toolchain/base/index_base.h"
#include "toolchain/base/mem_usage.h"
#include "toolchain/base/yaml.h"

namespace Carbon {
Expand Down Expand Up @@ -187,6 +188,12 @@ class ValueStore
});
}

// Reports the memory held by `values_` to `mem_usage` under `label`.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  // `Add` wants an owned label, so materialize the view as a string.
  mem_usage.Add(std::string(label), values_);
}

auto array_ref() const -> llvm::ArrayRef<ValueType> { return values_; }
auto size() const -> size_t { return values_.size(); }

Expand Down Expand Up @@ -237,6 +244,15 @@ class CanonicalValueStore {
}
auto size() const -> size_t { return values_.size(); }

// Reports memory for the stored values and for the deduplication set, each
// under its own child of `label`.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);

  // The set's metrics expose a single storage size, so it is recorded as both
  // the used and the reserved byte count.
  auto set_storage_bytes =
      set_.ComputeMetrics(KeyContext(values_.array_ref())).storage_bytes;
  mem_usage.Add(MemUsage::ConcatLabel(label, "set_"), set_storage_bytes,
                set_storage_bytes);
}

private:
class KeyContext;

Expand Down Expand Up @@ -322,6 +338,18 @@ class SharedValueStores : public Yaml::Printable<SharedValueStores> {
});
}

// Reports memory usage of each shared store, labelling every store with its
// member name appended to `label`.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  // Forwards one store to `mem_usage` under `<label>.<member_name>`.
  auto collect_store = [&](llvm::StringRef member_name, const auto& store) {
    mem_usage.Collect(MemUsage::ConcatLabel(label, member_name), store);
  };

  collect_store("ints_", ints_);
  collect_store("reals_", reals_);
  collect_store("floats_", floats_);
  collect_store("identifiers_", identifiers_);
  collect_store("string_literals_", string_literals_);
}

private:
CanonicalValueStore<IntId> ints_;
ValueStore<RealId> reals_;
Expand Down
38 changes: 36 additions & 2 deletions toolchain/driver/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,14 @@ Dump the generated assembly to stdout after codegen.
)""",
},
[&](auto& arg_b) { arg_b.Set(&dump_asm); });
b.AddFlag(
{
.name = "dump-mem-usage",
.help = R"""(
Dumps the amount of memory used.
)""",
},
[&](auto& arg_b) { arg_b.Set(&dump_mem_usage); });
b.AddFlag(
{
.name = "prelude-import",
Expand Down Expand Up @@ -344,6 +352,7 @@ Excludes files with the given prefix from dumps.
bool dump_sem_ir = false;
bool dump_llvm_ir = false;
bool dump_asm = false;
bool dump_mem_usage = false;
bool stream_errors = false;
bool preorder_parse_tree = false;
bool builtin_sem_ir = false;
Expand Down Expand Up @@ -540,6 +549,9 @@ class Driver::CompilationUnit {
sorting_consumer_ = SortingDiagnosticConsumer(*consumer);
consumer_ = &*sorting_consumer_;
}
if (options_.dump_mem_usage && IncludeInDumps()) {
mem_usage_ = MemUsage();
}
}

// Loads source and lexes it. Returns true on success.
Expand All @@ -552,6 +564,10 @@ class Driver::CompilationUnit {
*consumer_);
}
});
// Only record the source size when loading succeeded: `source_` is an empty
// optional on failure (checked just below) and must not be dereferenced.
if (mem_usage_ && source_) {
  mem_usage_->Add("source_", source_->text().size(),
                  source_->text().size());
}
if (!source_) {
success_ = false;
return;
Expand All @@ -565,6 +581,9 @@ class Driver::CompilationUnit {
consumer_->Flush();
driver_->output_stream_ << tokens_;
}
if (mem_usage_) {
mem_usage_->Collect("tokens_", *tokens_);
}
CARBON_VLOG() << "*** Lex::TokenizedBuffer ***\n" << tokens_;
if (tokens_->has_errors()) {
success_ = false;
Expand All @@ -582,6 +601,9 @@ class Driver::CompilationUnit {
consumer_->Flush();
parse_tree_->Print(driver_->output_stream_, options_.preorder_parse_tree);
}
if (mem_usage_) {
mem_usage_->Collect("parse_tree_", *parse_tree_);
}
CARBON_VLOG() << "*** Parse::Tree ***\n" << parse_tree_;
if (parse_tree_->has_errors()) {
success_ = false;
Expand All @@ -607,8 +629,12 @@ class Driver::CompilationUnit {
// to wait for code generation.
consumer_->Flush();

CARBON_VLOG() << "*** Raw SemIR::File ***\n" << *sem_ir_ << "\n";
if (mem_usage_) {
mem_usage_->Collect("sem_ir_", *sem_ir_);
}

if (options_.dump_raw_sem_ir && IncludeInDumps()) {
CARBON_VLOG() << "*** Raw SemIR::File ***\n" << *sem_ir_ << "\n";
sem_ir_->Print(driver_->output_stream_, options_.builtin_sem_ir);
if (options_.dump_sem_ir) {
driver_->output_stream_ << "\n";
Expand Down Expand Up @@ -659,11 +685,16 @@ class Driver::CompilationUnit {

// Runs post-compile logic. This is always called, and called after all other
// actions on the CompilationUnit.
auto PostCompile() const -> void {
auto PostCompile() -> void {
if (options_.dump_shared_values && IncludeInDumps()) {
Yaml::Print(driver_->output_stream_,
value_stores_.OutputYaml(input_filename_));
}
if (mem_usage_) {
mem_usage_->Collect("value_stores_", value_stores_);
Yaml::Print(driver_->output_stream_,
mem_usage_->OutputYaml(input_filename_));
}

// The diagnostics consumer must be flushed before compilation artifacts are
// destructed, because diagnostics can refer to their state.
Expand Down Expand Up @@ -773,6 +804,9 @@ class Driver::CompilationUnit {

bool success_ = true;

// Tracks memory usage of the compile.
std::optional<MemUsage> mem_usage_;

// These are initialized as steps are run.
std::optional<SourceBuffer> source_;
std::optional<Lex::TokenizedBuffer> tokens_;
Expand Down
19 changes: 19 additions & 0 deletions toolchain/driver/testdata/dump_mem_usage.carbon
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// ARGS: compile --phase=check --dump-mem-usage %s
//
// NOAUTOUPDATE
//
// Avoid testing specific values:
// SET-CHECK-SUBSET

var x: i32 = 1;

// CHECK:STDOUT: ---
// CHECK:STDOUT: filename: dump_mem_usage.carbon
// CHECK:STDOUT: source_:
// CHECK:STDOUT: used_bytes: 0
// CHECK:STDOUT: reserved_bytes: 0
// CHECK:STDOUT: ...
1 change: 1 addition & 0 deletions toolchain/lex/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ cc_library(
"//common:ostream",
"//common:string_helpers",
"//toolchain/base:index_base",
"//toolchain/base:mem_usage",
"//toolchain/base:value_store",
"//toolchain/diagnostics:diagnostic_emitter",
"//toolchain/source:source_buffer",
Expand Down
7 changes: 7 additions & 0 deletions toolchain/lex/tokenized_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,13 @@ auto TokenizedBuffer::AddToken(TokenInfo info) -> TokenIndex {
return TokenIndex(static_cast<int>(token_infos_.size()) - 1);
}

// Reports the buffer's allocator and its token and line info arrays, each
// labelled with its member name appended to `label`.
auto TokenizedBuffer::CollectMemUsage(MemUsage& mem_usage,
                                      llvm::StringRef label) const -> void {
  // Records one member under `<label>.<member_name>`; overload resolution on
  // `MemUsage::Add` picks the handling appropriate to the member's type.
  auto add_member = [&](llvm::StringRef member_name, const auto& member) {
    mem_usage.Add(MemUsage::ConcatLabel(label, member_name), member);
  };

  add_member("allocator_", allocator_);
  add_member("token_infos_", token_infos_);
  add_member("line_infos_", line_infos_);
}

auto TokenIterator::Print(llvm::raw_ostream& output) const -> void {
output << token_.index;
}
Expand Down
5 changes: 5 additions & 0 deletions toolchain/lex/tokenized_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
#include "toolchain/base/index_base.h"
#include "toolchain/base/mem_usage.h"
#include "toolchain/base/value_store.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/lex/token_index.h"
Expand Down Expand Up @@ -204,6 +205,10 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
auto PrintToken(llvm::raw_ostream& output_stream, TokenIndex token) const
-> void;

// Collects memory usage of members into `mem_usage`. Each recorded entry is
// labelled with `label` followed by `.` and the member's name (for example
// `<label>.token_infos_`).
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
-> void;

// Returns true if the buffer has errors that were detected at lexing time.
auto has_errors() const -> bool { return has_errors_; }

Expand Down
Loading

0 comments on commit d2c1ae1

Please sign in to comment.