Skip to content

Commit

Permalink
Add an arena for allocating strings.
Browse files Browse the repository at this point in the history
This reduces the amount of malloc traffic significantly,
speeding up parsing. For a no-op build of Chromium (Linux, Zen 2),
this reduces time spent from 4.61 to 4.08 seconds. However, note
that it also increases RSS from 914 to 937 MB; I haven't looked
deeply into why, but it's reasonable to assume that this is related
to the fact that we no longer merge small strings together (since
they are now immutable).

We still use some time in actually copying the string into the arena,
but it seems this is cheaper than just persisting the file contents
wholesale and pointing into that.
  • Loading branch information
Steinar H. Gunderson committed Nov 25, 2024
1 parent b40052c commit a404e27
Show file tree
Hide file tree
Showing 18 changed files with 488 additions and 283 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ check_platform_supports_browse_mode(platform_supports_ninja_browse)

# Core source files all build into ninja library.
add_library(libninja OBJECT
src/arena.cc
src/build_log.cc
src/build.cc
src/clean.cc
Expand Down Expand Up @@ -265,6 +266,7 @@ if(BUILD_TESTING)

# Tests all build into ninja_test executable.
add_executable(ninja_test
src/arena_test.cc
src/build_log_test.cc
src/build_test.cc
src/clean_test.cc
Expand Down
4 changes: 3 additions & 1 deletion configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,8 @@ def has_re2c() -> bool:

n.comment('Core source files all build into ninja library.')
objs.extend(re2c_objs)
for name in ['build',
for name in ['arena',
'build',
'build_log',
'clean',
'clparser',
Expand Down Expand Up @@ -631,6 +632,7 @@ def has_re2c() -> bool:
test_variables += [('pdb', 'ninja_test.pdb')]

test_names = [
'arena_test',
'build_log_test',
'build_test',
'clean_test',
Expand Down
32 changes: 32 additions & 0 deletions src/arena.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2024 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "arena.h"

#include <algorithm>

// Slow path of Arena::Alloc(), taken when the current block does not have
// num_bytes left. Allocates a fresh block, makes it the current one, and
// returns its first num_bytes bytes. Whatever was still unused in the
// previous block is abandoned (cur_ptr_/cur_end_ are simply repointed).
//
// The new block holds next_size_ bytes, or exactly num_bytes if the request
// is larger than that; in the latter case the block is completely full as
// soon as this function returns.
char* Arena::AllocSlowPath(size_t num_bytes) {
  const size_t to_allocate = std::max(next_size_, num_bytes);

  // Hand the allocation to a unique_ptr *before* touching the vector.
  // Passing the raw pointer straight to emplace_back() would leak the
  // block if the vector failed to grow.
  blocks_.push_back(std::unique_ptr<char[]>(new char[to_allocate]));

  char* const block_start = blocks_.back().get();
  cur_ptr_ = block_start + num_bytes;
  cur_end_ = block_start + to_allocate;

  // Grow the size used for the next regular block by 50%.
  next_size_ += next_size_ / 2;

  return block_start;
}

73 changes: 73 additions & 0 deletions src/arena.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2024 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <stddef.h>

#include <memory>
#include <vector>

#include "string_piece.h"

// A simple bump allocator that gives very fast and tight memory allocation
// for small values. It is primarily intended for StringPiece allocation,
// but all values returned are 8-byte aligned, so you can allocate more
// complex objects on it if you wish.
//
// All pointers returned by Alloc() are valid until the arena is destroyed
// or Clear() is called, at which point the memory behind them is released
// or reused. No destructors are run.
//
// The arena starts by allocating a single 4 kB block, and then increases the
// block size by 50% every time it needs a new block, so the number of calls
// to the underlying allocator grows only logarithmically with the amount of
// memory handed out in regular-sized blocks.

struct Arena {
 public:
  /// Every request is rounded up to a multiple of this many bytes, which
  /// keeps the bump pointer (and hence every returned pointer) aligned
  /// relative to the start of its block.
  static constexpr size_t kAlignment = 8;

  /// Returns a pointer to at least num_bytes of uninitialized memory owned
  /// by the arena. A zero-byte request on an arena that has not allocated
  /// anything yet returns nullptr.
  char* Alloc(size_t num_bytes) {
    // Round up so that the next allocation starts on an aligned address.
    // If the rounding wraps around (request within a few bytes of
    // SIZE_MAX), keep the original size rather than silently shrinking
    // the request to zero.
    const size_t rounded = (num_bytes + kAlignment - 1) & ~(kAlignment - 1);
    if (rounded >= num_bytes) {
      num_bytes = rounded;
    }

    // Fast path: carve the request out of the current block.
    if (static_cast<size_t>(cur_end_ - cur_ptr_) >= num_bytes) {
      char* ret = cur_ptr_;
      cur_ptr_ += num_bytes;
      return ret;
    }

    return AllocSlowPath(num_bytes);
  }

  /// Make a new StringPiece with the same contents, that will live
  /// for as long as the arena does (or until Clear() is called).
  StringPiece PersistStringPiece(StringPiece s) {
    char* mem = Alloc(s.len_);
    // An empty piece may carry a null str_, and Alloc(0) may return
    // nullptr; memcpy() must not be handed null pointers even for a
    // zero-length copy.
    if (s.len_ != 0) {
      memcpy(mem, s.str_, s.len_);
    }
    return StringPiece(mem, s.len_);
  }

  /// Releases every block except the most recently allocated one and
  /// rewinds the arena to the start of that block, so its memory is
  /// reused by subsequent allocations. Invalidates every pointer handed
  /// out so far. The size used for future blocks is not reset.
  void Clear() {
    if (blocks_.empty()) {
      return;
    }
    // The last block is the current one (cur_end_ already points at its
    // end), so only cur_ptr_ needs rewinding.
    blocks_.erase(blocks_.begin(), blocks_.end() - 1);
    cur_ptr_ = blocks_.back().get();
  }

 private:
  /// Allocates a new block and serves num_bytes from it; called when the
  /// current block is too small.
  char* AllocSlowPath(size_t num_bytes);

  /// All blocks owned by the arena; the last one is the current block.
  std::vector<std::unique_ptr<char[]>> blocks_;
  /// Next free byte in the current block.
  char* cur_ptr_ = nullptr;
  /// One past the last byte of the current block.
  char* cur_end_ = nullptr;
  /// Size of the next regular block to allocate.
  size_t next_size_ = 4096;
};
64 changes: 64 additions & 0 deletions src/arena_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2024 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "arena.h"

#include "test.h"

// Several small allocations in a row must each keep their own contents:
// all of them are made and filled first, and only then verified, so a
// later allocation overlapping an earlier one would be detected.
TEST(ArenaTest, SimpleAlloc) {
  static const char* const kContents[] = { "a", "bc", "defghijk", "12345678" };
  const size_t kCount = sizeof(kContents) / sizeof(kContents[0]);

  Arena arena;
  char* ptrs[kCount];

  // Allocate exactly as many bytes as each string has (no terminator).
  for (size_t i = 0; i < kCount; ++i) {
    const size_t len = strlen(kContents[i]);
    ptrs[i] = arena.Alloc(len);
    memcpy(ptrs[i], kContents[i], len);
  }

  for (size_t i = 0; i < kCount; ++i) {
    EXPECT_EQ(kContents[i],
              StringPiece(ptrs[i], strlen(kContents[i])).AsString());
  }
}

// A request far larger than the initial block size, sandwiched between two
// one-byte allocations, must not disturb either neighbour and must itself
// be fully writable.
TEST(ArenaTest, LargeAlloc) {
  const size_t kLargeSize = 1048576;
  Arena arena;

  char* before = arena.Alloc(1);
  memcpy(before, "a", 1);

  char* big = arena.Alloc(kLargeSize);
  memset(big, 0x55, kLargeSize);

  char* after = arena.Alloc(1);
  memcpy(after, "b", 1);

  EXPECT_EQ("a", StringPiece(before, 1).AsString());
  EXPECT_EQ("b", StringPiece(after, 1).AsString());

  // Every byte of the large region still holds the fill pattern.
  for (size_t pos = 0; pos < kLargeSize; ++pos) {
    EXPECT_EQ(0x55, big[pos]);
  }
}

// PersistStringPiece() must take a private copy: the source buffer is
// overwritten and freed before the persisted piece is inspected.
TEST(ArenaTest, Persist) {
  static const char kText[] = "some string that will go away";
  Arena arena;

  char* scratch = strdup(kText);
  const StringPiece kept = arena.PersistStringPiece(scratch);

  // Scribble over the original before releasing it, so that a piece still
  // pointing into it would no longer compare equal.
  memset(scratch, 0x55, strlen(scratch));
  free(scratch);

  EXPECT_EQ(kText, kept.AsString());
}
2 changes: 1 addition & 1 deletion src/dyndep_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ using namespace std;

DyndepParser::DyndepParser(State* state, FileReader* file_reader,
DyndepFile* dyndep_file)
: Parser(state, file_reader)
: Parser(state, file_reader, &arena_)
, dyndep_file_(dyndep_file) {
}

Expand Down
1 change: 1 addition & 0 deletions src/dyndep_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ struct DyndepParser: public Parser {
bool ParseLet(std::string* key, EvalString* val, std::string* err);
bool ParseEdge(std::string* err);

Arena arena_;
DyndepFile* dyndep_file_;
BindingEnv env_;
};
Expand Down
27 changes: 17 additions & 10 deletions src/eval_env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,27 +100,34 @@ string BindingEnv::LookupWithFallback(const string& var,

string EvalString::Evaluate(Env* env) const {
if (parsed_.empty()) {
return single_token_;
return single_token_.AsString();
}

string result;
for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) {
if (i->second == RAW)
result.append(i->first);
result.append(i->first.begin(), i->first.end());
else
result.append(env->LookupVariable(i->first));
result.append(env->LookupVariable(i->first.AsString()));
}
return result;
}

void EvalString::AddText(StringPiece text) {
if (parsed_.empty()) {
single_token_.append(text.begin(), text.end());
} else if (!parsed_.empty() && parsed_.back().second == RAW) {
parsed_.back().first.append(text.begin(), text.end());
} else {
parsed_.push_back(std::make_pair(text.AsString(), RAW));
if (!single_token_.empty()) {
// Going from one to two tokens, so we can no longer apply
// our single_token_ optimization and need to push everything
// onto the vector.
parsed_.push_back(std::make_pair(single_token_, RAW));
} else {
// This is the first (nonempty) token, so we don't need to
// allocate anything on the vector (yet).
single_token_ = text;
return;
}
}
parsed_.push_back(make_pair(text, RAW));
}

void EvalString::AddSpecial(StringPiece text) {
Expand All @@ -130,14 +137,14 @@ void EvalString::AddSpecial(StringPiece text) {
// onto the vector.
parsed_.push_back(std::make_pair(std::move(single_token_), RAW));
}
parsed_.push_back(std::make_pair(text.AsString(), SPECIAL));
parsed_.push_back(std::make_pair(text, SPECIAL));
}

string EvalString::Serialize() const {
string result;
if (parsed_.empty() && !single_token_.empty()) {
result.append("[");
result.append(single_token_);
result.append(single_token_.AsString());
result.append("]");
} else {
for (const auto& pair : parsed_) {
Expand Down
8 changes: 4 additions & 4 deletions src/eval_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct EvalString {
/// @return The string with variables not expanded.
std::string Unparse() const;

void Clear() { parsed_.clear(); single_token_.clear(); }
void Clear() { parsed_.clear(); single_token_ = StringPiece(); }
bool empty() const { return parsed_.empty() && single_token_.empty(); }

void AddText(StringPiece text);
Expand All @@ -49,16 +49,16 @@ struct EvalString {
/// for use in tests.
std::string Serialize() const;

private:
public:
enum TokenType { RAW, SPECIAL };
typedef std::vector<std::pair<std::string, TokenType> > TokenList;
typedef std::vector<std::pair<StringPiece, TokenType> > TokenList;
TokenList parsed_;

// If we hold only a single RAW token, then we keep it here instead of
// pushing it on TokenList. This saves a bunch of allocations for
// what is a common case. If parsed_ is nonempty, then this value
// must be ignored.
std::string single_token_;
StringPiece single_token_;
};

/// An invocable build command and associated metadata (description, etc.).
Expand Down
Loading

0 comments on commit a404e27

Please sign in to comment.