Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up parsing #2519

Merged
merged 8 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 3 additions & 52 deletions src/build_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,63 +53,14 @@ using namespace std;
namespace {

const char kFileSignature[] = "# ninja log v%d\n";
const int kOldestSupportedVersion = 6;
const int kCurrentVersion = 6;

// 64bit MurmurHash2, by Austin Appleby
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
// Computes a 64-bit MurmurHash2 ("64A" variant) over the `len` bytes that
// start at `key`. The seed is a fixed constant inside the function; callers
// cannot supply their own.
//
// @param key  Pointer to the first byte to hash; accessed as unsigned chars.
// @param len  Number of bytes to hash.
// @return The 64-bit hash of the input bytes.
inline
uint64_t MurmurHash64A(const void* key, size_t len) {
// Fixed seed: hashing the same bytes twice yields the same value.
static const uint64_t seed = 0xDECAFBADDECAFBADull;
// Multiplier and shift distance shared by every mixing step below.
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
const int r = 47;
// The input length is folded into the initial state.
uint64_t h = seed ^ (len * m);
const unsigned char* data = static_cast<const unsigned char*>(key);
// Main loop: consume the input eight bytes at a time.
while (len >= 8) {
uint64_t k;
// Copy into a local rather than dereferencing a cast pointer, so the
// load does not rely on `data` being 8-byte aligned. The word is
// interpreted in the host's native byte order.
memcpy(&k, data, sizeof k);
// Scramble the 8-byte word on its own...
k *= m;
k ^= k >> r;
k *= m;
// ...then merge it into the running state (xor first, then multiply).
h ^= k;
h *= m;
data += 8;
len -= 8;
}
// After the loop len < 8, so `len & 7` is simply the count of leftover
// bytes. Each case xors one trailing byte into `h` and falls through to the
// next lower case; data[0] ends up in the least significant byte. When no
// bytes are left, no case matches and the extra multiply is skipped.
switch (len & 7)
{
case 7: h ^= uint64_t(data[6]) << 48;
NINJA_FALLTHROUGH;
case 6: h ^= uint64_t(data[5]) << 40;
NINJA_FALLTHROUGH;
case 5: h ^= uint64_t(data[4]) << 32;
NINJA_FALLTHROUGH;
case 4: h ^= uint64_t(data[3]) << 24;
NINJA_FALLTHROUGH;
case 3: h ^= uint64_t(data[2]) << 16;
NINJA_FALLTHROUGH;
case 2: h ^= uint64_t(data[1]) << 8;
NINJA_FALLTHROUGH;
case 1: h ^= uint64_t(data[0]);
h *= m;
};
// Final mixing of the state: xor-shift, multiply, xor-shift.
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
}
#undef BIG_CONSTANT

const int kOldestSupportedVersion = 7;
const int kCurrentVersion = 7;

} // namespace

// static
uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) {
return MurmurHash64A(command.str_, command.len_);
return rapidhash(command.str_, command.len_);
}

BuildLog::LogEntry::LogEntry(const string& output)
Expand Down
12 changes: 6 additions & 6 deletions src/build_log_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ TEST_F(BuildLogTest, FirstWriteAddsSignature) {

TEST_F(BuildLogTest, DoubleEntry) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command abc"));
fprintf(f, "0\t1\t2\tout\t%" PRIx64 "\n",
Expand Down Expand Up @@ -177,7 +177,7 @@ TEST_F(BuildLogTest, ObsoleteOldVersion) {

TEST_F(BuildLogTest, SpacesInOutput) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout with space\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command"));
fclose(f);
Expand All @@ -200,10 +200,10 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) {
// build log on Windows. This shouldn't crash, and the second version header
// should be ignored.
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command"));
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "456\t789\t789\tout2\t%" PRIx64 "\n",
BuildLog::LogEntry::HashCommand("command2"));
fclose(f);
Expand Down Expand Up @@ -252,7 +252,7 @@ struct TestDiskInterface : public DiskInterface {

TEST_F(BuildLogTest, Restat) {
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n"
fprintf(f, "# ninja log v7\n"
"1\t2\t3\tout\tcommand\n");
fclose(f);
std::string err;
Expand Down Expand Up @@ -280,7 +280,7 @@ TEST_F(BuildLogTest, VeryLongInputLine) {
// Ninja's build log buffer is currently 256kB. Lines longer than that are
// silently ignored, but don't affect parsing of other lines.
FILE* f = fopen(kTestFilename, "wb");
fprintf(f, "# ninja log v6\n");
fprintf(f, "# ninja log v7\n");
fprintf(f, "123\t456\t456\tout\tcommand start");
for (size_t i = 0; i < (512 << 10) / strlen(" more_command"); ++i)
fputs(" more_command", f);
Expand Down
7 changes: 3 additions & 4 deletions src/deps_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,13 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
return LOAD_SUCCESS;
}

long offset;
long offset = ftell(f);
bool read_failed = false;
int unique_dep_record_count = 0;
int total_dep_record_count = 0;
for (;;) {
offset = ftell(f);

unsigned size;
if (fread(&size, 4, 1, f) < 1) {
if (fread(&size, sizeof(size), 1, f) < 1) {
if (!feof(f))
read_failed = true;
break;
Expand All @@ -206,6 +204,7 @@ LoadStatus DepsLog::Load(const string& path, State* state, string* err) {
read_failed = true;
break;
}
offset += size + sizeof(size);

if (is_deps) {
if ((size % 4) != 0) {
Expand Down
57 changes: 39 additions & 18 deletions src/eval_env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ string BindingEnv::LookupWithFallback(const string& var,
}

string EvalString::Evaluate(Env* env) const {
if (parsed_.empty()) {
return single_token_;
}

string result;
for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) {
if (i->second == RAW)
Expand All @@ -110,40 +114,57 @@ string EvalString::Evaluate(Env* env) const {
}

void EvalString::AddText(StringPiece text) {
// Add it to the end of an existing RAW token if possible.
if (!parsed_.empty() && parsed_.back().second == RAW) {
parsed_.back().first.append(text.str_, text.len_);
if (parsed_.empty()) {
single_token_.append(text.begin(), text.end());
} else if (!parsed_.empty() && parsed_.back().second == RAW) {
parsed_.back().first.append(text.begin(), text.end());
} else {
parsed_.push_back(make_pair(text.AsString(), RAW));
parsed_.push_back(std::make_pair(text.AsString(), RAW));
}
sesse marked this conversation as resolved.
Show resolved Hide resolved
}

void EvalString::AddSpecial(StringPiece text) {
parsed_.push_back(make_pair(text.AsString(), SPECIAL));
if (parsed_.empty() && !single_token_.empty()) {
// Going from one to two tokens, so we can no longer apply
// our single_token_ optimization and need to push everything
// onto the vector.
parsed_.push_back(std::make_pair(std::move(single_token_), RAW));
}
parsed_.push_back(std::make_pair(text.AsString(), SPECIAL));
}

string EvalString::Serialize() const {
string result;
for (TokenList::const_iterator i = parsed_.begin();
i != parsed_.end(); ++i) {
if (parsed_.empty() && !single_token_.empty()) {
result.append("[");
if (i->second == SPECIAL)
result.append("$");
result.append(i->first);
result.append(single_token_);
result.append("]");
} else {
for (const auto& pair : parsed_) {
result.append("[");
if (pair.second == SPECIAL)
result.append("$");
result.append(pair.first.begin(), pair.first.end());
result.append("]");
}
}
return result;
}

string EvalString::Unparse() const {
string result;
for (TokenList::const_iterator i = parsed_.begin();
i != parsed_.end(); ++i) {
bool special = (i->second == SPECIAL);
if (special)
result.append("${");
result.append(i->first);
if (special)
result.append("}");
if (parsed_.empty() && !single_token_.empty()) {
result.append(single_token_.begin(), single_token_.end());
} else {
for (TokenList::const_iterator i = parsed_.begin();
sesse marked this conversation as resolved.
Show resolved Hide resolved
i != parsed_.end(); ++i) {
bool special = (i->second == SPECIAL);
if (special)
result.append("${");
result.append(i->first.begin(), i->first.end());
if (special)
result.append("}");
}
}
return result;
}
10 changes: 8 additions & 2 deletions src/eval_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ struct EvalString {
/// @return The string with variables not expanded.
std::string Unparse() const;

void Clear() { parsed_.clear(); }
bool empty() const { return parsed_.empty(); }
void Clear() { parsed_.clear(); single_token_.clear(); }
bool empty() const { return parsed_.empty() && single_token_.empty(); }

void AddText(StringPiece text);
void AddSpecial(StringPiece text);
Expand All @@ -53,6 +53,12 @@ struct EvalString {
enum TokenType { RAW, SPECIAL };
typedef std::vector<std::pair<std::string, TokenType> > TokenList;
TokenList parsed_;

// If we hold only a single RAW token, then we keep it here instead of
// pushing it on TokenList. This saves a bunch of allocations for
// what is a common case. If parsed_ is nonempty, then this value
// must be ignored.
std::string single_token_;
};

/// An invocable build command and associated metadata (description, etc.).
Expand Down
13 changes: 7 additions & 6 deletions src/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -740,12 +740,13 @@ bool ImplicitDepLoader::LoadDepsFromLog(Edge* edge, string* err) {
return false;
}

vector<Node*>::iterator implicit_dep =
PreallocateSpace(edge, deps->node_count);
for (int i = 0; i < deps->node_count; ++i, ++implicit_dep) {
Node* node = deps->nodes[i];
*implicit_dep = node;
node->AddOutEdge(edge);
Node** nodes = deps->nodes;
size_t node_count = deps->node_count;
edge->inputs_.insert(edge->inputs_.end() - edge->order_only_deps_,
nodes, nodes + node_count);
edge->implicit_deps_ += node_count;
for (size_t i = 0; i < node_count; ++i) {
nodes[i]->AddOutEdge(edge);
}
return true;
}
Expand Down
40 changes: 4 additions & 36 deletions src/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,40 +20,8 @@
#include "string_piece.h"
#include "util.h"

// MurmurHash2, by Austin Appleby
//
// Computes an `unsigned int` hash over the `len` bytes that start at `key`.
// The seed is a fixed constant inside the function; callers cannot supply
// their own.
//
// @param key  Pointer to the first byte to hash; accessed as unsigned chars.
// @param len  Number of bytes to hash.
// @return The hash of the input bytes.
static inline
unsigned int MurmurHash2(const void* key, size_t len) {
// Fixed seed: hashing the same bytes twice yields the same value.
static const unsigned int seed = 0xDECAFBAD;
// Multiplier and shift distance used by the per-word mixing step.
const unsigned int m = 0x5bd1e995;
const int r = 24;
// The input length is folded into the initial state. The xor result is
// narrowed to unsigned int, so where size_t is wider only the low bits of
// `len` take part.
unsigned int h = seed ^ len;
const unsigned char* data = static_cast<const unsigned char*>(key);
// Main loop: consume the input four bytes at a time.
while (len >= 4) {
unsigned int k;
// Copy into a local rather than dereferencing a cast pointer, so the
// load does not rely on `data` being aligned. The word is interpreted in
// the host's native byte order.
// NOTE(review): copies sizeof(unsigned int) bytes while the loop advances
// by 4 — this assumes unsigned int is 4 bytes wide; confirm for any new
// target platform.
memcpy(&k, data, sizeof k);
// Scramble the word on its own...
k *= m;
k ^= k >> r;
k *= m;
// ...then merge it into the running state (multiply first, then xor).
h *= m;
h ^= k;
data += 4;
len -= 4;
}
// After the loop len is 0..3. Each case xors one trailing byte into `h` and
// falls through to the next lower case; data[0] ends up in the least
// significant byte. When no bytes are left, no case matches and the extra
// multiply is skipped.
switch (len) {
case 3: h ^= data[2] << 16;
NINJA_FALLTHROUGH;
case 2: h ^= data[1] << 8;
NINJA_FALLTHROUGH;
case 1: h ^= data[0];
h *= m;
};
// Final mixing of the state: xor-shift, multiply, xor-shift.
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}

#include <unordered_map>
#include "third_party/emhash/hash_table8.hpp"
#include "third_party/rapidhash/rapidhash.h"

namespace std {
template<>
Expand All @@ -62,7 +30,7 @@ struct hash<StringPiece> {
typedef size_t result_type;

size_t operator()(StringPiece key) const {
return MurmurHash2(key.str_, key.len_);
return rapidhash(key.str_, key.len_);
}
};
}
Expand All @@ -73,7 +41,7 @@ struct hash<StringPiece> {
/// mapping StringPiece => Foo*.
template<typename V>
struct ExternalStringHashMap {
typedef std::unordered_map<StringPiece, V> Type;
typedef emhash8::HashMap<StringPiece, V> Type;
};

#endif // NINJA_MAP_H_
Loading
Loading