Skip to content

Commit

Permalink
Consider more factors when determining preallocation size of WAL files
Browse files Browse the repository at this point in the history
Summary: Currently the WAL file preallocation size is 1.1 * write_buffer_size. However, this overestimates the space a WAL file needs when options.db_write_buffer_size or options.max_total_wal_size is set to a much smaller value.

Test Plan: Add a unit test.

Reviewers: andrewkr, yiwu

Reviewed By: yiwu

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D63957
  • Loading branch information
siying committed Sep 19, 2016
1 parent 4c3f449 commit b666f85
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 5 deletions.
24 changes: 21 additions & 3 deletions db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3682,6 +3682,24 @@ bool DBImpl::MCOverlap(ManualCompaction* m, ManualCompaction* m1) {
return true;
}

// Computes the preallocation block size to request for a new WAL file.
//
// The starting estimate is write_buffer_size plus 10% of headroom. It is then
// clamped to each limit that is actually configured: max_total_wal_size,
// db_write_buffer_size, and the buffer size of an enabled write buffer manager.
// A limit of 0 (or a missing/disabled manager) is skipped.
//
// @param write_buffer_size  memtable write buffer size the WAL is sized for.
// @return the smallest of the estimate and all configured limits.
uint64_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const {
  uint64_t bsize = write_buffer_size / 10 + write_buffer_size;

  // Some users might set very high write_buffer_size and rely on
  // max_total_wal_size or other parameters to control the WAL size.
  //
  // std::min is instantiated explicitly as std::min<uint64_t>: plain
  // std::min(a, b) only compiles when both arguments have exactly the same
  // type, and the limits below may be declared as a different unsigned type
  // (e.g. size_t, which is not uint64_t on every platform).
  if (db_options_.max_total_wal_size > 0) {
    bsize = std::min<uint64_t>(bsize, db_options_.max_total_wal_size);
  }
  if (db_options_.db_write_buffer_size > 0) {
    bsize = std::min<uint64_t>(bsize, db_options_.db_write_buffer_size);
  }
  if (db_options_.write_buffer_manager &&
      db_options_.write_buffer_manager->enabled()) {
    bsize = std::min<uint64_t>(
        bsize, db_options_.write_buffer_manager->buffer_size());
  }

  return bsize;
}

namespace {
struct IterState {
IterState(DBImpl* _db, InstrumentedMutex* _mu, SuperVersion* _super_version,
Expand Down Expand Up @@ -4995,8 +5013,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
// Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize(
mutable_cf_options.write_buffer_size / 10 +
mutable_cf_options.write_buffer_size);
GetWalPreallocateBlockSize(mutable_cf_options.write_buffer_size));
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(lfile), opt_env_opt));
new_log = new log::Writer(std::move(file_writer), new_log_number,
Expand Down Expand Up @@ -5747,7 +5764,8 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
LogFileName(impl->db_options_.wal_dir, new_log_number),
&lfile, opt_env_options);
if (s.ok()) {
lfile->SetPreallocationBlockSize((max_write_buffer_size / 10) + max_write_buffer_size);
lfile->SetPreallocationBlockSize(
impl->GetWalPreallocateBlockSize(max_write_buffer_size));
impl->logfile_number_ = new_log_number;
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(lfile), opt_env_options));
Expand Down
2 changes: 2 additions & 0 deletions db/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,8 @@ class DBImpl : public DB {
bool ShouldntRunManualCompaction(ManualCompaction* m);
bool HaveManualCompaction(ColumnFamilyData* cfd);
bool MCOverlap(ManualCompaction* m, ManualCompaction* m1);

uint64_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
};

// Sanitize db options. The caller should delete result.info_log if
Expand Down
1 change: 0 additions & 1 deletion db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2065,7 +2065,6 @@ TEST_F(DBTest2, ReadAmpBitmapLiveInCacheAfterDBClose) {
ASSERT_EQ(total_useful_bytes_iter1 + total_useful_bytes_iter2,
total_loaded_bytes_iter1 + total_loaded_bytes_iter2);
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down
13 changes: 12 additions & 1 deletion db/db_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,18 @@ class SpecialEnv : public EnvWrapper {
return s;
}
Status Truncate(uint64_t size) override {
  // Pure pass-through: delegate the truncation to base_ unchanged.
  return base_->Truncate(size);
}
Status Close() override { return base_->Close(); }
Status Close() override {
// Sync points are not available in Windows release builds, so the test hook
// below is compiled out for that configuration.
#if !defined(NDEBUG) || !defined(OS_WIN)
  // Expose this wrapper's preallocation block size to tests through a sync
  // point callback. The value is never handed to the base file, so it has to
  // be read here rather than from base_.
  size_t wal_prealloc_size = preallocation_block_size();
  TEST_SYNC_POINT_CALLBACK("DBTestWalFile.GetPreallocationStatus",
                           &wal_prealloc_size);
#endif  // !defined(NDEBUG) || !defined(OS_WIN)

  // The actual close is performed by the base file.
  return base_->Close();
}
Status Flush() override {
  // Pure pass-through: delegate the flush to base_ unchanged.
  return base_->Flush();
}
Status Sync() override {
++env_->sync_counter_;
Expand Down
89 changes: 89 additions & 0 deletions db/db_wal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,95 @@ TEST_F(DBWALTest, RecoveryWithEmptyLog) {
} while (ChangeOptions());
}

#if !(defined NDEBUG) || !defined(OS_WIN)
// Verifies the preallocation block size that WAL files report through the
// "DBTestWalFile.GetPreallocationStatus" sync point under four configurations,
// each adding a smaller limit than the previous one:
//   1. only write_buffer_size        -> 110% of write_buffer_size
//   2. + max_total_wal_size          -> max_total_wal_size
//   3. + db_write_buffer_size        -> db_write_buffer_size
//   4. + an enabled WriteBufferManager -> the manager's buffer size
TEST_F(DBWALTest, PreallocateBlock) {
  Options options = CurrentOptions();
  options.write_buffer_size = 10 * 1000 * 1000;
  options.max_total_wal_size = 0;

  // No limit configured: expect write_buffer_size plus 10% headroom.
  size_t expected_preallocation_size = static_cast<size_t>(
      options.write_buffer_size + options.write_buffer_size / 10);

  DestroyAndReopen(options);

  std::atomic<int> called(0);
  rocksdb::SyncPoint* const sync_point = rocksdb::SyncPoint::GetInstance();

  // Registered once for the whole test. The callback reads
  // expected_preallocation_size and called by reference, so updating those
  // locals between phases is enough; it must be cleared before they go out of
  // scope (see the end of the test).
  sync_point->SetCallBack(
      "DBTestWalFile.GetPreallocationStatus", [&](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        size_t preallocation_size = *(static_cast<size_t*>(arg));
        ASSERT_EQ(expected_preallocation_size, preallocation_size);
        called.fetch_add(1);
      });

  // Runs one write / flush / write / close cycle with sync point processing
  // enabled and returns how many times the callback fired during it. Each
  // phase expects exactly two reports (presumably one per WAL file closed).
  auto count_reports = [&]() -> int {
    called.store(0);
    sync_point->EnableProcessing();
    Put("", "");
    Flush();
    Put("", "");
    Close();
    sync_point->DisableProcessing();
    return called.load();
  };

  ASSERT_EQ(2, count_reports());

  // max_total_wal_size is smaller than the 110% estimate and becomes the cap.
  options.max_total_wal_size = 1000 * 1000;
  expected_preallocation_size = static_cast<size_t>(options.max_total_wal_size);
  Reopen(options);
  ASSERT_EQ(2, count_reports());

  // db_write_buffer_size is smaller still and takes over as the cap.
  options.db_write_buffer_size = 800 * 1000;
  expected_preallocation_size =
      static_cast<size_t>(options.db_write_buffer_size);
  Reopen(options);
  ASSERT_EQ(2, count_reports());

  // An enabled write buffer manager with the smallest size wins.
  expected_preallocation_size = 700 * 1000;
  std::shared_ptr<WriteBufferManager> write_buffer_manager =
      std::make_shared<WriteBufferManager>(static_cast<uint64_t>(700 * 1000));
  options.write_buffer_manager = write_buffer_manager;
  Reopen(options);
  ASSERT_EQ(2, count_reports());

  // The callback captures stack locals by reference and SyncPoint is a
  // process-wide singleton: drop the callback so it cannot be invoked with
  // dangling references after this test returns.
  sync_point->ClearAllCallBacks();
}
#endif // !(defined NDEBUG) || !defined(OS_WIN)

#ifndef ROCKSDB_LITE
TEST_F(DBWALTest, GetSortedWalFiles) {
do {
Expand Down

0 comments on commit b666f85

Please sign in to comment.