Skip to content

Commit

Permalink
Adapt RocksDB 8.1.1 (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
linxGnu authored May 26, 2023
1 parent bcd6ba4 commit b19b62d
Show file tree
Hide file tree
Showing 18 changed files with 323 additions and 62 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ libs/bzip2/libbz2.a
build/*
.vscode/c_cpp_properties.json
dist
.vscode/settings.json
8 changes: 4 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ CMAKE_REQUIRED_PARAMS="-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_INSTALL_PREF
snappy_version="1.1.10"
cd $BUILD_PATH && wget https://github.com/google/snappy/archive/${snappy_version}.tar.gz && tar xzf ${snappy_version}.tar.gz && cd snappy-${snappy_version} && \
mkdir -p build_place && cd build_place && \
CXXFLAGS='-fPIC -O3 -pipe -Wno-uninitialized -Werror,-Wno-sign-compare' cmake $CMAKE_REQUIRED_PARAMS -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF .. && make install/strip -j16 && \
CXXFLAGS='-fPIC -O3 -pipe -Wno-uninitialized -Wno-sign-compare' cmake $CMAKE_REQUIRED_PARAMS -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF .. && make install/strip -j16 && \
cd $BUILD_PATH && rm -rf *

export CFLAGS='-fPIC -O3 -pipe'
export CXXFLAGS='-fPIC -O3 -pipe -Wno-uninitialized'

zlib_version="1.2.11"
zlib_version="1.2.13"
cd $BUILD_PATH && wget https://github.com/madler/zlib/archive/v${zlib_version}.tar.gz && tar xzf v${zlib_version}.tar.gz && cd zlib-${zlib_version} && \
./configure --prefix=$INSTALL_PREFIX --static && make -j16 install && \
cd $BUILD_PATH && rm -rf *
Expand All @@ -27,7 +27,7 @@ cd $BUILD_PATH && wget https://github.com/lz4/lz4/archive/v${lz4_version}.tar.gz
cmake $CMAKE_REQUIRED_PARAMS -DLZ4_BUILD_LEGACY_LZ4C=OFF -DBUILD_SHARED_LIBS=OFF -DLZ4_POSITION_INDEPENDENT_LIB=ON && make -j16 install && \
cd $BUILD_PATH && rm -rf *

zstd_version="1.5.4"
zstd_version="1.5.5"
cd $BUILD_PATH && wget https://github.com/facebook/zstd/archive/v${zstd_version}.tar.gz && tar xzf v${zstd_version}.tar.gz && \
cd zstd-${zstd_version}/build/cmake && mkdir -p build_place && cd build_place && \
cmake $CMAKE_REQUIRED_PARAMS -DZSTD_BUILD_PROGRAMS=OFF -DZSTD_BUILD_CONTRIB=OFF -DZSTD_BUILD_STATIC=ON -DZSTD_BUILD_SHARED=OFF -DZSTD_BUILD_TESTS=OFF \
Expand All @@ -37,7 +37,7 @@ cd $BUILD_PATH && wget https://github.com/facebook/zstd/archive/v${zstd_version}
# Note: if you don't have a good reason, please do not set -DPORTABLE=ON
#
# This one is set here for compatibility with the GitHub Actions runner's processor
rocksdb_version="8.0.0"
rocksdb_version="8.1.1"
cd $BUILD_PATH && wget https://github.com/facebook/rocksdb/archive/v${rocksdb_version}.tar.gz && tar xzf v${rocksdb_version}.tar.gz && cd rocksdb-${rocksdb_version}/ && \
mkdir -p build_place && cd build_place && cmake -DCMAKE_BUILD_TYPE=Release $CMAKE_REQUIRED_PARAMS -DCMAKE_PREFIX_PATH=$INSTALL_PREFIX -DWITH_TESTS=OFF -DWITH_GFLAGS=OFF \
-DWITH_BENCHMARK_TOOLS=OFF -DWITH_TOOLS=OFF -DWITH_MD_LIBRARY=OFF -DWITH_RUNTIME_DEBUG=OFF -DROCKSDB_BUILD_SHARED=OFF -DWITH_SNAPPY=ON -DWITH_LZ4=ON -DWITH_ZLIB=ON -DWITH_LIBURING=OFF \
Expand Down
57 changes: 52 additions & 5 deletions c.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ typedef struct rocksdb_backup_engine_options_t rocksdb_backup_engine_options_t;
typedef struct rocksdb_restore_options_t rocksdb_restore_options_t;
typedef struct rocksdb_memory_allocator_t rocksdb_memory_allocator_t;
typedef struct rocksdb_lru_cache_options_t rocksdb_lru_cache_options_t;
typedef struct rocksdb_hyper_clock_cache_options_t
rocksdb_hyper_clock_cache_options_t;
typedef struct rocksdb_cache_t rocksdb_cache_t;
typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t;
typedef struct rocksdb_compactionfiltercontext_t
Expand Down Expand Up @@ -597,13 +599,14 @@ extern ROCKSDB_LIBRARY_API void rocksdb_release_snapshot(
extern ROCKSDB_LIBRARY_API char* rocksdb_property_value(rocksdb_t* db,
const char* propname);
/* returns 0 on success, -1 otherwise */
int rocksdb_property_int(rocksdb_t* db, const char* propname,
uint64_t* out_val);
extern ROCKSDB_LIBRARY_API int rocksdb_property_int(rocksdb_t* db,
const char* propname,
uint64_t* out_val);

/* returns 0 on success, -1 otherwise */
int rocksdb_property_int_cf(rocksdb_t* db,
rocksdb_column_family_handle_t* column_family,
const char* propname, uint64_t* out_val);
extern ROCKSDB_LIBRARY_API int rocksdb_property_int_cf(
rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
const char* propname, uint64_t* out_val);

extern ROCKSDB_LIBRARY_API char* rocksdb_property_value_cf(
rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
Expand Down Expand Up @@ -662,6 +665,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_flush_cf(
rocksdb_t* db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t* column_family, char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_flush_cfs(
rocksdb_t* db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t** column_family, int num_column_families,
char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_flush_wal(rocksdb_t* db,
unsigned char sync,
char** errptr);
Expand Down Expand Up @@ -2012,6 +2020,29 @@ rocksdb_cache_get_usage(rocksdb_cache_t* cache);
extern ROCKSDB_LIBRARY_API size_t
rocksdb_cache_get_pinned_usage(rocksdb_cache_t* cache);

/* HyperClockCache */
extern ROCKSDB_LIBRARY_API rocksdb_hyper_clock_cache_options_t*
rocksdb_hyper_clock_cache_options_create(size_t capacity,
size_t estimated_entry_charge);
extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_destroy(
rocksdb_hyper_clock_cache_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_set_capacity(
rocksdb_hyper_clock_cache_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void
rocksdb_hyper_clock_cache_options_set_estimated_entry_charge(
rocksdb_hyper_clock_cache_options_t*, size_t);
extern ROCKSDB_LIBRARY_API void
rocksdb_hyper_clock_cache_options_set_num_shard_bits(
rocksdb_hyper_clock_cache_options_t*, int);
extern ROCKSDB_LIBRARY_API void
rocksdb_hyper_clock_cache_options_set_memory_allocator(
rocksdb_hyper_clock_cache_options_t*, rocksdb_memory_allocator_t*);

extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_hyper_clock(
size_t capacity, size_t estimated_entry_charge);
extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
rocksdb_cache_create_hyper_clock_opts(rocksdb_hyper_clock_cache_options_t*);

/* DBPath */

extern ROCKSDB_LIBRARY_API rocksdb_dbpath_t* rocksdb_dbpath_create(
Expand Down Expand Up @@ -2116,6 +2147,11 @@ rocksdb_ingestexternalfileoptions_set_allow_blocking_flush(
extern ROCKSDB_LIBRARY_API void
rocksdb_ingestexternalfileoptions_set_ingest_behind(
rocksdb_ingestexternalfileoptions_t* opt, unsigned char ingest_behind);
extern ROCKSDB_LIBRARY_API void
rocksdb_ingestexternalfileoptions_set_fail_if_not_bottommost_level(
rocksdb_ingestexternalfileoptions_t* opt,
unsigned char fail_if_not_bottommost_level);

extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_destroy(
rocksdb_ingestexternalfileoptions_t* opt);

Expand Down Expand Up @@ -2198,6 +2234,12 @@ extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_destroy(
extern ROCKSDB_LIBRARY_API rocksdb_fifo_compaction_options_t*
rocksdb_fifo_compaction_options_create(void);
extern ROCKSDB_LIBRARY_API void
rocksdb_fifo_compaction_options_set_allow_compaction(
rocksdb_fifo_compaction_options_t* fifo_opts, unsigned char allow_compaction);
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_fifo_compaction_options_get_allow_compaction(
rocksdb_fifo_compaction_options_t* fifo_opts);
extern ROCKSDB_LIBRARY_API void
rocksdb_fifo_compaction_options_set_max_table_files_size(
rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size);
extern ROCKSDB_LIBRARY_API uint64_t
Expand Down Expand Up @@ -2622,6 +2664,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cf(
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t* column_family, char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cfs(
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options,
rocksdb_column_family_handle_t** column_families, int num_column_families,
char** errptr);

extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_wal(
rocksdb_transactiondb_t* txn_db, unsigned char sync, char** errptr);

Expand Down
98 changes: 98 additions & 0 deletions cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ func NewLRUCacheWithOptions(opt *LRUCacheOptions) *Cache {
return newNativeCache(cCache)
}

// NewHyperClockCache creates a new hyper clock cache with the given capacity
// and estimated per-entry charge.
func NewHyperClockCache(capacity, estimatedEntryCharge int) *Cache {
	handle := C.rocksdb_cache_create_hyper_clock(
		C.size_t(capacity),
		C.size_t(estimatedEntryCharge),
	)
	return newNativeCache(handle)
}

// NewHyperClockCacheWithOpts creates a hyper clock cache from predefined options.
func NewHyperClockCacheWithOpts(opt *HyperClockCacheOptions) *Cache {
	return newNativeCache(C.rocksdb_cache_create_hyper_clock_opts(opt.c))
}

// NewNativeCache creates a Cache object.
func newNativeCache(c *C.rocksdb_cache_t) *Cache {
return &Cache{c: c}
Expand Down Expand Up @@ -90,3 +102,89 @@ func (l *LRUCacheOptions) SetNumShardBits(n int) {
func (l *LRUCacheOptions) SetMemoryAllocator(m *MemoryAllocator) {
C.rocksdb_lru_cache_options_set_memory_allocator(l.c, m.c)
}

// HyperClockCacheOptions are options for HyperClockCache.
//
// HyperClockCache is a lock-free Cache alternative for RocksDB block cache
// that offers much improved CPU efficiency vs. LRUCache under high parallel
// load or high contention, with some caveats:
//   - Not a general Cache implementation: can only be used for
//     BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is
//     compatible with HyperClockCache.
//   - Requires an extra tuning parameter: see estimated_entry_charge below.
//     Similarly, substantially changing the capacity with SetCapacity could
//     harm efficiency.
//   - SecondaryCache is not yet supported.
//   - Cache priorities are less aggressively enforced, which could cause
//     cache dilution from long range scans (unless they use fill_cache=false).
//   - Can be worse for small caches, because if almost all of a cache shard is
//     pinned (more likely with non-partitioned filters), then CLOCK eviction
//     becomes very CPU intensive.
//
// See internal cache/clock_cache.h for full description.
type HyperClockCacheOptions struct {
	c *C.rocksdb_hyper_clock_cache_options_t // owned native handle; released by Destroy
}

// NewHyperClockCacheOptions creates new options for hyper clock cache.
func NewHyperClockCacheOptions(capacity, estimatedEntryCharge int) *HyperClockCacheOptions {
	opts := C.rocksdb_hyper_clock_cache_options_create(
		C.size_t(capacity),
		C.size_t(estimatedEntryCharge),
	)
	return &HyperClockCacheOptions{c: opts}
}

// SetCapacity sets the capacity of the cache.
func (h *HyperClockCacheOptions) SetCapacity(capacity int) {
	cap := C.size_t(capacity)
	C.rocksdb_hyper_clock_cache_options_set_capacity(h.c, cap)
}

// SetEstimatedEntryCharge sets the estimated average `charge` associated with cache entries.
//
// This is a critical configuration parameter for good performance from the hyper
// cache, because having a table size that is fixed at creation time greatly
// reduces the required synchronization between threads.
//   - If the estimate is substantially too low (e.g. less than half the true
//     average) then metadata space overhead will be substantially higher (e.g.
//     200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this
//     can slightly reduce cache hit rates, and slightly reduce access times due
//     to the larger working memory size.
//   - If the estimate is substantially too high (e.g. 25% higher than the true
//     average) then there might not be sufficient slots in the hash table for
//     both efficient operation and capacity utilization (hit rate). The hyper
//     cache will evict entries to prevent load factors that could dramatically
//     affect lookup times, instead letting the hit rate suffer by not utilizing
//     the full capacity.
//
// A reasonable choice is the larger of block_size and metadata_block_size.
// When WriteBufferManager (and similar) charge memory usage to the block
// cache, this can lead to the same effect as estimate being too low, which
// is better than the opposite. Therefore, the general recommendation is to
// assume that other memory charged to block cache could be negligible, and
// ignore it in making the estimate.
//
// The best parameter choice based on a cache in use is given by
// GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
// with kDontChargeCacheMetadata. More precisely with
// kFullChargeCacheMetadata is (GetUsage() - 64 * GetTableAddressCount()) /
// GetOccupancyCount(). However, when the average value size might vary
// (e.g. balance between metadata and data blocks in cache), it is better
// to estimate toward the lower side than the higher side.
func (h *HyperClockCacheOptions) SetEstimatedEntryCharge(v int) {
	charge := C.size_t(v)
	C.rocksdb_hyper_clock_cache_options_set_estimated_entry_charge(h.c, charge)
}

// SetNumShardBits sets the number of shard bits used for this cache.
func (h *HyperClockCacheOptions) SetNumShardBits(n int) {
	C.rocksdb_hyper_clock_cache_options_set_num_shard_bits(h.c, C.int(n))
}

// SetMemoryAllocator sets the memory allocator used by this cache.
func (h *HyperClockCacheOptions) SetMemoryAllocator(m *MemoryAllocator) {
	alloc := m.c
	C.rocksdb_hyper_clock_cache_options_set_memory_allocator(h.c, alloc)
}

// Destroy releases the underlying native options. It is safe to call
// multiple times; the options must not be used after the first call.
func (h *HyperClockCacheOptions) Destroy() {
	// Guard against double-Destroy: passing an already-freed handle to the
	// C destructor would be undefined behavior.
	if h.c != nil {
		C.rocksdb_hyper_clock_cache_options_destroy(h.c)
		h.c = nil
	}
}
32 changes: 30 additions & 2 deletions cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/stretchr/testify/require"
)

func TestCache(t *testing.T) {
func TestLRUCache(t *testing.T) {
cache := NewLRUCache(19)
defer cache.Destroy()

Expand All @@ -17,7 +17,18 @@ func TestCache(t *testing.T) {
cache.DisownData()
}

func TestCacheWithOpts(t *testing.T) {
func TestHyperClockCache(t *testing.T) {
	const initialCapacity = 100
	cache := NewHyperClockCache(initialCapacity, 10)
	defer cache.Destroy()

	require.EqualValues(t, initialCapacity, cache.GetCapacity())

	cache.SetCapacity(128)
	require.EqualValues(t, 128, cache.GetCapacity())

	cache.DisownData()
}

func TestLRUCacheWithOpts(t *testing.T) {
opts := NewLRUCacheOptions()
opts.SetCapacity(19)
opts.SetNumShardBits(2)
Expand All @@ -32,3 +43,20 @@ func TestCacheWithOpts(t *testing.T) {

cache.DisownData()
}

func TestHyperClockCacheWithOpts(t *testing.T) {
	opts := NewHyperClockCacheOptions(100, 10)
	defer opts.Destroy()

	// Later setters override the values passed at construction time.
	opts.SetCapacity(19)
	opts.SetEstimatedEntryCharge(10)
	opts.SetNumShardBits(2)

	cache := NewHyperClockCacheWithOpts(opts)
	defer cache.Destroy()

	require.EqualValues(t, 19, cache.GetCapacity())

	cache.SetCapacity(128)
	require.EqualValues(t, 128, cache.GetCapacity())

	cache.DisownData()
}
1 change: 1 addition & 0 deletions cf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ func TestColumnFamilyBatchPutGet(t *testing.T) {

// trigger flush
require.Nil(t, db.FlushCF(cfh[0], NewDefaultFlushOptions()))
require.Nil(t, db.FlushCFs(cfh, NewDefaultFlushOptions()))

meta := db.GetColumnFamilyMetadataCF(cfh[0])
require.NotNil(t, meta)
Expand Down
33 changes: 33 additions & 0 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,24 @@ func (db *DB) GetProperty(propName string) (value string) {
return
}

// GetIntProperty similar to `GetProperty`, but only works for a subset of properties whose
// return value is an integer. Return the value by integer.
func (db *DB) GetIntProperty(propName string) (value uint64, success bool) {
	prop := C.CString(propName)
	defer C.free(unsafe.Pointer(prop))

	success = C.rocksdb_property_int(db.c, prop, (*C.uint64_t)(&value)) == 0
	return
}

// GetIntPropertyCF similar to `GetProperty`, but only works for a subset of properties whose
// return value is an integer. Return the value by integer.
func (db *DB) GetIntPropertyCF(propName string, cf *ColumnFamilyHandle) (value uint64, success bool) {
	prop := C.CString(propName)
	defer C.free(unsafe.Pointer(prop))

	success = C.rocksdb_property_int_cf(db.c, cf.c, prop, (*C.uint64_t)(&value)) == 0
	return
}

// GetPropertyCF returns the value of a database property.
func (db *DB) GetPropertyCF(propName string, cf *ColumnFamilyHandle) (value string) {
cProp := C.CString(propName)
Expand Down Expand Up @@ -1562,6 +1580,21 @@ func (db *DB) FlushCF(cf *ColumnFamilyHandle, opts *FlushOptions) (err error) {
return
}

// FlushCFs triggers a manual flush for the database on specific column families.
// Flushing no column families is a no-op and returns nil.
func (db *DB) FlushCFs(cfs []*ColumnFamilyHandle, opts *FlushOptions) (err error) {
	n := len(cfs)
	if n == 0 {
		return nil
	}

	handles := make([]*C.rocksdb_column_family_handle_t, n)
	for i, cf := range cfs {
		handles[i] = cf.c
	}

	var cErr *C.char
	C.rocksdb_flush_cfs(db.c, opts.c, &handles[0], C.int(n), &cErr)
	return fromCError(cErr)
}

// FlushWAL flushes the WAL memory buffer to the file. If sync is true, it calls SyncWAL
// afterwards.
func (db *DB) FlushWAL(sync bool) (err error) {
Expand Down
4 changes: 4 additions & 0 deletions db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ import (
func TestOpenDb(t *testing.T) {
	db := newTestDB(t, nil)
	defer db.Close()

	const prop = "rocksdb.num-immutable-mem-table"
	require.EqualValues(t, "0", db.GetProperty(prop))

	v, ok := db.GetIntProperty(prop)
	require.EqualValues(t, uint64(0), v)
	require.True(t, ok)
}

func TestDBCRUD(t *testing.T) {
Expand Down
Loading

0 comments on commit b19b62d

Please sign in to comment.