Skip to content

Commit

Permalink
Implementing empty slots and contiguous blocks in freelist
Browse files Browse the repository at this point in the history
  • Loading branch information
kriszyp committed Feb 15, 2024
1 parent 73d1eb4 commit 744fb7e
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 94 deletions.
119 changes: 63 additions & 56 deletions dependencies/lmdb/libraries/liblmdb/mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -2726,14 +2726,16 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
env->me_block_size_cache = calloc(32, sizeof(pgno_t));
env->me_block_size_cache[0] = 31;
}
unsigned empty_entries = 0;
unsigned cache_size = env->me_block_size_cache[0];
pgno_t best_fit_start = 0; // this is a block we will use if we don't find an exact fit
pgno_t best_fit_size = -1;
pgno_t best_fit_start; // this is a block we will use if we don't find an exact fit
pgno_t best_fit_size;
for (op = MDB_FIRST;; op = MDB_NEXT) {
MDB_val key, data;
MDB_node *leaf;
pgno_t *idl;

best_fit_start = 0;
best_fit_size = -1;
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
Expand All @@ -2755,56 +2757,52 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
block_start = 0;
unsigned block_size = 0;
ssize_t entry;
empty_entries = 0;
// TODO: Skip this on the first iteration, since we already checked the cache
if (mop_len > n2) {
i = mop_len;
do {
entry = i == 0 ? 0 : mop[i];
fprintf(stderr, "pgno %u next would be %u\n", entry, block_start + block_size);
if (entry == 0) continue;
if (entry > 0) {
pgno = entry;
block_size = 1;
} else {
block_size = -entry;
pgno = mop[--i];
for (i = 1; i <= mop_len; i++) {
entry = mop[i];
//fprintf(stderr, "pgno %u next would be %u\n", entry, block_start + block_size);
if (entry == 0) {
empty_entries++;
continue;
}
if (entry > 0) {
pgno = entry;
block_size = 1;
} else {
block_size = -entry;
pgno = mop[++i];
}

if (block_size >= num) {
if (block_size == num) {
// we found a block of the right size
mop[i] = 0;
if (block_size > 1) mop[i + 1] = 0;
goto search_done;
} else if (block_size < best_fit_size || best_fit_size == 0) {
best_fit_start = i - 1;
best_fit_size = block_size;
}
if (pgno == block_start + block_size) {
block_size++; // count current contiguous block size
} else {
if (block_size >= num) {
if (block_size == num) {
// we found a block of the right size
pgno = block_start;
goto search_done;
} else if (block_size < best_fit_size || best_fit_size == 0) {
best_fit_start = block_start;
best_fit_size = block_size;
}
}
if (block_size > 0) {
// cache this block size
if (block_size >= 2<<30) block_size = (2<<30) - 1;
unsigned cache_size = env->me_block_size_cache[0];
if (block_size > cache_size) {
fprintf(stderr, "expand block size cache to %u\n", block_size << 1);
env->me_block_size_cache = realloc(env->me_block_size_cache, (block_size << 1) * sizeof(pgno_t));
env->me_block_size_cache[0] = (block_size << 1) - 1;
memset(env->me_block_size_cache + cache_size + 1, 0, (env->me_block_size_cache[0] - cache_size) * sizeof(pgno_t));
cache_size = env->me_block_size_cache[0];
}
env->me_block_size_cache[block_size] = block_start;
fprintf(stderr, "cached block %u of size %u\n", block_start, block_size);
}
block_start = pgno;
block_size = 1;
}
if (block_size > 0) {
// cache this block size
if (block_size >= 2<<30) block_size = (2<<30) - 1;
unsigned cache_size = env->me_block_size_cache[0];
if (block_size > cache_size) {
fprintf(stderr, "expand block size cache to %u\n", block_size << 1);
env->me_block_size_cache = realloc(env->me_block_size_cache, (block_size << 1) * sizeof(pgno_t));
env->me_block_size_cache[0] = (block_size << 1) - 1;
memset(env->me_block_size_cache + cache_size + 1, 0, (env->me_block_size_cache[0] - cache_size) * sizeof(pgno_t));
cache_size = env->me_block_size_cache[0];
}
//if (mop[i-n2] == pgno+n2)
// goto search_done;
} while (--i >= 0);
if (--retry < 0)
break;
env->me_block_size_cache[block_size] = pgno;
fprintf(stderr, "cached block %u of size %u\n", pgno, block_size);
}
//if (mop[i-n2] == pgno+n2)
// goto search_done;
}
i = 0;

if (op == MDB_FIRST) { /* 1st iteration */
/* Prepare to fetch more and coalesce */
Expand Down Expand Up @@ -2869,11 +2867,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
rc = ENOMEM;
goto fail;
}
} else {
} /*else {
if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0)
goto fail;
mop = env->me_pghead;
}
}*/
env->me_pglast = last;
#if (MDB_DEBUG) > 1
DPRINTF(("IDL read txn %"Yu" root %"Yu" num %u",
Expand All @@ -2883,7 +2881,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
#endif
/* Merge in descending sorted order */
fprintf(stderr, "merge\n");
for (unsigned i = i; i < idl[0]; i++) {
for (unsigned i = 1; i <= idl[0]; i++) {
if ((rc = mdb_midl_insert(&mop, idl[i])) != 0)
goto fail;
//mdb_midl_xmerge(mop, idl);
Expand All @@ -2892,9 +2890,15 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
mop_len = mop[0];
}
if (best_fit_start > 0) {
pgno = best_fit_start;
mop[best_fit_start] += num; // block length is a negative, so we add to it in order to subtract the amount we are using
if (mop[best_fit_start] == -1) mop[best_fit_start] = 0;
pgno = mop[best_fit_start + 1];
mop[best_fit_start + 1] += num;
env->me_freelist_position = pgno;
fprintf(stderr, "using best fit at %u size %u of %u\n", pgno, num, best_fit_size);
env->me_block_size_cache[best_fit_size] = 0; // clear this out of the cache (TODO: could move it)

i = 1; // indicate that we found something
goto search_done;
}
/* Use new pages from the map when nothing suitable in the freeDB */
Expand Down Expand Up @@ -2931,10 +2935,13 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
}
}
if (i) {
mop[0] = mop_len -= num;
/* Move any stragglers down */
if (empty_entries > (mop_len >> 1) + 200) {
fprintf(stderr, "should resize\n");
}
/* mop[0] = mop_len -= num;
/* Move any stragglers down
for (j = i-num; j < mop_len; )
mop[++j] = mop[++i];
mop[++j] = mop[++i];*/
} else {
txn->mt_next_pgno = pgno + num;
}
Expand Down
107 changes: 69 additions & 38 deletions dependencies/lmdb/libraries/liblmdb/midl.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,32 @@ unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id )
unsigned base = 0;
unsigned cursor = 1;
int val = 0;
unsigned n = ids[0];

while( 0 < n ) {
unsigned pivot = n >> 1;
cursor = base + pivot + 1;
val = CMP( ids[cursor], id );
unsigned end = ids[0];

while( base + 1 < end ) {
cursor = (base + end + 1) >> 1;
ssize_t entry;
while((entry = ids[cursor]) == 0) {
if (++cursor > end) {
// we went past the end, so search in the other direction
cursor = (base + end) >> 1;
while((entry = ids[cursor]) == 0) {
if (--cursor <= base) {
// completely empty section
return (base + end + 1) >> 1;
}
}
}
}
if (entry < 0) entry = ids[cursor + 1]; // block length, skip past and compare actual id
val = CMP( entry, id );

if( val < 0 ) {
n = pivot;

if (cursor == end) return cursor;
end = cursor;
} else if ( val > 0 ) {
if (cursor == base) return cursor + 1;
base = cursor;
n -= pivot + 1;

} else {
return cursor;
}
Expand All @@ -73,7 +85,7 @@ int mdb_midl_insert( MDB_IDL* ids_ref, MDB_ID id )
unsigned x, i;

x = mdb_midl_search( ids, id );
assert( x > 0 );
//assert( x > 0 );

if( x < 1 ) {
/* internal error */
Expand All @@ -82,56 +94,69 @@ int mdb_midl_insert( MDB_IDL* ids_ref, MDB_ID id )

if ( x <= ids[0] && ids[x] == id ) {
/* duplicate */
assert(0);
//assert(0);
return -1;
}

if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
if ( ids[0] >= MDB_IDL_DB_MAX ) {
/* no room */
--ids[0];
return -2;

} else {
if (x >= ids[0]) return -3; // at the end
MDB_ID next_id = ids[x];
if (id < 0) next_id = ids[x + 1];
if (id - 1 == next_id) {
if (x > ids[0]) return -3; // at the end
ssize_t next_id = ids[x];
if (next_id < 0) next_id = ids[x + 1];
if (id - 1 == next_id && next_id > 0) {
// connected to next entry
ids[x]--; // increment negatively, as we have just expanded a block
ids[x + 1] = id;
// ids[x + 1] = id; // no need to adjust the id, since we are adding to the end of the block
return 0;
}
unsigned before = x;
while (!ids[--before] && before >= 0){} // move past empty entries
if (before >= 0) {
unsigned before = x; // this will end up pointing to an entry or zero right before a block of empty space
while (!ids[--before] && before > 0) {
// move past empty entries
}
if (before > 0) {
MDB_ID next_id = before > 0 ? ids[before] : 0;
int count = before > 1 ? -ids[before - 1] : 0;
if (count < 1) count = 1;
if (next_id - count == id) {
if (next_id - 1 == id) {
// connected to previous entry
ids[before]--; // adjust the starting block to include this
if (count > 1) {
ids[before - 1]--; // can just update the count to include this id
return 0;
} else {
// TODO: need to make space for this one
id = -2; // switching a single entry to a block size of 2
x = before;
goto insert_id;
}
}
}
if (x == 1 && ids[0] > 2 && ids[1] == 0 && ids[2] == 0 && ids[3] == 0) {
// this occurs when we have an empty list
ids[2] = id;
return 0;
}
if (before + 1 < x) {
// there is an empty slot we can use, find a place in the middle
ids[(before + x) >> 1] = id;
return 0;
ids[before + 3 < x ? (before + 2) : (before + 1)] = id;
i = 0;
goto check_full;
}
insert_id:
// move items to try to make room
MDB_ID last_id = id;
ssize_t last_id = id;
i = x;
do {
MDB_ID next_id = ids[i];
next_id = ids[i];
ids[i++] = last_id;
last_id = next_id;
} while(next_id);
check_full:
if (x == ids[0] || // if it is full
x - i > ids[0] >> 3) { // or too many moves. TODO: This threshold should actually be more like the square root of the length
i > 0 && (i - x > ids[0] >> 3)) { // or too many moves. TODO: This threshold should actually be more like the square root of the length
// grow the ids (this will replace the reference too)
mdb_midl_need(ids_ref, 1);
}
Expand All @@ -142,10 +167,10 @@ int mdb_midl_insert( MDB_IDL* ids_ref, MDB_ID id )

/* Allocate an ID list sized for num entries plus two extra MDB_ID slots.
 * The first allocated slot receives num and the returned pointer is
 * advanced past it; the result is NULL if the allocation failed.
 * NOTE(review): this text looks like a diff rendered without +/- markers,
 * so the malloc/calloc declarations and the two stores through ids after
 * the increment are presumably the before/after forms of one line each --
 * confirm against the repository before treating both as live code.
 */
MDB_IDL mdb_midl_alloc(int num)
{
MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID));
MDB_IDL ids = calloc((num+2), sizeof(MDB_ID));
if (ids) {
*ids++ = num;
*ids = 0;
*ids = num;
}
return ids;
}
Expand Down Expand Up @@ -184,23 +209,29 @@ int mdb_midl_need( MDB_IDL *idp, unsigned num )
MDB_IDL ids = *idp;
num += ids[0];
if (num > ids[-1]) {
num = (num + num/4 + (256 + 2)) & -256;
num = (num + num + (256 + 2)) & -256;
MDB_IDL new_ids;
if (!(new_ids = alloc(ids-1, num * sizeof(MDB_ID))))
if (!(new_ids = calloc(num, sizeof(MDB_ID))))
return ENOMEM;
*new_ids++ = num - 2;
unsigned j = 0;
*new_ids = num - 2;
unsigned j = 1;
// re-spread out the entries with gaps for growth
for (unsigned i = 1; i < ids[0]; i++) {
for (unsigned i = 1; i <= ids[0]; i++) {
new_ids[j++] = 0; // empty slot for growth
ssize_t entry;
while (!(entry = ids[i])) { i++; }
while (!(entry = ids[i])) {
if (++i > ids[0]) break;
}
new_ids[j++] = entry;
if (entry < 0) new_ids[j++] = ids[i++]; // this was a block with a length
if (entry < 0) new_ids[j++] = ids[++i]; // this was a block with a length
}
// now shrink (or grow) back to appropriate size
new_ids = alloc(new_ids - 1, (j + (j >> 3)) * sizeof(MDB_ID));
new_ids++;
num = (j + (j >> 3) + 22) & -16;
if (num > new_ids[0]) num = new_ids[0];
new_ids = realloc(new_ids - 1, (num + 2) * sizeof(MDB_ID));
*new_ids++ = num;
*new_ids = num;
*idp = new_ids;
}
return 0;
Expand Down

0 comments on commit 744fb7e

Please sign in to comment.