Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce dbuf_find() lock contention #13405

Merged
merged 1 commit into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,13 +321,12 @@ typedef struct dmu_buf_impl {
uint8_t db_dirtycnt;
} dmu_buf_impl_t;

/* Note: the dbuf hash table is exposed only for the mdb module */
#define DBUF_MUTEXES 2048
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
#define DBUF_RWLOCKS 8192
#define DBUF_HASH_RWLOCK(h, idx) (&(h)->hash_rwlocks[(idx) & (DBUF_RWLOCKS-1)])
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table;
kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned;
} dbuf_hash_table_t;

typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
Expand Down
26 changes: 13 additions & 13 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,18 +339,18 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;

mutex_enter(DBUF_HASH_MUTEX(h, idx));
rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_READER);
for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
if (DBUF_EQUAL(db, os, obj, level, blkid)) {
mutex_enter(&db->db_mtx);
if (db->db_state != DB_EVICTING) {
mutex_exit(DBUF_HASH_MUTEX(h, idx));
rw_exit(DBUF_HASH_RWLOCK(h, idx));
return (db);
}
mutex_exit(&db->db_mtx);
}
}
mutex_exit(DBUF_HASH_MUTEX(h, idx));
rw_exit(DBUF_HASH_RWLOCK(h, idx));
return (NULL);
}

Expand Down Expand Up @@ -393,13 +393,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;

mutex_enter(DBUF_HASH_MUTEX(h, idx));
rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) {
mutex_exit(DBUF_HASH_MUTEX(h, idx));
rw_exit(DBUF_HASH_RWLOCK(h, idx));
return (dbf);
}
mutex_exit(&dbf->db_mtx);
Expand All @@ -417,7 +417,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx));
rw_exit(DBUF_HASH_RWLOCK(h, idx));
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
DBUF_STAT_MAX(hash_elements_max, he);

Expand Down Expand Up @@ -474,13 +474,13 @@ dbuf_hash_remove(dmu_buf_impl_t *db)

/*
* We mustn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_MUTEX > db_mtx.
* DBUF_HASH_RWLOCK > db_mtx.
*/
ASSERT(zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_state == DB_EVICTING);
ASSERT(!MUTEX_HELD(&db->db_mtx));

mutex_enter(DBUF_HASH_MUTEX(h, idx));
rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
dbp = &h->hash_table[idx];
while ((dbf = *dbp) != db) {
dbp = &dbf->db_hash_next;
Expand All @@ -491,7 +491,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
if (h->hash_table[idx] &&
h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains);
mutex_exit(DBUF_HASH_MUTEX(h, idx));
rw_exit(DBUF_HASH_RWLOCK(h, idx));
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
}

Expand Down Expand Up @@ -914,8 +914,8 @@ dbuf_init(void)
sizeof (dmu_buf_impl_t),
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);

for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
for (i = 0; i < DBUF_RWLOCKS; i++)
rw_init(&h->hash_rwlocks[i], NULL, RW_DEFAULT, NULL);

dbuf_stats_init(h);

Expand Down Expand Up @@ -981,8 +981,8 @@ dbuf_fini(void)

dbuf_stats_destroy();

for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
for (i = 0; i < DBUF_RWLOCKS; i++)
rw_destroy(&h->hash_rwlocks[i]);
#if defined(_KERNEL)
/*
* Large allocations which do not require contiguous pages
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/dbuf_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
if (size)
buf[0] = 0;

mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
rw_enter(DBUF_HASH_RWLOCK(h, dsh->idx), RW_READER);
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
/*
* Returning ENOMEM will cause the data and header functions
Expand All @@ -158,7 +158,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)

mutex_exit(&db->db_mtx);
}
mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
rw_exit(DBUF_HASH_RWLOCK(h, dsh->idx));

return (error);
}
Expand Down