Skip to content

Commit

Permalink
Decrease contention on dn_struct_rwlock
Browse files Browse the repository at this point in the history
Currently, sequential async write workloads spend a lot of time 
contending on the dn_struct_rwlock. This lock is responsible for 
protecting the entire block tree below it; this naturally results 
in some serialization during heavy write workloads. This can be 
resolved by having per-dbuf locking, which will allow multiple 
writers in the same object at the same time.

We introduce a new rwlock, the db_rwlock. This lock is responsible 
for protecting the contents of the dbuf that it is a part of; when 
reading a block pointer from a dbuf, you hold the lock as a reader. 
When writing data to a dbuf, you hold it as a writer. This allows 
multiple threads to write to different parts of a file at the same 
time.

Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
External-issue: DLPX-52564
External-issue: DLPX-53085
External-issue: DLPX-57384
Closes openzfs#8946
  • Loading branch information
pcd1193182 authored and behlendorf committed Jul 8, 2019
1 parent cb70964 commit f664f1e
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 120 deletions.
27 changes: 24 additions & 3 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ typedef enum override_states {
DR_OVERRIDDEN
} override_states_t;

/*
 * Lock-type token returned by dmu_buf_lock_parent() and accepted by
 * dmu_buf_unlock_parent().
 *
 * NOTE(review): judging by the names only, DLT_NONE presumably means no
 * lock was taken, DLT_PARENT that the parent dbuf's db_rwlock was taken,
 * and DLT_OBJSET that an objset-level lock was taken -- confirm against
 * the implementation.
 */
typedef enum db_lock_type {
	DLT_NONE = 0,
	DLT_PARENT = 1,
	DLT_OBJSET = 2
} db_lock_type_t;

typedef struct dbuf_dirty_record {
/* link on our parents dirty list */
list_node_t dr_dirty_node;
Expand Down Expand Up @@ -217,6 +223,22 @@ typedef struct dmu_buf_impl {
*/
uint8_t db_level;

/*
* Protects db_buf's contents if they contain an indirect block or data
* block of the meta-dnode. We use this lock to protect the structure of
* the block tree. This means that when modifying this dbuf's data, we
* grab its rwlock. When modifying its parent's data (including the
* blkptr to this dbuf), we grab the parent's rwlock. The lock ordering
* for this lock is:
* 1) dn_struct_rwlock
* 2) db_rwlock
* We don't currently grab multiple dbufs' db_rwlocks at once.
*/
krwlock_t db_rwlock;

/* buffer holding our data */
arc_buf_t *db_buf;

/* db_mtx protects the members below */
kmutex_t db_mtx;

Expand All @@ -232,9 +254,6 @@ typedef struct dmu_buf_impl {
*/
zfs_refcount_t db_holds;

/* buffer holding our data */
arc_buf_t *db_buf;

kcondvar_t db_changed;
dbuf_dirty_record_t *db_data_pending;

Expand Down Expand Up @@ -335,6 +354,8 @@ void dbuf_destroy(dmu_buf_impl_t *db);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
void dbuf_release_bp(dmu_buf_impl_t *db);
/*
 * Takes dbuf `db`, a lock mode `rw` and a caller-supplied `tag`, and returns
 * a db_lock_type_t.
 * NOTE(review): presumably acquires, in mode `rw`, whichever lock protects
 * the parent of `db` and reports which one was taken -- the implementation
 * is not visible from this header; confirm.
 */
db_lock_type_t dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag);
/*
 * Takes dbuf `db`, a db_lock_type_t `type` and a caller-supplied `tag`.
 * NOTE(review): presumably `type` and `tag` must be the values obtained from
 * / passed to the matching dmu_buf_lock_parent() call -- confirm.
 */
void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag);

void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
struct dmu_tx *);
Expand Down
5 changes: 3 additions & 2 deletions include/sys/dmu_zfetch.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
*/

/*
* Copyright (c) 2014 by Delphix. All rights reserved.
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
*/

#ifndef _DMU_ZFETCH_H
Expand Down Expand Up @@ -66,7 +66,8 @@ void zfetch_fini(void);

void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);


#ifdef __cplusplus
Expand Down
Loading

0 comments on commit f664f1e

Please sign in to comment.