Skip to content

Commit

Permalink
Re-embed multilist_t storage
Browse files Browse the repository at this point in the history
This commit partially reverts changes to multilists in PR 7968
(multi-threaded spa-sync()) and adds some cache line alignments to
separate read-only multilists and heavily modified refcount's to different
cache lines.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-by: iXsystems, Inc.
Closes openzfs#12158
  • Loading branch information
amotin authored Jun 10, 2021
1 parent eec5ba1 commit ffdf019
Show file tree
Hide file tree
Showing 12 changed files with 99 additions and 104 deletions.
12 changes: 6 additions & 6 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,20 @@ typedef struct arc_state {
/*
* list of evictable buffers
*/
multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
multilist_t arcs_list[ARC_BUFC_NUMTYPES];
/*
* supports the "dbufs" kstat
*/
arc_state_type_t arcs_state;
/*
* total amount of evictable data in this state
*/
zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES] ____cacheline_aligned;
/*
* total amount of data in this state; this includes: evictable,
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
*/
zfs_refcount_t arcs_size;
/*
* supports the "dbufs" kstat
*/
arc_state_type_t arcs_state;
} arc_state_t;

typedef struct arc_callback arc_callback_t;
Expand Down
4 changes: 2 additions & 2 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ struct objset {
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
zil_header_t os_zil_header;
multilist_t *os_synced_dnodes;
multilist_t os_synced_dnodes;
uint64_t os_flags;
uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes;
Expand All @@ -172,7 +172,7 @@ struct objset {

/* Protected by os_lock */
kmutex_t os_lock;
multilist_t *os_dirty_dnodes[TXG_SIZE];
multilist_t os_dirty_dnodes[TXG_SIZE];
list_t os_dnodes;
list_t os_downgraded_dbufs;

Expand Down
2 changes: 1 addition & 1 deletion include/sys/metaslab_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ struct metaslab_class {
* List of all loaded metaslabs in the class, sorted in order of most
* recent use.
*/
multilist_t *mc_metaslab_txg_list;
multilist_t mc_metaslab_txg_list;

metaslab_class_allocator_t mc_allocator[];
};
Expand Down
3 changes: 2 additions & 1 deletion include/sys/multilist.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ struct multilist {
multilist_sublist_index_func_t *ml_index_func;
};

void multilist_create(multilist_t *, size_t, size_t,
multilist_sublist_index_func_t *);
void multilist_destroy(multilist_t *);
multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);

void multilist_insert(multilist_t *, void *);
void multilist_remove(multilist_t *, void *);
Expand Down
86 changes: 43 additions & 43 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2327,7 +2327,7 @@ add_reference(arc_buf_hdr_t *hdr, void *tag)
(state != arc_anon)) {
/* We don't use the L2-only state list. */
if (state != arc_l2c_only) {
multilist_remove(state->arcs_list[arc_buf_type(hdr)],
multilist_remove(&state->arcs_list[arc_buf_type(hdr)],
hdr);
arc_evictable_space_decrement(hdr, state);
}
Expand Down Expand Up @@ -2361,7 +2361,7 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
*/
if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
(state != arc_anon)) {
multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
arc_evictable_space_increment(hdr, state);
}
Expand Down Expand Up @@ -2464,7 +2464,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
if (refcnt == 0) {
if (old_state != arc_anon && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
multilist_remove(old_state->arcs_list[buftype], hdr);
multilist_remove(&old_state->arcs_list[buftype], hdr);

if (GHOST_STATE(old_state)) {
ASSERT0(bufcnt);
Expand All @@ -2481,7 +2481,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
* beforehand.
*/
ASSERT(HDR_HAS_L1HDR(hdr));
multilist_insert(new_state->arcs_list[buftype], hdr);
multilist_insert(&new_state->arcs_list[buftype], hdr);

if (GHOST_STATE(new_state)) {
ASSERT0(bufcnt);
Expand Down Expand Up @@ -2633,8 +2633,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
* L2 headers should never be on the L2 state list since they don't
* have L1 headers allocated.
*/
ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
}

void
Expand Down Expand Up @@ -4200,7 +4200,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type)
{
uint64_t total_evicted = 0;
multilist_t *ml = state->arcs_list[type];
multilist_t *ml = &state->arcs_list[type];
int num_sublists;
arc_buf_hdr_t **markers;

Expand Down Expand Up @@ -4534,8 +4534,8 @@ arc_evict_meta(uint64_t meta_used)
static arc_buf_contents_t
arc_evict_type(arc_state_t *state)
{
multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
int data_idx = multilist_get_random_index(data_ml);
int meta_idx = multilist_get_random_index(meta_ml);
multilist_sublist_t *data_mls;
Expand Down Expand Up @@ -7455,44 +7455,44 @@ arc_state_init(void)
arc_mfu_ghost = &ARC_mfu_ghost;
arc_l2c_only = &ARC_l2c_only;

arc_mru->arcs_list[ARC_BUFC_METADATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mru->arcs_list[ARC_BUFC_DATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mfu->arcs_list[ARC_BUFC_METADATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mfu->arcs_list[ARC_BUFC_DATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);
arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
multilist_create(sizeof (arc_buf_hdr_t),
multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
arc_state_multilist_index_func);

Expand Down Expand Up @@ -7558,16 +7558,16 @@ arc_state_fini(void)
zfs_refcount_destroy(&arc_mfu_ghost->arcs_size);
zfs_refcount_destroy(&arc_l2c_only->arcs_size);

multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);

aggsum_fini(&arc_meta_used);
aggsum_fini(&arc_size);
Expand Down Expand Up @@ -8624,16 +8624,16 @@ l2arc_sublist_lock(int list_num)

switch (list_num) {
case 0:
ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
break;
case 1:
ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
break;
case 2:
ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
break;
case 3:
ml = arc_mru->arcs_list[ARC_BUFC_DATA];
ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
break;
default:
return (NULL);
Expand Down
20 changes: 10 additions & 10 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ static boolean_t dbuf_evict_thread_exit;
* by those caches' matching enum values (from dbuf_cached_state_t).
*/
typedef struct dbuf_cache {
multilist_t *cache;
zfs_refcount_t size;
multilist_t cache;
zfs_refcount_t size ____cacheline_aligned;
} dbuf_cache_t;
dbuf_cache_t dbuf_caches[DB_CACHE_MAX];

Expand Down Expand Up @@ -667,9 +667,9 @@ dbuf_cache_above_lowater(void)
static void
dbuf_evict_one(void)
{
int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
int idx = multilist_get_random_index(&dbuf_caches[DB_DBUF_CACHE].cache);
multilist_sublist_t *mls = multilist_sublist_lock(
dbuf_caches[DB_DBUF_CACHE].cache, idx);
&dbuf_caches[DB_DBUF_CACHE].cache, idx);

ASSERT(!MUTEX_HELD(&dbuf_evict_lock));

Expand Down Expand Up @@ -833,8 +833,8 @@ dbuf_init(void)
dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);

for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
dbuf_caches[dcs].cache =
multilist_create(sizeof (dmu_buf_impl_t),
multilist_create(&dbuf_caches[dcs].cache,
sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_cache_link),
dbuf_cache_multilist_index_func);
zfs_refcount_create(&dbuf_caches[dcs].size);
Expand Down Expand Up @@ -901,7 +901,7 @@ dbuf_fini(void)

for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
zfs_refcount_destroy(&dbuf_caches[dcs].size);
multilist_destroy(dbuf_caches[dcs].cache);
multilist_destroy(&dbuf_caches[dcs].cache);
}

if (dbuf_ksp != NULL) {
Expand Down Expand Up @@ -2755,7 +2755,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
db->db_caching_status == DB_DBUF_METADATA_CACHE);

multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);
(void) zfs_refcount_remove_many(
&dbuf_caches[db->db_caching_status].size,
db->db.db_size, db);
Expand Down Expand Up @@ -3465,7 +3465,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
db->db_caching_status == DB_DBUF_METADATA_CACHE);

multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
multilist_remove(&dbuf_caches[db->db_caching_status].cache, db);
(void) zfs_refcount_remove_many(
&dbuf_caches[db->db_caching_status].size,
db->db.db_size, db);
Expand Down Expand Up @@ -3707,7 +3707,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
db->db_caching_status = dcs;

multilist_insert(dbuf_caches[dcs].cache, db);
multilist_insert(&dbuf_caches[dcs].cache, db);
size = zfs_refcount_add_many(
&dbuf_caches[dcs].size,
db->db.db_size, db);
Expand Down
Loading

0 comments on commit ffdf019

Please sign in to comment.