4756 metaslab_group_preload() could deadlock
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Approved by: Garrett D'Amore <garrett@damore.org>
grwilson authored and Christopher Siden committed Jun 12, 2014
1 parent b89e420 commit 30beaff
Showing 3 changed files with 27 additions and 5 deletions.
4 changes: 3 additions & 1 deletion usr/src/cmd/ztest/ztest.c
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
@@ -185,6 +185,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
 extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -5593,6 +5594,7 @@ ztest_run(ztest_shared_t *zs)
 	kernel_init(FREAD | FWRITE);
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	spa->spa_debug = B_TRUE;
+	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;
 
 	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
3 changes: 2 additions & 1 deletion usr/src/lib/libzpool/common/llib-lzpool
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 /* LINTLIBRARY */
@@ -66,3 +66,4 @@ extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
 extern boolean_t zfeature_checks_disable;
 extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
25 changes: 22 additions & 3 deletions usr/src/uts/common/fs/zfs/metaslab.c
@@ -1207,6 +1207,8 @@ metaslab_preload(void *arg)
 	metaslab_t *msp = arg;
 	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 
+	ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
+
 	mutex_enter(&msp->ms_lock);
 	metaslab_load_wait(msp);
 	if (!msp->ms_loaded)
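
The assertion added in this hunk checks that the preload task does not run with the metaslab group lock held, matching the lock-ordering rule spelled out in the comment added in the next hunk (ms_lock before mg_lock). As a rough illustration of what a MUTEX_HELD()-style check provides, here is a hypothetical user-level sketch built on POSIX threads; owned_mutex_t, owned_mutex_held(), and preload_task() are invented names for this example, not illumos interfaces.

/*
 * Hypothetical, user-level sketch of a MUTEX_HELD()-style check; this is
 * not illumos code.  A mutex wrapper records its owner so that a function
 * can assert it was NOT called with the lock held.
 */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

typedef struct owned_mutex {
	pthread_mutex_t	lock;
	pthread_t	owner;
	bool		held;
} owned_mutex_t;

static void
owned_mutex_enter(owned_mutex_t *m)
{
	pthread_mutex_lock(&m->lock);
	m->owner = pthread_self();
	m->held = true;
}

static void
owned_mutex_exit(owned_mutex_t *m)
{
	m->held = false;
	pthread_mutex_unlock(&m->lock);
}

/*
 * True only when the calling thread owns the mutex; intended for use in
 * assertions, as with MUTEX_HELD() in the kernel.
 */
static bool
owned_mutex_held(owned_mutex_t *m)
{
	return (m->held && pthread_equal(m->owner, pthread_self()));
}

/* Usage mirroring the new assertion in metaslab_preload(). */
static void
preload_task(owned_mutex_t *group_lock)
{
	assert(!owned_mutex_held(group_lock));
	/* ...take locks that must be ordered before group_lock... */
}
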
@@ -1231,19 +1233,36 @@ metaslab_group_preload(metaslab_group_t *mg)
 		taskq_wait(mg->mg_taskq);
 		return;
 	}
-	mutex_enter(&mg->mg_lock);
 
+	mutex_enter(&mg->mg_lock);
 	/*
-	 * Prefetch the next potential metaslabs
+	 * Load the next potential metaslabs
 	 */
-	for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
+	msp = avl_first(t);
+	while (msp != NULL) {
+		metaslab_t *msp_next = AVL_NEXT(t, msp);
 
 		/* If we have reached our preload limit then we're done */
 		if (++m > metaslab_preload_limit)
 			break;
 
+		/*
+		 * We must drop the metaslab group lock here to preserve
+		 * lock ordering with the ms_lock (when grabbing both
+		 * the mg_lock and the ms_lock, the ms_lock must be taken
+		 * first). As a result, it is possible that the ordering
+		 * of the metaslabs within the avl tree may change before
+		 * we reacquire the lock. The metaslab cannot be removed from
+		 * the tree while we're in syncing context so it is safe to
+		 * drop the mg_lock here. If the metaslabs are reordered
+		 * nothing will break -- we just may end up loading a
+		 * less than optimal one.
+		 */
+		mutex_exit(&mg->mg_lock);
 		VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
 		    msp, TQ_SLEEP) != NULL);
+		mutex_enter(&mg->mg_lock);
+		msp = msp_next;
 	}
 	mutex_exit(&mg->mg_lock);
 }
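
The new comment above carries the core reasoning of the fix: ms_lock must be taken before mg_lock, so the group lock is dropped around the taskq dispatch, and the next tree element is captured beforehand so the walk can resume after reacquiring the lock. Below is a minimal, self-contained sketch of that drop-and-reacquire walk, written with POSIX threads rather than the kernel's kmutex_t and taskq interfaces; group_walk(), process_item(), and the item/group types are invented for illustration, and the sketch assumes list items are never freed while the lock is dropped (the analogue of the syncing-context guarantee noted in the comment).

/*
 * Illustrative sketch only (not illumos code): a drop-and-reacquire walk in
 * the style of metaslab_group_preload(), expressed with POSIX threads.
 * group_lock plays the role of mg_lock; process_item() stands in for the
 * dispatched preload work, which may take locks ordered before group_lock.
 */
#include <pthread.h>
#include <stddef.h>

typedef struct item {
	struct item	*next;
} item_t;

typedef struct group {
	pthread_mutex_t	group_lock;	/* analogous to mg_lock */
	item_t		*head;		/* analogous to the metaslab AVL tree */
} group_t;

/* Work that must not run with group_lock held (cf. the new ASSERT). */
extern void process_item(item_t *);

static void
group_walk(group_t *g, int limit)
{
	int n = 0;

	pthread_mutex_lock(&g->group_lock);
	item_t *it = g->head;
	while (it != NULL) {
		/* Remember the successor while the list is still stable. */
		item_t *next = it->next;

		if (++n > limit)
			break;

		/*
		 * Drop group_lock so the work can take its own locks in the
		 * required order; the list may be reordered meanwhile, which
		 * is tolerated just as in the real code.
		 */
		pthread_mutex_unlock(&g->group_lock);
		process_item(it);
		pthread_mutex_lock(&g->group_lock);
		it = next;
	}
	pthread_mutex_unlock(&g->group_lock);
}
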