From 83e86fc3838a3c7d589ca28d09b6f361dc9ea0ea Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Thu, 11 May 2017 16:40:33 -0400 Subject: [PATCH] Revert commit 1ee159f4 Revert commit 1ee159f423b5eb3c4646b0ba2dd0fb359503ba90 ("Fix lock order inversion with zvol_open()..."), as it did not account for the use of zvols as vdevs. Those use cases resulted in lock order inversion deadlocks involving spa_namespace_lock and bdev->bd_mutex. Signed-off-by: Boris Protopopov --- module/zfs/zvol.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 01e06aed2631..121f75e4e257 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1188,12 +1188,36 @@ static int zvol_first_open(zvol_state_t *zv) { objset_t *os; - int error; + int error, locked = 0; + + /* + * In all other cases the spa_namespace_lock is taken before the + * bdev->bd_mutex lock. But in this case the Linux __blkdev_get() + * function calls fops->open() with the bdev->bd_mutex lock held. + * This deadlock can be easily observed with zvols used as vdevs. + * + * To avoid a potential lock inversion deadlock we preemptively + * try to take the spa_namespace_lock(). Normally it will not + * be contended and this is safe because spa_open_common() handles + * the case where the caller already holds the spa_namespace_lock. + * + * When it is contended we risk a lock inversion if we were to + * block waiting for the lock. Luckily, the __blkdev_get() + * function allows us to return -ERESTARTSYS which will result in + * bdev->bd_mutex being dropped, reacquired, and fops->open() being + * called again. This process can be repeated safely until both + * locks are acquired. 
+ */ + if (!mutex_owned(&spa_namespace_lock)) { + locked = mutex_tryenter(&spa_namespace_lock); + if (!locked) + return (-SET_ERROR(ERESTARTSYS)); + } /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os); if (error) - return (SET_ERROR(-error)); + goto out_mutex; zv->zv_objset = os; @@ -1204,6 +1228,9 @@ zvol_first_open(zvol_state_t *zv) zv->zv_objset = NULL; } +out_mutex: + if (locked) + mutex_exit(&spa_namespace_lock); return (SET_ERROR(-error)); }