diff --git a/module/os/linux/zfs/vdev_object_store.c b/module/os/linux/zfs/vdev_object_store.c
index 902364fc9ee2..6303f8951a68 100644
--- a/module/os/linux/zfs/vdev_object_store.c
+++ b/module/os/linux/zfs/vdev_object_store.c
@@ -1858,10 +1858,8 @@ vdev_object_store_metaslab_init(vdev_t *vd, metaslab_t *msp,
 uint64_t
 vdev_object_store_metaslab_offset(vdev_t *vd)
 {
-	boolean_t lock_held = spa_config_held(vd->vdev_spa,
-	    SCL_ALLOC, RW_WRITER);
-	if (!lock_held)
-		spa_config_enter(vd->vdev_spa, SCL_ALLOC, FTAG, RW_WRITER);
+	ASSERT3U(spa_config_held(vd->vdev_spa, SCL_ALLOC, RW_WRITER), ==,
+	    SCL_ALLOC);
 
 	uint64_t blockid = 0;
 	for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
@@ -1869,9 +1867,6 @@ vdev_object_store_metaslab_offset(vdev_t *vd)
 		blockid = MAX(blockid, msp->ms_lbas[0]);
 	}
 
-	if (!lock_held)
-		spa_config_exit(vd->vdev_spa, SCL_ALLOC, FTAG);
-
 	/*
 	 * The blockid represents the next block that will be allocated
 	 * so we need to subtract one to get the last allocated block
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 74d280f3f572..32a3cb1e897a 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -522,6 +522,30 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
 	int wlocks_held = 0;
 
 	ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
+	/*
+	 * If we're using an object-based pool, we may need to flush
+	 * out any pending writes. We only do this when we are trying
+	 * to grab the SCL_ZIO lock as a writer.
+	 */
+	boolean_t flush_needed = (rw == RW_WRITER) &&
+	    spa_is_object_based(spa) && (locks & SCL_ZIO);
+
+	/*
+	 * If this is an object-based pool and a flush is required, then
+	 * we may also have to acquire the SCL_ALLOC lock. We add it to
+	 * the set of locks to be acquired but release it before
+	 * returning to the caller. This allows us to lock out
+	 * allocations so that we can enable the object store passthru
+	 * logic while still grabbing the locks in the correct order.
+	 */
+	boolean_t lock_needed = flush_needed &&
+	    spa_config_held(spa, SCL_ALLOC, RW_WRITER) == 0 &&
+	    !(locks & SCL_ALLOC);
+
+	if (lock_needed) {
+		locks |= SCL_ALLOC;
+	}
+
 	for (int i = 0; i < SCL_LOCKS; i++) {
 		vdev_t *rvd = spa->spa_root_vdev;
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
@@ -540,10 +564,6 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
 			while (scl->scl_count != 0) {
 				scl->scl_write_wanted++;
-				boolean_t flush_needed =
-				    spa_is_object_based(spa) &&
-				    ((1 << i) == SCL_ZIO);
-
 				/*
 				 * If we're on object based pool and
 				 * we're trying to lock the SCL_LOCK,
@@ -554,7 +574,7 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
 				 * flush any I/Os quickly that might
 				 * be holding the SCL_ZIO lock as reader.
 				 */
-				if (flush_needed) {
+				if (flush_needed && (1 << i) == SCL_ZIO) {
 					vdev_object_store_enable_passthru(rvd);
 				}
 				cv_wait(&scl->scl_cv, &scl->scl_lock);
@@ -566,6 +586,10 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
 		mutex_exit(&scl->scl_lock);
 	}
 	ASSERT3U(wlocks_held, <=, locks);
+
+	if (lock_needed) {
+		spa_config_exit(spa, SCL_ALLOC, tag);
+	}
 }
 
 void
@@ -2848,6 +2872,9 @@ boolean_t
 spa_is_object_based(spa_t *spa)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
+	if (rvd == NULL)
+		return (B_FALSE);
+
 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 		if (vdev_is_object_based(rvd->vdev_child[c]))
 			return (B_TRUE);
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 67927efdaf13..5fdac26caa30 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -706,7 +706,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	vdev_alloc_bias_t alloc_bias = VDEV_BIAS_NONE;
 	boolean_t top_level = (parent && !parent->vdev_parent);
 
-	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+	ASSERT3U(spa_config_held(spa, SCL_ALL, RW_WRITER), ==, SCL_ALL);
 
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
 		return (SET_ERROR(EINVAL));
@@ -5224,6 +5224,9 @@ vdev_is_concrete(vdev_t *vd)
 boolean_t
 vdev_is_object_based(vdev_t *vd)
 {
+	if (vd == NULL)
+		return (B_FALSE);
+
 	vdev_ops_t *ops = vd->vdev_ops;
 	if (vd->vdev_ops->vdev_op_leaf && ops == &vdev_object_store_ops)
 		return (B_TRUE);
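With vdev_object_store_metaslab_offset() now asserting that SCL_ALLOC is held
as a writer instead of taking the lock itself, a call site that does not
already hold the lock has to acquire and drop it around the call. The
following is a minimal caller sketch, not taken from the patch; the helper
name is made up for illustration, and it assumes the usual
spa_config_enter()/spa_config_exit() idiom with FTAG.

/*
 * Hypothetical helper (not part of the patch) illustrating the locking
 * contract above: take SCL_ALLOC as a writer before calling
 * vdev_object_store_metaslab_offset(), which now only asserts the lock.
 */
static uint64_t
example_last_allocated_block(spa_t *spa, vdev_t *vd)
{
	spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
	uint64_t blockid = vdev_object_store_metaslab_offset(vd);
	spa_config_exit(spa, SCL_ALLOC, FTAG);

	return (blockid);
}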