From f6a8d27613f4e6056c1b93024a733e2957592fba Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 8 Nov 2012 14:18:22 -0800 Subject: [PATCH] Fix zvol+btrfs hang When using a zvol to back a btrfs filesystem, the btrfs mount would hang. This was due to the bio completion callback used in btrfs assuming that lower-level drivers would never modify the bio->bi_io_vec entries after the bio was submitted via submit_bio(). If they are modified, btrfs will miscalculate which pages need to be unlocked, resulting in a hang. It's worth mentioning that other file systems such as ext[234] and xfs work fine because they do not make the same assumption in the bio completion callback. The most straightforward way to fix the issue is to present the semantics expected by btrfs. This is done by cloning the bios attached to each request and then using the clones' bvecs to perform the required accounting. The clones are freed after each read/write and the original unmodified bios are linked back into the request. 
Signed-off-by: Brian Behlendorf Issue #469 --- module/zfs/dmu.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index e2abf8cf2360..ee6ca3034026 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1022,11 +1022,57 @@ dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req) return 0; } +static void +dmu_bio_put(struct bio *bio) +{ + struct bio *bio_next; + + while (bio) { + bio_next = bio->bi_next; + bio_put(bio); + bio = bio_next; + } +} + +static int +dmu_bio_clone(struct bio *bio, struct bio **bio_copy) +{ + struct bio *bio_root = NULL; + struct bio *bio_last = NULL; + struct bio *bio_new; + + if (bio == NULL) + return EINVAL; + + while (bio) { + bio_new = bio_clone(bio, GFP_NOIO); + if (bio_new == NULL) { + dmu_bio_put(bio_root); + return ENOMEM; + } + + if (bio_last) { + bio_last->bi_next = bio_new; + bio_last = bio_new; + } else { + bio_root = bio_new; + bio_last = bio_new; + } + + bio = bio->bi_next; + } + + *bio_copy = bio_root; + + return 0; +} + int dmu_read_req(objset_t *os, uint64_t object, struct request *req) { uint64_t size = blk_rq_bytes(req); uint64_t offset = blk_rq_pos(req) << 9; + struct bio *bio_saved = req->bio; dmu_buf_t **dbp; int numbufs, i, err; @@ -1039,6 +1085,10 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req) if (err) return (err); + err = dmu_bio_clone(bio_saved, &req->bio); + if (err) + goto error; + for (i = 0; i < numbufs; i++) { int tocpy, didcpy, bufoff; dmu_buf_t *db = dbp[i]; @@ -1062,6 +1112,10 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req) offset += didcpy; err = 0; } + + dmu_bio_put(req->bio); + req->bio = bio_saved; +error: dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); @@ -1072,6 +1126,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) { uint64_t size = blk_rq_bytes(req); uint64_t offset = blk_rq_pos(req) << 9; + struct bio *bio_saved = 
req->bio; dmu_buf_t **dbp; int numbufs; int err = 0; @@ -1085,6 +1140,10 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) if (err) return (err); + err = dmu_bio_clone(bio_saved, &req->bio); + if (err) + goto error; + for (i = 0; i < numbufs; i++) { int tocpy, didcpy, bufoff; dmu_buf_t *db = dbp[i]; @@ -1119,7 +1178,11 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) err = 0; } + dmu_bio_put(req->bio); + req->bio = bio_saved; +error: dmu_buf_rele_array(dbp, numbufs, FTAG); + return (err); }