Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-129309 / None / Add support for zvol block cloning #265

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions config/kernel-blkdev.m4
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
])
])

dnl #
dnl # Upstream patch for blkdev copy offload support
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_COPY_OFFLOAD], [
ZFS_LINUX_TEST_SRC([blkdev_copy_offload], [
#include <linux/bio.h>
#include <linux/blkdev.h>
], [
struct block_device *bdev_in = NULL, *bdev_out = NULL;
loff_t pos_in = 0, pos_out = 0;
ssize_t ret __attribute__ ((unused));
ssize_t len = 0;
void *private = NULL;
void (*endio)(void *, int, ssize_t) = NULL;
ret = blkdev_copy_offload(bdev_in, pos_in, pos_out, len,
endio, private, GFP_KERNEL, bdev_out);
])
])

AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD], [
AC_MSG_CHECKING([whether blkdev_copy_offload exists])
ZFS_LINUX_TEST_RESULT([blkdev_copy_offload], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLKDEV_COPY_OFFLOAD, 1,
[blkdev_copy_offload exists])
], [
AC_MSG_RESULT(no)
])
])

dnl #
dnl # 2.6.38 API change,
dnl # Added blkdev_put()
Expand Down Expand Up @@ -759,6 +789,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
ZFS_AC_KERNEL_SRC_BLKDEV_COPY_OFFLOAD
])

AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
Expand All @@ -781,4 +812,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD
])
2 changes: 2 additions & 0 deletions include/sys/zvol.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ extern int zvol_set_ro(const char *, boolean_t);
extern zvol_state_handle_t *zvol_suspend(const char *);
extern int zvol_resume(zvol_state_handle_t *);
extern void *zvol_tag(zvol_state_handle_t *);
extern int zvol_clone_range(zvol_state_handle_t *, uint64_t,
zvol_state_handle_t *, uint64_t, uint64_t);

extern int zvol_init(void);
extern void zvol_fini(void);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/zvol_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
uint64_t len);
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, boolean_t commit);
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
size_t nbps);
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio);
int zvol_init_impl(void);
Expand Down
137 changes: 137 additions & 0 deletions module/os/linux/zfs/zvol_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <sys/zvol.h>
#include <sys/zvol_impl.h>
#include <cityhash.h>
#include <sys/zfs_znode.h>

#include <linux/blkdev_compat.h>
#include <linux/task_io_accounting_ops.h>
Expand Down Expand Up @@ -68,6 +69,8 @@ static unsigned int zvol_threads = 0;
static unsigned int zvol_blk_mq_threads = 0;
static unsigned int zvol_blk_mq_actual_threads;
static boolean_t zvol_use_blk_mq = B_FALSE;
static boolean_t zvol_bclone_enabled = B_TRUE;
static unsigned long zvol_max_copy_bytes = 0;

/*
* The maximum number of volblocksize blocks to process per thread. Typically,
Expand Down Expand Up @@ -496,6 +499,85 @@ zvol_read_task(void *arg)
zv_request_task_free(task);
}

#ifdef HAVE_BLKDEV_COPY_OFFLOAD
static void zvol_clone_range_impl(zv_request_t *zvr)
{
zvol_state_t *zv_src = zvr->zv, *zv_dst = NULL;
struct request *req = zvr->rq;
struct bio *bio = zvr->bio;
zfs_uio_t uio_src, uio_dst;
uint64_t len = 0;
int error = EINVAL, seg = 1;
struct blkdev_copy_offload_io *offload_io;

if (!zvol_bclone_enabled) {
zvol_end_io(bio, req, -SET_ERROR(EOPNOTSUPP));
return;
}

memset(&uio_src, 0, sizeof (zfs_uio_t));
memset(&uio_dst, 0, sizeof (zfs_uio_t));

if (bio) {
/*
* Single-Queue Request: driver_private contains the
* destination ZVOL.
*/
offload_io = bio->bi_private;
ixhamza marked this conversation as resolved.
Show resolved Hide resolved
if (offload_io && offload_io->driver_private)
zv_dst = offload_io->driver_private;
if (bio->bi_iter.bi_size !=
offload_io->dst_bio->bi_iter.bi_size) {
zvol_end_io(bio, req, -SET_ERROR(error));
return;
}
zfs_uio_bvec_init(&uio_src, bio, NULL);
zfs_uio_bvec_init(&uio_dst, offload_io->dst_bio, NULL);
len = bio->bi_iter.bi_size;
} else {
/*
* Multi-Queue (MQ) Request: First bio contains information
* about destination and the second contains information
* about the source
*/
struct bio *bio_temp;
__rq_for_each_bio(bio_temp, req) {
if (seg == blk_rq_nr_phys_segments(req)) {
offload_io = bio_temp->bi_private;
zfs_uio_bvec_init(&uio_src, bio_temp, NULL);
if (len != bio_temp->bi_iter.bi_size) {
zvol_end_io(bio, req,
-SET_ERROR(error));
return;
}
if (offload_io && offload_io->driver_private)
zv_dst = offload_io->driver_private;
} else {
zfs_uio_bvec_init(&uio_dst, bio_temp, NULL);
len = bio_temp->bi_iter.bi_size;
}
seg++;
}
}

if (!zv_src || !zv_dst) {
zvol_end_io(bio, req, -SET_ERROR(error));
return;
}

error = zvol_clone_range(zv_src, uio_src.uio_loffset, zv_dst,
uio_dst.uio_loffset, len);
zvol_end_io(bio, req, -error);
}

static void
zvol_clone_range_task(void *arg)
{
zv_request_task_t *task = arg;
zvol_clone_range_impl(&task->zvr);
zv_request_task_free(task);
}
#endif

/*
* Process a BIO or request
Expand Down Expand Up @@ -555,6 +637,24 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
blk_mq_hw_queue);
tq_idx = taskq_hash % ztqs->tqs_cnt;

#ifdef HAVE_BLKDEV_COPY_OFFLOAD
if ((bio && op_is_copy(bio_op(bio))) ||
(rq && op_is_copy(req_op(rq)))) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
zvol_end_io(bio, rq, -SET_ERROR(EROFS));
goto out;
}
if (force_sync) {
zvol_clone_range_impl(&zvr);
} else {
task = zv_request_task_create(zvr);
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
zvol_clone_range_task, task, 0, &task->ent);
}
goto out;
}
#endif

if (rw == WRITE) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
zvol_end_io(bio, rq, -SET_ERROR(EROFS));
Expand Down Expand Up @@ -1607,6 +1707,10 @@ zvol_os_create_minor(const char *name)
uint64_t hash = zvol_name_hash(name);
uint64_t volthreading;
bool replayed_zil = B_FALSE;
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
struct queue_limits *lim;
uint64_t max_clone_blocks = 1022;
#endif

if (zvol_inhibit_dev)
return (0);
Expand Down Expand Up @@ -1693,6 +1797,33 @@ zvol_os_create_minor(const char *name)
else
replayed_zil = zil_replay(os, zv, zvol_replay_vector);
}
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
lim = &zv->zv_zso->zvo_queue->limits;
lim->max_user_copy_sectors = UINT_MAX;

/*
* When zvol_bclone_enabled is unset, blkdev_copy_offload() should
* return early and fall back to the default path. Existing zvols
* would require export/import to make this applicable.
*/
if (!zvol_bclone_enabled) {
lim->max_copy_hw_sectors = 0;
lim->max_copy_sectors = 0;
} else if (!zvol_max_copy_bytes) {
if (zv->zv_zilog)
max_clone_blocks = zil_max_log_data(zv->zv_zilog,
sizeof (lr_clone_range_t)) / sizeof (blkptr_t);
lim->max_copy_hw_sectors = MIN((doi->doi_data_block_size *
max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
lim->max_copy_sectors = MIN((doi->doi_data_block_size *
max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
} else {
lim->max_copy_hw_sectors = MIN(zvol_max_copy_bytes,
BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
lim->max_copy_sectors = MIN(zvol_max_copy_bytes,
BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
}
#endif
if (replayed_zil)
zil_close(zv->zv_zilog);
zv->zv_zilog = NULL;
Expand Down Expand Up @@ -1934,6 +2065,12 @@ module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
"Process volblocksize blocks per thread");

module_param(zvol_max_copy_bytes, ulong, 0644);
MODULE_PARM_DESC(zvol_max_copy_bytes, "max copy bytes for zvol block cloning");

module_param(zvol_bclone_enabled, uint, 0644);
MODULE_PARM_DESC(zvol_bclone_enabled, "Disable block cloning for zvols");

#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
module_param(zvol_open_timeout_ms, uint, 0644);
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ int zfs_bclone_enabled = 1;
* a copy of the file and is therefore not the default. However, in certain
* scenarios this behavior may be desirable so a tunable is provided.
*/
static int zfs_bclone_wait_dirty = 0;
int zfs_bclone_wait_dirty = 0;

/*
* Enable Direct I/O. If this setting is 0, then all I/O requests will be
Expand Down
Loading
Loading