From 85bad8f010d08fb0d08fc2c33e5ca3630dfe2a82 Mon Sep 17 00:00:00 2001 From: Sergei Shtepa Date: Tue, 7 Jun 2022 15:01:45 +0200 Subject: [PATCH] patch for 5.19 rc1 --- patches/lk5.19/0000-cover-letter.patch | 152 ++++ ...k_filter-enable-block-device-filters.patch | 436 ++++++++++ ...-header-file-of-the-module-interface.patch | 483 ++++++++++ ...odule-management-interface-functions.patch | 484 ++++++++++ ...lock-blksnap-init-and-exit-functions.patch | 167 ++++ ...block-blksnap-interaction-with-sysfs.patch | 117 +++ ...taching-and-detaching-the-filter-and.patch | 800 +++++++++++++++++ ...blksnap-map-of-change-block-tracking.patch | 423 +++++++++ ...g-buffer-in-the-form-of-an-array-of-.patch | 282 ++++++ ...nimum-data-storage-unit-of-the-origi.patch | 514 +++++++++++ ...ffer-in-memory-for-the-minimum-data-.patch | 258 ++++++ ...nctions-and-structures-for-performin.patch | 359 ++++++++ ...torage-for-storing-difference-blocks.patch | 442 ++++++++++ ...ent-queue-from-the-difference-storag.patch | 189 ++++ ...ner-of-information-about-overwritten.patch | 823 ++++++++++++++++++ ...-blksnap-snapshot-image-block-device.patch | 442 ++++++++++ .../lk5.19/0016-block-blksnap-snapshot.patch | 784 +++++++++++++++++ ...bugging-mechanism-for-monitoring-mem.patch | 175 ++++ .../lk5.19/0018-block-blksnap-Kconfig.patch | 124 +++ .../lk5.19/0019-block-blksnap-Makefile.patch | 42 + ...ap-adds-a-blksnap-to-the-kernel-tree.patch | 36 + 21 files changed, 7532 insertions(+) create mode 100644 patches/lk5.19/0000-cover-letter.patch create mode 100644 patches/lk5.19/0001-block-blk_filter-enable-block-device-filters.patch create mode 100644 patches/lk5.19/0002-block-blksnap-header-file-of-the-module-interface.patch create mode 100644 patches/lk5.19/0003-block-blksnap-module-management-interface-functions.patch create mode 100644 patches/lk5.19/0004-lock-blksnap-init-and-exit-functions.patch create mode 100644 patches/lk5.19/0005-block-blksnap-interaction-with-sysfs.patch create mode 100644 patches/lk5.19/0006-block-blksnap-attaching-and-detaching-the-filter-and.patch create mode 100644 patches/lk5.19/0007-block-blksnap-map-of-change-block-tracking.patch create mode 100644 patches/lk5.19/0008-block-blksnap-big-buffer-in-the-form-of-an-array-of-.patch create mode 100644 patches/lk5.19/0009-block-blksnap-minimum-data-storage-unit-of-the-origi.patch create mode 100644 patches/lk5.19/0010-block-blksnap-buffer-in-memory-for-the-minimum-data-.patch create mode 100644 patches/lk5.19/0011-block-blksnap-functions-and-structures-for-performin.patch create mode 100644 patches/lk5.19/0012-block-blksnap-storage-for-storing-difference-blocks.patch create mode 100644 patches/lk5.19/0013-block-blksnap-event-queue-from-the-difference-storag.patch create mode 100644 patches/lk5.19/0014-block-blksnap-owner-of-information-about-overwritten.patch create mode 100644 patches/lk5.19/0015-block-blksnap-snapshot-image-block-device.patch create mode 100644 patches/lk5.19/0016-block-blksnap-snapshot.patch create mode 100644 patches/lk5.19/0017-block-blksnap-debugging-mechanism-for-monitoring-mem.patch create mode 100644 patches/lk5.19/0018-block-blksnap-Kconfig.patch create mode 100644 patches/lk5.19/0019-block-blksnap-Makefile.patch create mode 100644 patches/lk5.19/0020-block-blksnap-adds-a-blksnap-to-the-kernel-tree.patch diff --git a/patches/lk5.19/0000-cover-letter.patch b/patches/lk5.19/0000-cover-letter.patch new file mode 100644 index 00000000..d13f655d --- /dev/null +++ b/patches/lk5.19/0000-cover-letter.patch @@ -0,0 +1,152 @@ +From 
a203abd2b70fe3a9da925eeb9165a86cba247cbc Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:54:43 +0200
+Subject: [PATCH 00/20] blksnap - creating non-persistent snapshots for backup
+
+Hi all.
+
+I suggest the blksnap kernel module for consideration. It allows creating
+non-persistent snapshots of any block devices. The main purpose of such
+snapshots is to create backups of entire block devices.
+
+A snapshot is created simultaneously for several block devices, ensuring
+their consistent state in the backup.
+
+A change tracker is implemented in the module. It allows determining
+which blocks were changed between the most recent snapshot and any of
+the previous snapshots of the same generation. This makes it possible
+to implement the logic of both incremental and differential backups.
+
+An arbitrary range of sectors on any block device can be used to store
+snapshot changes. The size of the difference storage can be increased
+after the snapshot is created by adding new sector ranges. This makes
+it possible to keep the difference storage in individual files on a
+file system that can occupy the entire space of a block device, and to
+grow that storage as needed.
+
+To create snapshot images of block devices, the module stores blocks
+of the original block device that have been changed since the snapshot
+was taken. To do this, the module intercepts write requests and reads
+the blocks that need to be overwritten. This algorithm guarantees the
+safety of the data of the original block device in case the snapshot
+overflows, and even in case of unpredictable critical errors.
+
+To connect the module to and disconnect it from the block layer, the
+concept of a block device filter is introduced. Functions for attaching
+filters are added to the block layer, providing the ability to
+intercept I/O requests.
+
+The blksnap module was created specifically for upstream, based on the
+experience of operating the out-of-tree veeamsnap module, which is part
+of the Veeam Agent for Linux product. I am sure that the module will be
+in demand by other creators of backup tools and will save them from
+having to use their own out-of-tree kernel modules.
+
+A tool, a library for working with blksnap, tests and some documentation
+can be found at www.github.com/veeam/blksnap.
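+
+To illustrate the filter API introduced in the first patch, a minimal
+pass-through filter could look roughly like this (an illustrative
+sketch only, not part of the series; it assumes a bdev pointer obtained
+elsewhere and reuses the only altitude defined so far,
+bdev_filter_alt_blksnap):
+
+	static enum bdev_filter_result noop_submit_bio_cb(struct bio *bio,
+						struct bdev_filter *flt)
+	{
+		/* Let the original bio be submitted unchanged. */
+		return bdev_filter_pass;
+	}
+
+	static void noop_detach_cb(struct kref *kref)
+	{
+		/* Called when the last reference to the filter is dropped. */
+		kfree(container_of(kref, struct bdev_filter, kref));
+	}
+
+	static const struct bdev_filter_operations noop_fops = {
+		.submit_bio_cb = noop_submit_bio_cb,
+		.detach_cb = noop_detach_cb,
+	};
+
+	struct bdev_filter *flt = kzalloc(sizeof(*flt), GFP_KERNEL);
+
+	if (flt) {
+		bdev_filter_init(flt, &noop_fops);
+		/* On failure, the put releases flt via noop_detach_cb(). */
+		if (bdev_filter_attach(bdev, "noop",
+				       bdev_filter_alt_blksnap, flt))
+			bdev_filter_put(flt);
+	}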
+ +Sergei Shtepa (20): + block, blk_filter: enable block device filters + block, blksnap: header file of the module interface + block, blksnap: module management interface functions + lock, blksnap: init() and exit() functions + block, blksnap: interaction with sysfs + block, blksnap: attaching and detaching the filter and handling a bios + block, blksnap: map of change block tracking + block, blksnap: big buffer in the form of an array of pages + block, blksnap: minimum data storage unit of the original block device + block, blksnap: buffer in memory for the minimum data storage unit + block, blksnap: functions and structures for performing block I/O + operations + block, blksnap: storage for storing difference blocks + block, blksnap: event queue from the difference storage + block, blksnap: owner of information about overwritten blocks of the + original block device + block, blksnap: snapshot image block device + block, blksnap: snapshot + block, blksnap: debugging mechanism for monitoring memory consumption + block, blksnap: Kconfig + block, blksnap: Makefile + block, blksnap: adds a blksnap to the kernel tree + + block/Kconfig | 8 + + block/bdev.c | 129 +++++ + block/blk-core.c | 88 ++++ + drivers/block/Kconfig | 2 + + drivers/block/Makefile | 1 + + drivers/block/blksnap/Kconfig | 101 ++++ + drivers/block/blksnap/Makefile | 20 + + drivers/block/blksnap/big_buffer.c | 218 ++++++++ + drivers/block/blksnap/big_buffer.h | 27 + + drivers/block/blksnap/cbt_map.c | 280 ++++++++++ + drivers/block/blksnap/cbt_map.h | 112 ++++ + drivers/block/blksnap/chunk.c | 352 +++++++++++++ + drivers/block/blksnap/chunk.h | 129 +++++ + drivers/block/blksnap/ctrl.c | 445 ++++++++++++++++ + drivers/block/blksnap/ctrl.h | 7 + + drivers/block/blksnap/diff_area.c | 602 +++++++++++++++++++++ + drivers/block/blksnap/diff_area.h | 179 +++++++ + drivers/block/blksnap/diff_buffer.c | 146 ++++++ + drivers/block/blksnap/diff_buffer.h | 78 +++ + drivers/block/blksnap/diff_io.c | 205 ++++++++ + drivers/block/blksnap/diff_io.h | 122 +++++ + drivers/block/blksnap/diff_storage.c | 316 +++++++++++ + drivers/block/blksnap/diff_storage.h | 94 ++++ + drivers/block/blksnap/event_queue.c | 90 ++++ + drivers/block/blksnap/event_queue.h | 64 +++ + drivers/block/blksnap/main.c | 109 ++++ + drivers/block/blksnap/memory_checker.c | 100 ++++ + drivers/block/blksnap/memory_checker.h | 41 ++ + drivers/block/blksnap/params.h | 10 + + drivers/block/blksnap/snapimage.c | 345 ++++++++++++ + drivers/block/blksnap/snapimage.h | 65 +++ + drivers/block/blksnap/snapshot.c | 671 ++++++++++++++++++++++++ + drivers/block/blksnap/snapshot.h | 76 +++ + drivers/block/blksnap/sysfs.c | 81 +++ + drivers/block/blksnap/sysfs.h | 5 + + drivers/block/blksnap/tracker.c | 693 +++++++++++++++++++++++++ + drivers/block/blksnap/tracker.h | 71 +++ + drivers/block/blksnap/version.h | 8 + + include/linux/blk_snap.h | 460 ++++++++++++++++ + include/linux/blk_types.h | 22 + + include/linux/blkdev.h | 81 +++ + 41 files changed, 6653 insertions(+) + create mode 100644 drivers/block/blksnap/Kconfig + create mode 100644 drivers/block/blksnap/Makefile + create mode 100644 drivers/block/blksnap/big_buffer.c + create mode 100644 drivers/block/blksnap/big_buffer.h + create mode 100644 drivers/block/blksnap/cbt_map.c + create mode 100644 drivers/block/blksnap/cbt_map.h + create mode 100644 drivers/block/blksnap/chunk.c + create mode 100644 drivers/block/blksnap/chunk.h + create mode 100644 drivers/block/blksnap/ctrl.c + create mode 100644 drivers/block/blksnap/ctrl.h + create 
mode 100644 drivers/block/blksnap/diff_area.c
 create mode 100644 drivers/block/blksnap/diff_area.h
 create mode 100644 drivers/block/blksnap/diff_buffer.c
 create mode 100644 drivers/block/blksnap/diff_buffer.h
 create mode 100644 drivers/block/blksnap/diff_io.c
 create mode 100644 drivers/block/blksnap/diff_io.h
 create mode 100644 drivers/block/blksnap/diff_storage.c
 create mode 100644 drivers/block/blksnap/diff_storage.h
 create mode 100644 drivers/block/blksnap/event_queue.c
 create mode 100644 drivers/block/blksnap/event_queue.h
 create mode 100644 drivers/block/blksnap/main.c
 create mode 100644 drivers/block/blksnap/memory_checker.c
 create mode 100644 drivers/block/blksnap/memory_checker.h
 create mode 100644 drivers/block/blksnap/params.h
 create mode 100644 drivers/block/blksnap/snapimage.c
 create mode 100644 drivers/block/blksnap/snapimage.h
 create mode 100644 drivers/block/blksnap/snapshot.c
 create mode 100644 drivers/block/blksnap/snapshot.h
 create mode 100644 drivers/block/blksnap/sysfs.c
 create mode 100644 drivers/block/blksnap/sysfs.h
 create mode 100644 drivers/block/blksnap/tracker.c
 create mode 100644 drivers/block/blksnap/tracker.h
 create mode 100644 drivers/block/blksnap/version.h
 create mode 100644 include/linux/blk_snap.h
+
+--
+2.20.1
+
diff --git a/patches/lk5.19/0001-block-blk_filter-enable-block-device-filters.patch b/patches/lk5.19/0001-block-blk_filter-enable-block-device-filters.patch
new file mode 100644
index 00000000..77dcdad6
--- /dev/null
+++ b/patches/lk5.19/0001-block-blk_filter-enable-block-device-filters.patch
@@ -0,0 +1,436 @@
+From 595307f34a06be4fdd1a358243e6ccb216c4647b Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:23:26 +0200
+Subject: [PATCH 01/20] block, blk_filter: enable block device filters
+
+Allows attaching block device filters to block devices. Kernel modules
+can use this functionality to extend the capabilities of the block
+layer.
+
+Signed-off-by: Sergei Shtepa
+---
+ block/Kconfig             |   8 +++
+ block/bdev.c              | 129 ++++++++++++++++++++++++++++++++
+ block/blk-core.c          |  88 ++++++++++++++++++++++
+ include/linux/blk_types.h |  22 +++++
+ include/linux/blkdev.h    |  81 ++++++++++++++++++++
+ 5 files changed, 328 insertions(+)
+
+diff --git a/block/Kconfig b/block/Kconfig
+index 50b17e260fa2..256483e00224 100644
+--- a/block/Kconfig
++++ b/block/Kconfig
+@@ -225,6 +225,14 @@ config BLK_MQ_RDMA
+ config BLK_PM
+ 	def_bool PM
+ 
++config BLK_FILTER
++	bool "Enable block device filters"
++	default n
++	help
++	  Enabling this lets block layer filters handle bio requests.
++	  Kernel modules can use this feature to extend the functionality
++	  of the block layer.
++
+ # do not use in new code
+ config BLOCK_HOLDER_DEPRECATED
+ 	bool
+diff --git a/block/bdev.c b/block/bdev.c
+index 5fe06c1f2def..4bcd9f4378e3 100644
+--- a/block/bdev.c
++++ b/block/bdev.c
+@@ -426,8 +426,15 @@ static void init_once(void *data)
+ 	inode_init_once(&ei->vfs_inode);
+ }
+ 
++#ifdef CONFIG_BLK_FILTER
++static void bdev_filter_cleanup(struct block_device *bdev);
++#endif
++
+ static void bdev_evict_inode(struct inode *inode)
+ {
++#ifdef CONFIG_BLK_FILTER
++	bdev_filter_cleanup(I_BDEV(inode));
++#endif
+ 	truncate_inode_pages_final(&inode->i_data);
+ 	invalidate_inode_buffers(inode); /* is it needed here? */
+ 	clear_inode(inode);
+@@ -503,6 +510,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
+ 		return NULL;
+ 	}
+ 	bdev->bd_disk = disk;
++
++#ifdef CONFIG_BLK_FILTER
++	memset(bdev->bd_filters, 0, sizeof(bdev->bd_filters));
++	spin_lock_init(&bdev->bd_filters_lock);
++#endif
+ 	return bdev;
+ }
+ 
+@@ -1071,3 +1083,120 @@ void sync_bdevs(bool wait)
+ 	spin_unlock(&blockdev_superblock->s_inode_list_lock);
+ 	iput(old_inode);
+ }
++
++#ifdef CONFIG_BLK_FILTER
++static void bdev_filter_cleanup(struct block_device *bdev)
++{
++	int altitude;
++	struct bdev_filter *flt;
++
++	for (altitude = 0; altitude < bdev_filter_alt_end; altitude++) {
++		spin_lock(&bdev->bd_filters_lock);
++		flt = bdev->bd_filters[altitude];
++		bdev->bd_filters[altitude] = NULL;
++		spin_unlock(&bdev->bd_filters_lock);
++
++		bdev_filter_put(flt);
++	}
++}
++
++/**
++ * bdev_filter_attach - Attach a filter to the original block device.
++ * @bdev:
++ *	Block device.
++ * @name:
++ *	Name of the block device filter.
++ * @altitude:
++ *	Altitude number of the block device filter.
++ * @flt:
++ *	Pointer to the filter structure.
++ *
++ * Before adding a filter, it is necessary to initialize &struct bdev_filter.
++ *
++ * The bdev_filter_detach() function allows detaching the filter from the
++ * block device.
++ *
++ * Return:
++ *	0 - OK
++ *	-EALREADY - a filter with this name already exists
++ */
++int bdev_filter_attach(struct block_device *bdev, const char *name,
++		       const enum bdev_filter_altitudes altitude,
++		       struct bdev_filter *flt)
++{
++	int ret = 0;
++
++	spin_lock(&bdev->bd_filters_lock);
++	if (bdev->bd_filters[altitude])
++		ret = -EALREADY;
++	else
++		bdev->bd_filters[altitude] = flt;
++	spin_unlock(&bdev->bd_filters_lock);
++
++	if (!ret)
++		pr_info("block device filter '%s' has been attached to %d:%d",
++			name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
++
++	return ret;
++}
++EXPORT_SYMBOL_GPL(bdev_filter_attach);
++
++/**
++ * bdev_filter_detach - Detach a filter from the block device.
++ * @bdev:
++ *	Block device.
++ * @name:
++ *	Name of the block device filter.
++ * @altitude:
++ *	Altitude number of the block device filter.
++ *
++ * The filter should be added using the bdev_filter_attach() function.
++ *
++ * Return:
++ *	0 - OK
++ *	-ENOENT - the filter was not found at the given altitude
++ */
++int bdev_filter_detach(struct block_device *bdev, const char *name,
++		       const enum bdev_filter_altitudes altitude)
++{
++	struct bdev_filter *flt = NULL;
++
++	spin_lock(&bdev->bd_filters_lock);
++	flt = bdev->bd_filters[altitude];
++	bdev->bd_filters[altitude] = NULL;
++	spin_unlock(&bdev->bd_filters_lock);
++
++	if (!flt)
++		return -ENOENT;
++
++	bdev_filter_put(flt);
++	pr_info("block device filter '%s' has been detached from %d:%d",
++		name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
++	return 0;
++}
++EXPORT_SYMBOL_GPL(bdev_filter_detach);
++
++/**
++ * bdev_filter_get_by_altitude - Get filter by altitude.
++ * @bdev:
++ *	Pointer to the block device structure.
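++ * @altitude:
++ *	Altitude number of the block device filter.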
++ *
++ * Return:
++ *	pointer - pointer to the filter structure &struct bdev_filter
++ *	NULL - no filter has been set
++ */
++struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
++				const enum bdev_filter_altitudes altitude)
++{
++	struct bdev_filter *flt;
++
++	spin_lock(&bdev->bd_filters_lock);
++	flt = bdev->bd_filters[altitude];
++	if (flt)
++		bdev_filter_get(flt);
++	spin_unlock(&bdev->bd_filters_lock);
++
++	return flt;
++}
++EXPORT_SYMBOL_GPL(bdev_filter_get_by_altitude);
++#endif
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 06ff5bbfe8f6..a44906fb08aa 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -757,6 +757,86 @@ void submit_bio_noacct_nocheck(struct bio *bio)
+ 	__submit_bio_noacct(bio);
+ }
+ 
++#ifdef CONFIG_BLK_FILTER
++
++/**
++ * __filter_bio() - Process bio by the block device filter.
++ * @flt:
++ *	Block device filter.
++ * @bio:
++ *	Original I/O unit.
++ *
++ * Return:
++ *	bdev_filter_pass - original bio should be submitted
++ *	bdev_filter_skip - do not submit original bio
++ *	bdev_filter_redirect - repeat bio processing for another block device
++ */
++static inline enum bdev_filter_result __filter_bio(struct bdev_filter *flt,
++						   struct bio *bio)
++{
++	enum bdev_filter_result result;
++	struct bio *new_bio;
++	struct bio_list bio_list[2] = { };
++
++	do {
++		bio_list_init(&bio_list[0]);
++		current->bio_list = bio_list;
++
++		result = flt->fops->submit_bio_cb(bio, flt);
++
++		current->bio_list = NULL;
++
++		while ((new_bio = bio_list_pop(&bio_list[0]))) {
++			bio_set_flag(new_bio, BIO_FILTERED);
++			submit_bio_noacct(new_bio);
++		}
++	} while (result == bdev_filter_repeat);
++
++	return result;
++}
++
++/**
++ * filter_bio() - Pass bio to the block device filters.
++ * @bio:
++ *	Original I/O unit.
++ *
++ * Return:
++ *	true - original bio should be submitted
++ *	false - do not submit original bio
++ */
++static bool filter_bio(struct bio *bio)
++{
++	enum bdev_filter_result result = bdev_filter_pass;
++
++	if (bio_flagged(bio, BIO_FILTERED))
++		return true;
++	do {
++		struct block_device *bdev = bio->bi_bdev;
++		unsigned int altitude = 0;
++
++		while (altitude < bdev_filter_alt_end) {
++			struct bdev_filter *flt;
++
++			spin_lock(&bdev->bd_filters_lock);
++			flt = bdev->bd_filters[altitude];
++			if (flt)
++				bdev_filter_get(flt);
++			spin_unlock(&bdev->bd_filters_lock);
++
++			if (flt) {
++				result = __filter_bio(flt, bio);
++				bdev_filter_put(flt);
++				if (result != bdev_filter_pass)
++					break;
++			}
++			altitude++;
++		}
++	} while (result == bdev_filter_redirect);
++
++	return (result == bdev_filter_pass);
++}
++#endif
++
+ /**
+  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
+  * @bio: The bio describing the location in memory and on the device.
+@@ -790,6 +870,14 @@ void submit_bio_noacct(struct bio *bio)
+ 		goto end_io;
+ 	if (unlikely(bio_check_ro(bio)))
+ 		goto end_io;
++#ifdef CONFIG_BLK_FILTER
++	/*
++	 * It looks like should_fail_bio() and bio_check_ro() can be placed
++	 * in a separate block device filter for debugging.
++ */ ++ if (!filter_bio(bio)) ++ goto end_io; ++#endif + if (!bio_flagged(bio, BIO_REMAPPED)) { + if (unlikely(bio_check_eod(bio))) + goto end_io; +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index a24d4078fb21..b88f506ea59e 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -37,6 +37,23 @@ struct bio_crypt_ctx; + #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) + #define SECTOR_MASK (PAGE_SECTORS - 1) + ++#ifdef CONFIG_BLK_FILTER ++/** ++ * enum bdev_filter_altitudes - Set of reserved altitudes for block device ++ * filters. ++ * ++ * @bdev_filter_alt_blksnap: ++ * An altitude for the blksnap module. ++ * @bdev_filter_alt_end: ++ * Indicates the end of the altitude set. ++ */ ++enum bdev_filter_altitudes { ++ bdev_filter_alt_blksnap = 0, ++ bdev_filter_alt_end ++}; ++struct bdev_filter; ++#endif ++ + struct block_device { + sector_t bd_start_sect; + sector_t bd_nr_sectors; +@@ -68,6 +85,10 @@ struct block_device { + #ifdef CONFIG_FAIL_MAKE_REQUEST + bool bd_make_it_fail; + #endif ++#ifdef CONFIG_BLK_FILTER ++ struct bdev_filter *bd_filters[bdev_filter_alt_end]; ++ spinlock_t bd_filters_lock; ++#endif + } __randomize_layout; + + #define bdev_whole(_bdev) \ +@@ -332,6 +353,7 @@ enum { + BIO_QOS_MERGED, /* but went through rq_qos merge path */ + BIO_REMAPPED, + BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ ++ BIO_FILTERED, /* bio has already been filtered */ + BIO_FLAG_LAST + }; + +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 608d577734c2..24cb5293897f 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1573,4 +1573,85 @@ struct io_comp_batch { + + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } + ++#ifdef CONFIG_BLK_FILTER ++/** ++ * enum bdev_filter_result - The result of bio processing by ++ * the block device filter. ++ * ++ * @bdev_filter_skip: ++ * Original bio does not need to be submitted. ++ * @bdev_filter_pass: ++ * It is necessary to submit the original request. ++ * @bdev_filter_repeat: ++ * Bio processing has not been completed, a second call is required. ++ * @bdev_filter_redirect: ++ * Original bio was redirected to another block device. The set ++ * of filters on it is different, so processing must be repeated. ++ */ ++enum bdev_filter_result { ++ bdev_filter_skip = 0, ++ bdev_filter_pass, ++ bdev_filter_repeat, ++ bdev_filter_redirect ++}; ++struct bdev_filter; ++/** ++ * bdev_filter_operations - List of callback functions for the filter. ++ * ++ * @submit_bio_cb: ++ * A callback function for bio processing. ++ * @detach_cb: ++ * A callback function to disable the filter when removing a block ++ * device from the system. ++ */ ++struct bdev_filter_operations { ++ enum bdev_filter_result (*submit_bio_cb)(struct bio *bio, ++ struct bdev_filter *flt); ++ void (*detach_cb)(struct kref *kref); ++}; ++/** ++ * struct bdev_filter - Block device filter. ++ * ++ * @kref: ++ * Kernel reference counter. ++ * @fops: ++ * The pointer to &struct bdev_filter_operations with callback ++ * functions for the filter. ++ */ ++struct bdev_filter { ++ struct kref kref; ++ const struct bdev_filter_operations *fops; ++}; ++/** ++ * bdev_filter_init - Initialization of the filter structure. ++ * @flt: ++ * Pointer to the &struct bdev_filter to be initialized. ++ * @fops: ++ * The callback functions for the filter. 
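++ *
++ * A filter implementation typically embeds &struct bdev_filter into its
++ * own per-device structure, recovers the container in the callbacks via
++ * container_of(), and releases it in the detach_cb() callback.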
++ */ ++static inline void bdev_filter_init(struct bdev_filter *flt, ++ const struct bdev_filter_operations *fops) ++{ ++ kref_init(&flt->kref); ++ flt->fops = fops; ++}; ++int bdev_filter_attach(struct block_device *bdev, const char *name, ++ const enum bdev_filter_altitudes altitude, ++ struct bdev_filter *flt); ++int bdev_filter_detach(struct block_device *bdev, const char *name, ++ const enum bdev_filter_altitudes altitude); ++struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev, ++ const enum bdev_filter_altitudes altitude); ++static inline void bdev_filter_get(struct bdev_filter *flt) ++{ ++ kref_get(&flt->kref); ++} ++static inline void bdev_filter_put(struct bdev_filter *flt) ++{ ++ if (flt) ++ kref_put(&flt->kref, flt->fops->detach_cb); ++}; ++ ++#endif ++ + #endif /* _LINUX_BLKDEV_H */ +-- +2.20.1 + diff --git a/patches/lk5.19/0002-block-blksnap-header-file-of-the-module-interface.patch b/patches/lk5.19/0002-block-blksnap-header-file-of-the-module-interface.patch new file mode 100644 index 00000000..55adc240 --- /dev/null +++ b/patches/lk5.19/0002-block-blksnap-header-file-of-the-module-interface.patch @@ -0,0 +1,483 @@ +From edd582fd7d4be91afea106a894c56fd025d61bab Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:24:33 +0200 +Subject: [PATCH 02/20] block, blksnap: header file of the module interface + +The header file contains a set of declarations, structures and control +requests (ioctl) that allows to manage the module from the user space. + +Signed-off-by: Sergei Shtepa +--- + include/linux/blk_snap.h | 460 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 460 insertions(+) + create mode 100644 include/linux/blk_snap.h + +diff --git a/include/linux/blk_snap.h b/include/linux/blk_snap.h +new file mode 100644 +index 000000000000..a3753a6a9527 +--- /dev/null ++++ b/include/linux/blk_snap.h +@@ -0,0 +1,460 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++#include ++ ++#define BLK_SNAP_MODULE_NAME "blksnap" ++#define BLK_SNAP_IMAGE_NAME "blksnap-image" ++#define BLK_SNAP 'V' ++ ++enum blk_snap_ioctl { ++ /* ++ * Service controls ++ */ ++ blk_snap_ioctl_version, ++ /* ++ * Contols for tracking ++ */ ++ blk_snap_ioctl_tracker_remove, ++ blk_snap_ioctl_tracker_collect, ++ blk_snap_ioctl_tracker_read_cbt_map, ++ blk_snap_ioctl_tracker_mark_dirty_blocks, ++ /* ++ * Snapshot contols ++ */ ++ blk_snap_ioctl_snapshot_create, ++ blk_snap_ioctl_snapshot_destroy, ++ blk_snap_ioctl_snapshot_append_storage, ++ blk_snap_ioctl_snapshot_take, ++ blk_snap_ioctl_snapshot_collect, ++ blk_snap_ioctl_snapshot_collect_images, ++ blk_snap_ioctl_snapshot_wait_event, ++ blk_snap_ioctl_end, ++}; ++ ++/** ++ * struct blk_snap_version - Result for the &IOCTL_BLK_SNAP_VERSION control. ++ * @major: ++ * Version major part. ++ * @minor: ++ * Version minor part. ++ * @revision: ++ * Revision number. ++ * @build: ++ * Build number. Should be zero. ++ */ ++struct blk_snap_version { ++ __u16 major; ++ __u16 minor; ++ __u16 revision; ++ __u16 build; ++}; ++/** ++ * IOCTL_BLK_SNAP_VERSION - Get version and compatibility flags. ++ * ++ * Linking the product behavior to the version code does not seem to be a very ++ * good idea. Version is only for logs. ++ */ ++#define IOCTL_BLK_SNAP_VERSION \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_version, struct blk_snap_version) ++ ++/* ++ * The main functionality of the module is change block tracking (CBT). ++ * Next, a number of ioctls will describe the interface for the CBT mechanism. 
++ */ ++ ++/** ++ * struct blk_snap_dev_t - Block device ID. ++ * @mj: ++ * Device ID major part. ++ * @mn: ++ * Device ID minor part. ++ * ++ * In user space and in kernel space, block devices are encoded differently. ++ * We need to enter our own type to guarantee the correct transmission of the ++ * major and minor parts. ++ */ ++struct blk_snap_dev_t { ++ __u32 mj; ++ __u32 mn; ++}; ++ ++/** ++ * struct blk_snap_tracker_remove - Input argument for the ++ * &IOCTL_BLK_SNAP_TRACKER_REMOVE control. ++ * @dev_id: ++ * Device ID. ++ */ ++struct blk_snap_tracker_remove { ++ struct blk_snap_dev_t dev_id; ++}; ++/** ++ * IOCTL_BLK_SNAP_TRACKER_REMOVE - Remove a device from tracking. ++ * ++ * Removes the device from tracking changes. ++ * Adding a device for tracking is performed when creating a snapshot ++ * that includes this device. ++ */ ++#define IOCTL_BLK_SNAP_TRACKER_REMOVE \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_tracker_remove, \ ++ struct blk_snap_tracker_remove) ++ ++/** ++ * struct blk_snap_cbt_info - Information about change tracking for a block ++ * device. ++ * @dev_id: ++ * Device ID. ++ * @blk_size: ++ * Block size in bytes. ++ * @device_capacity: ++ * Device capacity in bytes. ++ * @blk_count: ++ * Number of blocks. ++ * @generation_id: ++ * Unique identification number of change tracking generation. ++ * @snap_number: ++ * Current changes number. ++ */ ++struct blk_snap_cbt_info { ++ struct blk_snap_dev_t dev_id; ++ __u32 blk_size; ++ __u64 device_capacity; ++ __u32 blk_count; ++ uuid_t generation_id; ++ __u8 snap_number; ++}; ++/** ++ * struct blk_snap_tracker_collect - Argument for the ++ * &IOCTL_BLK_SNAP_TRACKER_COLLECT control. ++ * @count: ++ * Size of @cbt_info_array in the number of &struct blk_snap_cbt_info. ++ * If @cbt_info_array has not enough space, it will contain the required ++ * size of the array. ++ * @cbt_info_array: ++ * Pointer to the array for output. ++ */ ++struct blk_snap_tracker_collect { ++ __u32 count; ++ struct blk_snap_cbt_info *cbt_info_array; ++}; ++/** ++ * IOCTL_BLK_SNAP_TRACKER_COLLECT - Collect all tracked devices. ++ * ++ * Getting information about all devices under tracking. ++ * This ioctl returns the same information that the module outputs ++ * to sysfs for each device under tracking. ++ */ ++#define IOCTL_BLK_SNAP_TRACKER_COLLECT \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_tracker_collect, \ ++ struct blk_snap_tracker_collect) ++ ++/** ++ * struct blk_snap_tracker_read_cbt_bitmap - Argument for the ++ * &IOCTL_BLK_SNAP_TRACKER_READ_CBT_MAP control. ++ * @dev_id: ++ * Device ID. ++ * @offset: ++ * Offset from the beginning of the CBT bitmap in bytes. ++ * @length: ++ * Size of @buff in bytes. ++ * @buff: ++ * Pointer to the buffer for output. ++ */ ++struct blk_snap_tracker_read_cbt_bitmap { ++ struct blk_snap_dev_t dev_id; ++ __u32 offset; ++ __u32 length; ++ __u8 *buff; ++}; ++/** ++ * IOCTL_BLK_SNAP_TRACKER_READ_CBT_MAP - Read the CBT map. ++ * ++ * This ioctl allows to read the table of changes. Sysfs also has a file that ++ * allows to read this table. ++ */ ++#define IOCTL_BLK_SNAP_TRACKER_READ_CBT_MAP \ ++ _IOR(BLK_SNAP, blk_snap_ioctl_tracker_read_cbt_map, \ ++ struct blk_snap_tracker_read_cbt_bitmap) ++ ++/** ++ * struct blk_snap_block_range - Element of array for ++ * &struct blk_snap_tracker_mark_dirty_blocks. ++ * @sector_offset: ++ * Offset from the beginning of the disk in sectors. ++ * @sector_count: ++ * Number of sectors. 
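++ *
++ * The same layout is used both for marking dirty blocks in the CBT map
++ * and for describing the ranges that make up the difference storage.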
++ */ ++struct blk_snap_block_range { ++ __u64 sector_offset; ++ __u64 sector_count; ++}; ++/** ++ * struct blk_snap_tracker_mark_dirty_blocks - Argument for the ++ * &IOCTL_BLK_SNAP_TRACKER_MARK_DIRTY_BLOCKS control. ++ * @dev_id: ++ * Device ID. ++ * @count: ++ * Size of @dirty_blocks_array in the number of ++ * &struct blk_snap_block_range. ++ * @dirty_blocks_array: ++ * Pointer to the array of &struct blk_snap_block_range. ++ */ ++struct blk_snap_tracker_mark_dirty_blocks { ++ struct blk_snap_dev_t dev_id; ++ __u32 count; ++ struct blk_snap_block_range *dirty_blocks_array; ++}; ++/** ++ * IOCTL_BLK_SNAP_TRACKER_MARK_DIRTY_BLOCKS - Set dirty blocks in the CBT map. ++ * ++ * There are cases when some blocks need to be marked as changed. ++ * This ioctl allows to do this. ++ */ ++#define IOCTL_BLK_SNAP_TRACKER_MARK_DIRTY_BLOCKS \ ++ _IOR(BLK_SNAP, blk_snap_ioctl_tracker_mark_dirty_blocks, \ ++ struct blk_snap_tracker_mark_dirty_blocks) ++ ++/* ++ * Next, there will be a description of the interface for working with ++ * snapshots. ++ */ ++ ++/** ++ * struct blk_snap_snapshot_create - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_CREATE control. ++ * @count: ++ * Size of @dev_id_array in the number of &struct blk_snap_dev_t. ++ * @dev_id_array: ++ * Pointer to the array of &struct blk_snap_dev_t. ++ * @id: ++ * Return ID of the created snapshot. ++ */ ++struct blk_snap_snapshot_create { ++ __u32 count; ++ struct blk_snap_dev_t *dev_id_array; ++ uuid_t id; ++}; ++/** ++ * This ioctl creates a snapshot structure in the memory and allocates an ++ * identifier for it. Further interaction with the snapshot is possible by ++ * this identifier. ++ * Several snapshots can be created at the same time, but with the condition ++ * that one block device can only be included in one snapshot. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_CREATE \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_snapshot_create, \ ++ struct blk_snap_snapshot_create) ++ ++/** ++ * struct blk_snap_snapshot_destroy - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_DESTROY control. ++ * @id: ++ * Snapshot ID. ++ */ ++struct blk_snap_snapshot_destroy { ++ uuid_t id; ++}; ++/** ++ * IOCTL_BLK_SNAP_SNAPSHOT_DESTROY - Release and destroy the snapshot. ++ * ++ * Destroys all snapshot structures and releases all its allocated resources. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_DESTROY \ ++ _IOR(BLK_SNAP, blk_snap_ioctl_snapshot_destroy, \ ++ struct blk_snap_snapshot_destroy) ++ ++/** ++ * struct blk_snap_snapshot_append_storage - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_APPEND_STORAGE control. ++ * @id: ++ * Snapshot ID. ++ * @dev_id: ++ * Device ID. ++ * @count: ++ * Size of @ranges in the number of &struct blk_snap_block_range. ++ * @ranges: ++ * Pointer to the array of &struct blk_snap_block_range. ++ */ ++struct blk_snap_snapshot_append_storage { ++ uuid_t id; ++ struct blk_snap_dev_t dev_id; ++ __u32 count; ++ struct blk_snap_block_range *ranges; ++}; ++/** ++ * IOCTL_BLK_SNAP_SNAPSHOT_APPEND_STORAGE - Append storage to the difference ++ * storage of the snapshot. ++ * ++ * The snapshot difference storage can be set either before or after creating ++ * the snapshot images. This allows to dynamically expand the difference ++ * storage while holding the snapshot. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_APPEND_STORAGE \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_snapshot_append_storage, \ ++ struct blk_snap_snapshot_append_storage) ++ ++/** ++ * struct blk_snap_snapshot_take - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_TAKE control. 
++ * @id: ++ * Snapshot ID. ++ */ ++struct blk_snap_snapshot_take { ++ uuid_t id; ++}; ++/** ++ * IOCTL_BLK_SNAP_SNAPSHOT_TAKE - Take snapshot. ++ * ++ * This ioctl creates snapshot images of block devices and switches CBT tables. ++ * The snapshot must be created before this call, and the areas of block ++ * devices should be added to the difference storage. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_TAKE \ ++ _IOR(BLK_SNAP, blk_snap_ioctl_snapshot_take, \ ++ struct blk_snap_snapshot_take) ++ ++/** ++ * struct blk_snap_snapshot_collect - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_COLLECT control. ++ * @count: ++ * Size of @ids in the number of &uuid_t. ++ * If @ids has not enough space, it will contain the required ++ * size of the array. ++ * @ids: ++ * Pointer to the array with the snapshot ID for output. If the pointer is ++ * zero, the ioctl returns the number of active snapshots in &count. ++ * ++ */ ++struct blk_snap_snapshot_collect { ++ __u32 count; ++ uuid_t *ids; ++}; ++/** ++ * IOCTL_BLK_SNAP_SNAPSHOT_COLLECT - Get collection of created snapshots. ++ * ++ * This information can also be obtained from files from sysfs. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_COLLECT \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_snapshot_collect, \ ++ struct blk_snap_snapshot_collect) ++/** ++ * struct blk_snap_image_info - Associates the original device in the snapshot ++ * and the corresponding snapshot image. ++ * @orig_dev_id: ++ * Device ID. ++ * @image_dev_id: ++ * Image ID. ++ */ ++struct blk_snap_image_info { ++ struct blk_snap_dev_t orig_dev_id; ++ struct blk_snap_dev_t image_dev_id; ++}; ++/** ++ * struct blk_snap_snapshot_collect_images - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_COLLECT_IMAGES control. ++ * @id: ++ * Snapshot ID. ++ * @count: ++ * Size of @image_info_array in the number of &struct blk_snap_image_info. ++ * If @image_info_array has not enough space, it will contain the required ++ * size of the array. ++ * @image_info_array: ++ * Pointer to the array for output. ++ */ ++struct blk_snap_snapshot_collect_images { ++ uuid_t id; ++ __u32 count; ++ struct blk_snap_image_info *image_info_array; ++}; ++/** ++ * IOCTL_BLK_SNAP_SNAPSHOT_COLLECT_IMAGES - Get a collection of devices and ++ * their snapshot images. ++ * ++ * While holding the snapshot, this ioctl allows you to get a table of ++ * correspondences of the original devices and their snapshot images. ++ * This information can also be obtained from files from sysfs. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_COLLECT_IMAGES \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_snapshot_collect_images, \ ++ struct blk_snap_snapshot_collect_images) ++ ++enum blk_snap_event_codes { ++ /** ++ * Low free space in difference storage event. ++ * ++ * If the free space in the difference storage is reduced to the ++ * specified limit, the module generates this event. ++ */ ++ blk_snap_event_code_low_free_space, ++ /** ++ * Snapshot image is corrupted event. ++ * ++ * If a chunk could not be allocated when trying to save data to the ++ * difference storage, this event is generated. ++ * However, this does not mean that the backup process was interrupted ++ * with an error. If the snapshot image has been read to the end by ++ * this time, the backup process is considered successful. ++ */ ++ blk_snap_event_code_corrupted, ++}; ++ ++/** ++ * struct blk_snap_snapshot_event - Argument for the ++ * &IOCTL_BLK_SNAP_SNAPSHOT_WAIT_EVENT control. ++ * @id: ++ * Snapshot ID. ++ * @timeout_ms: ++ * Timeout for waiting in milliseconds. 
++ * @time_label: ++ * Timestamp of the received event. ++ * @code: ++ * Code of the received event. ++ * @data: ++ * The received event body. ++ */ ++struct blk_snap_snapshot_event { ++ uuid_t id; ++ __u32 timeout_ms; ++ __u32 code; ++ __s64 time_label; ++ __u8 data[4096 - 32]; ++}; ++static_assert( ++ sizeof(struct blk_snap_snapshot_event) == 4096, ++ "The size struct blk_snap_snapshot_event should be equal to the size of the page."); ++ ++/** ++ * IOCTL_BLK_SNAP_SNAPSHOT_WAIT_EVENT - Wait and get the event from the ++ * snapshot. ++ * ++ * While holding the snapshot, the kernel module can transmit information about ++ * changes in its state in the form of events to the user level. ++ * It is very important to receive these events as quickly as possible, so the ++ * user's thread is in the state of interruptable sleep. ++ */ ++#define IOCTL_BLK_SNAP_SNAPSHOT_WAIT_EVENT \ ++ _IOW(BLK_SNAP, blk_snap_ioctl_snapshot_wait_event, \ ++ struct blk_snap_snapshot_event) ++ ++/** ++ * struct blk_snap_event_low_free_space - Data for the ++ * &blk_snap_event_code_low_free_space event. ++ * @requested_nr_sect: ++ * The required number of sectors. ++ */ ++struct blk_snap_event_low_free_space { ++ __u64 requested_nr_sect; ++}; ++ ++/** ++ * struct blk_snap_event_corrupted - Data for the ++ * &blk_snap_event_code_corrupted event. ++ * @orig_dev_id: ++ * Device ID. ++ * @err_code: ++ * Error code. ++ */ ++struct blk_snap_event_corrupted { ++ struct blk_snap_dev_t orig_dev_id; ++ __s32 err_code; ++}; +-- +2.20.1 + diff --git a/patches/lk5.19/0003-block-blksnap-module-management-interface-functions.patch b/patches/lk5.19/0003-block-blksnap-module-management-interface-functions.patch new file mode 100644 index 00000000..b3ed7771 --- /dev/null +++ b/patches/lk5.19/0003-block-blksnap-module-management-interface-functions.patch @@ -0,0 +1,484 @@ +From b0a5ff75fd0938d1393317723728a5caeb229780 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:26:08 +0200 +Subject: [PATCH 03/20] block, blksnap: module management interface functions + +Implementation of module management interface functions. At this level, +the input and output parameters are converted and the corresponding +subsystems of the module are called. 
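+
+For example, the simplest control request can be issued from user space
+roughly as follows (an illustrative sketch assuming the usual <fcntl.h>,
+<stdio.h>, <unistd.h>, <sys/ioctl.h> and <linux/blk_snap.h> includes and
+the /dev/blksnap node created later in this series):
+
+	struct blk_snap_version ver = {0};
+	int fd = open("/dev/blksnap", O_RDWR);
+
+	if (fd >= 0) {
+		if (ioctl(fd, IOCTL_BLK_SNAP_VERSION, &ver) == 0)
+			printf("blksnap %u.%u.%u.%u\n", ver.major,
+			       ver.minor, ver.revision, ver.build);
+		close(fd);
+	}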
+ +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/ctrl.c | 445 +++++++++++++++++++++++++++++++++++ + drivers/block/blksnap/ctrl.h | 7 + + 2 files changed, 452 insertions(+) + create mode 100644 drivers/block/blksnap/ctrl.c + create mode 100644 drivers/block/blksnap/ctrl.h + +diff --git a/drivers/block/blksnap/ctrl.c b/drivers/block/blksnap/ctrl.c +new file mode 100644 +index 000000000000..30c3f538f5b4 +--- /dev/null ++++ b/drivers/block/blksnap/ctrl.c +@@ -0,0 +1,445 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-ctrl: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "ctrl.h" ++#include "params.h" ++#include "version.h" ++#include "snapshot.h" ++#include "snapimage.h" ++#include "tracker.h" ++#include "big_buffer.h" ++ ++static int blk_snap_major; ++ ++static long ctrl_unlocked_ioctl(struct file *filp, unsigned int cmd, ++ unsigned long arg); ++ ++static const struct file_operations ctrl_fops = { ++ .owner = THIS_MODULE, ++ .unlocked_ioctl = ctrl_unlocked_ioctl, ++}; ++ ++static const struct blk_snap_version version = { ++ .major = VERSION_MAJOR, ++ .minor = VERSION_MINOR, ++ .revision = VERSION_REVISION, ++ .build = VERSION_BUILD, ++}; ++ ++int get_blk_snap_major(void) ++{ ++ return blk_snap_major; ++} ++ ++int ctrl_init(void) ++{ ++ int ret; ++ ++ ret = register_chrdev(0, BLK_SNAP_MODULE_NAME, &ctrl_fops); ++ if (ret < 0) { ++ pr_err("Failed to register a character device. errno=%d\n", ++ abs(blk_snap_major)); ++ return ret; ++ } ++ ++ blk_snap_major = ret; ++ pr_info("Register control device [%d:0].\n", blk_snap_major); ++ return 0; ++} ++ ++void ctrl_done(void) ++{ ++ pr_info("Unregister control device\n"); ++ ++ unregister_chrdev(blk_snap_major, BLK_SNAP_MODULE_NAME); ++} ++ ++static int ioctl_version(unsigned long arg) ++{ ++ if (copy_to_user((void *)arg, &version, sizeof(version))) { ++ pr_err("Unable to get version: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ return 0; ++} ++ ++static int ioctl_tracker_remove(unsigned long arg) ++{ ++ struct blk_snap_tracker_remove karg; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg)) != 0) { ++ pr_err("Unable to remove device from tracking: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ return tracker_remove(MKDEV(karg.dev_id.mj, karg.dev_id.mn)); ++} ++ ++static int ioctl_tracker_collect(unsigned long arg) ++{ ++ int res; ++ struct blk_snap_tracker_collect karg; ++ struct blk_snap_cbt_info *cbt_info = NULL; ++ ++ pr_debug("Collecting tracking devices\n"); ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to collect tracking devices: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ if (!karg.cbt_info_array) { ++ /* ++ * If the buffer is empty, this is a request to determine ++ * the number of trackers. ++ */ ++ res = tracker_collect(0, NULL, &karg.count); ++ if (res) { ++ pr_err("Failed to execute tracker_collect. 
errno=%d\n", ++ abs(res)); ++ return res; ++ } ++ if (copy_to_user((void *)arg, (void *)&karg, sizeof(karg))) { ++ pr_err("Unable to collect tracking devices: invalid user buffer for arguments\n"); ++ return -ENODATA; ++ } ++ return 0; ++ } ++ ++ cbt_info = kcalloc(karg.count, sizeof(struct blk_snap_cbt_info), ++ GFP_KERNEL); ++ if (cbt_info == NULL) ++ return -ENOMEM; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_blk_snap_cbt_info); ++#endif ++ res = tracker_collect(karg.count, cbt_info, &karg.count); ++ if (res) { ++ pr_err("Failed to execute tracker_collect. errno=%d\n", ++ abs(res)); ++ goto fail; ++ } ++ ++ if (copy_to_user(karg.cbt_info_array, cbt_info, ++ karg.count * sizeof(struct blk_snap_cbt_info))) { ++ pr_err("Unable to collect tracking devices: invalid user buffer for CBT info\n"); ++ res = -ENODATA; ++ goto fail; ++ } ++ ++ if (copy_to_user((void *)arg, (void *)&karg, sizeof(karg))) { ++ pr_err("Unable to collect tracking devices: invalid user buffer for arguments\n"); ++ res = -ENODATA; ++ goto fail; ++ } ++fail: ++ kfree(cbt_info); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_blk_snap_cbt_info); ++#endif ++ return res; ++} ++ ++static int ioctl_tracker_read_cbt_map(unsigned long arg) ++{ ++ struct blk_snap_tracker_read_cbt_bitmap karg; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to read CBT map: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ return tracker_read_cbt_bitmap(MKDEV(karg.dev_id.mj, karg.dev_id.mn), ++ karg.offset, karg.length, ++ (char __user *)karg.buff); ++} ++ ++static int ioctl_tracker_mark_dirty_blocks(unsigned long arg) ++{ ++ int ret = 0; ++ struct blk_snap_tracker_mark_dirty_blocks karg; ++ struct blk_snap_block_range *dirty_blocks_array; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to mark dirty blocks: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ dirty_blocks_array = kcalloc( ++ karg.count, sizeof(struct blk_snap_block_range), GFP_KERNEL); ++ if (!dirty_blocks_array) ++ return -ENOMEM; ++ ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_blk_snap_block_range); ++#endif ++ if (copy_from_user(dirty_blocks_array, (void *)karg.dirty_blocks_array, ++ karg.count * sizeof(struct blk_snap_block_range))) { ++ pr_err("Unable to mark dirty blocks: invalid user buffer\n"); ++ ret = -ENODATA; ++ } else { ++ if (karg.dev_id.mj == snapimage_major()) ++ ret = snapshot_mark_dirty_blocks( ++ MKDEV(karg.dev_id.mj, karg.dev_id.mn), ++ dirty_blocks_array, karg.count); ++ else ++ ret = tracker_mark_dirty_blocks( ++ MKDEV(karg.dev_id.mj, karg.dev_id.mn), ++ dirty_blocks_array, karg.count); ++ } ++ ++ kfree(dirty_blocks_array); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_blk_snap_block_range); ++#endif ++ return ret; ++} ++ ++static int ioctl_snapshot_create(unsigned long arg) ++{ ++ int ret; ++ struct blk_snap_snapshot_create karg; ++ struct blk_snap_dev_t *dev_id_array = NULL; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to create snapshot: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ dev_id_array = ++ kcalloc(karg.count, sizeof(struct blk_snap_dev_t), GFP_KERNEL); ++ if (dev_id_array == NULL) { ++ pr_err("Unable to create snapshot: too many devices %d\n", ++ karg.count); ++ return -ENOMEM; ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_blk_snap_dev_t); ++#endif ++ if 
(copy_from_user(dev_id_array, (void *)karg.dev_id_array, ++ karg.count * sizeof(struct blk_snap_dev_t))) { ++ pr_err("Unable to create snapshot: invalid user buffer\n"); ++ ret = -ENODATA; ++ goto out; ++ } ++ ++ ret = snapshot_create(dev_id_array, karg.count, &karg.id); ++ if (ret) ++ goto out; ++ ++ if (copy_to_user((void *)arg, &karg, sizeof(karg))) { ++ pr_err("Unable to create snapshot: invalid user buffer\n"); ++ ret = -ENODATA; ++ } ++out: ++ kfree(dev_id_array); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_blk_snap_dev_t); ++#endif ++ return ret; ++} ++ ++static int ioctl_snapshot_destroy(unsigned long arg) ++{ ++ struct blk_snap_snapshot_destroy karg; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to destroy snapshot: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ return snapshot_destroy(&karg.id); ++} ++ ++static int ioctl_snapshot_append_storage(unsigned long arg) ++{ ++ int res = 0; ++ struct blk_snap_snapshot_append_storage karg; ++ struct big_buffer *ranges = NULL; ++ size_t ranges_buffer_size; ++ ++ pr_debug("Append difference storage\n"); ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to append difference storage: invalid user buffer\n"); ++ return -EINVAL; ++ } ++ ++ /* ++ * Rarely, but there are file systems in which the blocks on the disk ++ * are significantly fragmented. And the drive for the diff storage can be ++ * quite large. ++ * At the same time, an attempt to allocate several pages of continuous ++ * address space on such systems often causes an ENOMEM error. ++ * Therefore, an array of pages is used to store an array of ranges of ++ * available disk space. ++ */ ++ ranges_buffer_size = karg.count * sizeof(struct blk_snap_block_range); ++ ranges = big_buffer_alloc(ranges_buffer_size, GFP_KERNEL); ++ if (!ranges) { ++ pr_err("Unable to append difference storage: cannot allocate [%zu] bytes\n", ++ ranges_buffer_size); ++ return -ENOMEM; ++ } ++ ++ if (big_buffer_copy_from_user((void *)karg.ranges, 0, ranges, ++ ranges_buffer_size) != ++ ranges_buffer_size) { ++ pr_err("Unable to add file to snapstore: invalid user buffer for parameters\n"); ++ big_buffer_free(ranges); ++ return -ENODATA; ++ } ++ ++ res = snapshot_append_storage(&karg.id, karg.dev_id, ranges, ++ (size_t)karg.count); ++ big_buffer_free(ranges); ++ ++ return res; ++} ++ ++static int ioctl_snapshot_take(unsigned long arg) ++{ ++ struct blk_snap_snapshot_take karg; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to take snapshot: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ return snapshot_take(&karg.id); ++} ++ ++static int ioctl_snapshot_wait_event(unsigned long arg) ++{ ++ int ret = 0; ++ struct blk_snap_snapshot_event *karg; ++ struct event *event; ++ ++ //pr_debug("Wait event\n"); ++ karg = kzalloc(sizeof(struct blk_snap_snapshot_event), GFP_KERNEL); ++ if (!karg) ++ return -ENOMEM; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_blk_snap_snapshot_event); ++#endif ++ if (copy_from_user(karg, (void *)arg, ++ sizeof(struct blk_snap_snapshot_event))) { ++ pr_err("Unable failed to get snapstore error code: invalid user buffer\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ event = snapshot_wait_event(&karg->id, karg->timeout_ms); ++ if (IS_ERR(event)) { ++ ret = PTR_ERR(event); ++ goto out; ++ } ++ ++ pr_debug("Received event=%lld code=%d data_size=%d\n", event->time, ++ event->code, event->data_size); ++ karg->code = 
event->code; ++ karg->time_label = event->time; ++ ++ if (event->data_size > sizeof(karg->data)) { ++ pr_err("Event size %d is too big\n", event->data_size); ++ ret = -ENOSPC; ++ /* If we can't copy all the data, we copy only part of it. */ ++ } ++ memcpy(karg->data, event->data, event->data_size); ++ //min_t(size_t, event->data_size, sizeof(karg->data))); ++ event_free(event); ++ ++ if (copy_to_user((void *)arg, karg, ++ sizeof(struct blk_snap_snapshot_event))) { ++ pr_err("Unable to get snapstore error code: invalid user buffer\n"); ++ ret = -EINVAL; ++ } ++out: ++ kfree(karg); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_blk_snap_snapshot_event); ++#endif ++ return ret; ++} ++ ++static int ioctl_snapshot_collect(unsigned long arg) ++{ ++ int ret; ++ struct blk_snap_snapshot_collect karg; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to collect available snapshots: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ ret = snapshot_collect(&karg.count, karg.ids); ++ ++ if (copy_to_user((void *)arg, &karg, sizeof(karg))) { ++ pr_err("Unable to collect available snapshots: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ return ret; ++} ++ ++static int ioctl_snapshot_collect_images(unsigned long arg) ++{ ++ int ret; ++ struct blk_snap_snapshot_collect_images karg; ++ ++ if (copy_from_user(&karg, (void *)arg, sizeof(karg))) { ++ pr_err("Unable to collect snapshot images: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ ret = snapshot_collect_images(&karg.id, karg.image_info_array, ++ &karg.count); ++ ++ if (copy_to_user((void *)arg, &karg, sizeof(karg))) { ++ pr_err("Unable to collect snapshot images: invalid user buffer\n"); ++ return -ENODATA; ++ } ++ ++ return ret; ++} ++ ++static int (*const blk_snap_ioctl_table[])(unsigned long arg) = { ++ ioctl_version, ++ ioctl_tracker_remove, ++ ioctl_tracker_collect, ++ ioctl_tracker_read_cbt_map, ++ ioctl_tracker_mark_dirty_blocks, ++ ioctl_snapshot_create, ++ ioctl_snapshot_destroy, ++ ioctl_snapshot_append_storage, ++ ioctl_snapshot_take, ++ ioctl_snapshot_collect, ++ ioctl_snapshot_collect_images, ++ ioctl_snapshot_wait_event, ++}; ++ ++static_assert( ++ sizeof(blk_snap_ioctl_table) == (blk_snap_ioctl_end * sizeof(void *)), ++ "The size of table blk_snap_ioctl_table does not match the enum blk_snap_ioctl."); ++ ++static long ctrl_unlocked_ioctl(struct file *filp, unsigned int cmd, ++ unsigned long arg) ++{ ++ int nr = _IOC_NR(cmd); ++ ++ if (nr > (sizeof(blk_snap_ioctl_table) / sizeof(void *))) ++ return -ENOTTY; ++ ++ if (!blk_snap_ioctl_table[nr]) ++ return -ENOTTY; ++ ++ return blk_snap_ioctl_table[nr](arg); ++} +diff --git a/drivers/block/blksnap/ctrl.h b/drivers/block/blksnap/ctrl.h +new file mode 100644 +index 000000000000..4f313891febb +--- /dev/null ++++ b/drivers/block/blksnap/ctrl.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++ ++int get_blk_snap_major(void); ++ ++int ctrl_init(void); ++void ctrl_done(void); +-- +2.20.1 + diff --git a/patches/lk5.19/0004-lock-blksnap-init-and-exit-functions.patch b/patches/lk5.19/0004-lock-blksnap-init-and-exit-functions.patch new file mode 100644 index 00000000..a8dd1b7b --- /dev/null +++ b/patches/lk5.19/0004-lock-blksnap-init-and-exit-functions.patch @@ -0,0 +1,167 @@ +From 0c498bd7f4e0ea8d0f0fa72ebfc0c4238f32fb96 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:27:37 +0200 +Subject: [PATCH 04/20] lock, blksnap: init() and exit() functions + +Contains callback 
functions for loading and unloading the module. The +module parameters and other mandatory declarations for the kernel module +are also defined. + +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/main.c | 109 ++++++++++++++++++++++++++++++++ + drivers/block/blksnap/params.h | 10 +++ + drivers/block/blksnap/version.h | 8 +++ + 3 files changed, 127 insertions(+) + create mode 100644 drivers/block/blksnap/main.c + create mode 100644 drivers/block/blksnap/params.h + create mode 100644 drivers/block/blksnap/version.h + +diff --git a/drivers/block/blksnap/main.c b/drivers/block/blksnap/main.c +new file mode 100644 +index 000000000000..9737df4e6461 +--- /dev/null ++++ b/drivers/block/blksnap/main.c +@@ -0,0 +1,109 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "version.h" ++#include "params.h" ++#include "ctrl.h" ++#include "sysfs.h" ++#include "snapimage.h" ++#include "snapshot.h" ++#include "tracker.h" ++#include "diff_io.h" ++ ++static int __init blk_snap_init(void) ++{ ++ int result; ++ ++ pr_info("Loading\n"); ++ pr_debug("Version: %s\n", VERSION_STR); ++ pr_debug("tracking_block_minimum_shift: %d\n", ++ tracking_block_minimum_shift); ++ pr_debug("tracking_block_maximum_count: %d\n", ++ tracking_block_maximum_count); ++ pr_debug("chunk_minimum_shift: %d\n", chunk_minimum_shift); ++ pr_debug("chunk_maximum_count: %d\n", chunk_maximum_count); ++ pr_debug("chunk_maximum_in_cache: %d\n", chunk_maximum_in_cache); ++ pr_debug("free_diff_buffer_pool_size: %d\n", ++ free_diff_buffer_pool_size); ++ pr_debug("diff_storage_minimum: %d\n", diff_storage_minimum); ++ ++ result = diff_io_init(); ++ if (result) ++ return result; ++ ++ result = snapimage_init(); ++ if (result) ++ return result; ++ ++ result = tracker_init(); ++ if (result) ++ return result; ++ ++ result = ctrl_init(); ++ if (result) ++ return result; ++ ++ result = sysfs_init(); ++ return result; ++} ++ ++static void __exit blk_snap_exit(void) ++{ ++ pr_info("Unloading module\n"); ++ ++ sysfs_done(); ++ ctrl_done(); ++ ++ diff_io_done(); ++ snapshot_done(); ++ snapimage_done(); ++ tracker_done(); ++ ++ pr_info("Module was unloaded\n"); ++} ++ ++module_init(blk_snap_init); ++module_exit(blk_snap_exit); ++ ++int tracking_block_minimum_shift = CONFIG_BLK_SNAP_TRACKING_BLOCK_MINIMUM_SHIFT; ++int tracking_block_maximum_count = CONFIG_BLK_SNAP_TRACKING_BLOCK_MAXIMUM_COUNT; ++int chunk_minimum_shift = CONFIG_BLK_SNAP_CHUNK_MINIMUM_SHIFT; ++int chunk_maximum_count = CONFIG_BLK_SNAP_CHUNK_MAXIMUM_COUNT; ++int chunk_maximum_in_cache = CONFIG_BLK_SNAP_CHUNK_MAXIMUM_IN_CACHE; ++int free_diff_buffer_pool_size = CONFIG_BLK_SNAP_FREE_DIFF_BUFFER_POOL_SIZE; ++int diff_storage_minimum = CONFIG_BLK_SNAP_DIFF_STORAGE_MINIMUM; ++ ++module_param_named(tracking_block_minimum_shift, tracking_block_minimum_shift, ++ int, 0644); ++MODULE_PARM_DESC(tracking_block_minimum_shift, ++ "The power of 2 for minimum trackings block size"); ++module_param_named(tracking_block_maximum_count, tracking_block_maximum_count, ++ int, 0644); ++MODULE_PARM_DESC(tracking_block_maximum_count, ++ "The limit of the maximum number of trackings blocks"); ++module_param_named(chunk_minimum_shift, chunk_minimum_shift, int, 0644); ++MODULE_PARM_DESC(chunk_minimum_shift, ++ "The power of 2 for minimum snapshots chunk size"); ++module_param_named(chunk_maximum_count, chunk_maximum_count, int, 0644); 
++MODULE_PARM_DESC(chunk_maximum_count, ++ "The limit of the maximum number of snapshots chunks"); ++module_param_named(chunk_maximum_in_cache, chunk_maximum_in_cache, int, 0644); ++MODULE_PARM_DESC(chunk_maximum_in_cache, ++ "The limit of the maximum chunks in memory cache"); ++module_param_named(free_diff_buffer_pool_size, free_diff_buffer_pool_size, int, ++ 0644); ++MODULE_PARM_DESC(free_diff_buffer_pool_size, ++ "The maximum size of the free buffers pool"); ++module_param_named(diff_storage_minimum, diff_storage_minimum, int, 0644); ++MODULE_PARM_DESC( ++ diff_storage_minimum, ++ "The minimum allowable size of the difference storage in sectors"); ++ ++MODULE_DESCRIPTION("Block Layer Snapshot Kernel Module"); ++MODULE_VERSION(VERSION_STR); ++MODULE_AUTHOR("Veeam Software Group GmbH"); ++MODULE_LICENSE("GPL"); +diff --git a/drivers/block/blksnap/params.h b/drivers/block/blksnap/params.h +new file mode 100644 +index 000000000000..1f8f0bd23e9f +--- /dev/null ++++ b/drivers/block/blksnap/params.h +@@ -0,0 +1,10 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++ ++extern int tracking_block_minimum_shift; ++extern int tracking_block_maximum_count; ++extern int chunk_minimum_shift; ++extern int chunk_maximum_count; ++extern int chunk_maximum_in_cache; ++extern int free_diff_buffer_pool_size; ++extern int diff_storage_minimum; +diff --git a/drivers/block/blksnap/version.h b/drivers/block/blksnap/version.h +new file mode 100644 +index 000000000000..0d02a68f44a8 +--- /dev/null ++++ b/drivers/block/blksnap/version.h +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++ ++#define VERSION_MAJOR 1 ++#define VERSION_MINOR 0 ++#define VERSION_REVISION 0 ++#define VERSION_BUILD 0 ++#define VERSION_STR "1.0.0.0" +-- +2.20.1 + diff --git a/patches/lk5.19/0005-block-blksnap-interaction-with-sysfs.patch b/patches/lk5.19/0005-block-blksnap-interaction-with-sysfs.patch new file mode 100644 index 00000000..9b64cd1a --- /dev/null +++ b/patches/lk5.19/0005-block-blksnap-interaction-with-sysfs.patch @@ -0,0 +1,117 @@ +From 4e447af7a8e0db9b0e4ad42abfa0f8fc68565600 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:28:57 +0200 +Subject: [PATCH 05/20] block, blksnap: interaction with sysfs + +Provides creation of a class file /sys/class/blksnap and a device file +/dev/blksnap for module management. 
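+
+For example, user space can discover the control device number through
+the class file roughly like this (an illustrative sketch assuming
+<stdio.h>):
+
+	int major = 0;
+	FILE *f = fopen("/sys/class/blksnap/major", "r");
+
+	if (f) {
+		if (fscanf(f, "%d", &major) != 1)
+			major = 0;
+		fclose(f);
+	}
+	/* The control device /dev/blksnap has number (major, 0). */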
+
+Signed-off-by: Sergei Shtepa
+---
+ drivers/block/blksnap/sysfs.c | 81 +++++++++++++++++++++++++++++++
+ drivers/block/blksnap/sysfs.h | 5 +++
+ 2 files changed, 86 insertions(+)
+ create mode 100644 drivers/block/blksnap/sysfs.c
+ create mode 100644 drivers/block/blksnap/sysfs.h
+
+diff --git a/drivers/block/blksnap/sysfs.c b/drivers/block/blksnap/sysfs.c
+new file mode 100644
+index 000000000000..32a34609d106
+--- /dev/null
++++ b/drivers/block/blksnap/sysfs.c
+@@ -0,0 +1,81 @@
++// SPDX-License-Identifier: GPL-2.0
++#define pr_fmt(fmt) KBUILD_MODNAME "-sysfs: " fmt
++#include
++#include
++#include
++#include
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++#include "memory_checker.h"
++#endif
++#include "sysfs.h"
++#include "ctrl.h"
++
++static ssize_t major_show(struct class *class, struct class_attribute *attr,
++			  char *buf)
++{
++	sprintf(buf, "%d", get_blk_snap_major());
++	return strlen(buf);
++}
++
++/* Declare class_attr_major */
++CLASS_ATTR_RO(major);
++
++static struct class *blk_snap_class;
++
++static struct device *blk_snap_device;
++
++int sysfs_init(void)
++{
++	struct device *dev;
++	int res;
++
++	blk_snap_class = class_create(THIS_MODULE, BLK_SNAP_MODULE_NAME);
++	if (IS_ERR(blk_snap_class)) {
++		res = PTR_ERR(blk_snap_class);
++
++		pr_err("Failed to create class. errno=%d\n", abs(res));
++		return res;
++	}
++
++	pr_info("Create 'major' sysfs attribute\n");
++	res = class_create_file(blk_snap_class, &class_attr_major);
++	if (res) {
++		pr_err("Failed to create 'major' sysfs file\n");
++
++		class_destroy(blk_snap_class);
++		blk_snap_class = NULL;
++		return res;
++	}
++
++	dev = device_create(blk_snap_class, NULL,
++			    MKDEV(get_blk_snap_major(), 0), NULL,
++			    BLK_SNAP_MODULE_NAME);
++	if (IS_ERR(dev)) {
++		res = PTR_ERR(dev);
++		pr_err("Failed to create device, errno=%d\n", abs(res));
++
++		class_remove_file(blk_snap_class, &class_attr_major);
++		class_destroy(blk_snap_class);
++		blk_snap_class = NULL;
++		return res;
++	}
++
++	blk_snap_device = dev;
++	return res;
++}
++
++void sysfs_done(void)
++{
++	pr_info("Cleanup sysfs\n");
++
++	if (blk_snap_device) {
++		device_unregister(blk_snap_device);
++		blk_snap_device = NULL;
++	}
++
++	if (blk_snap_class != NULL) {
++		class_remove_file(blk_snap_class, &class_attr_major);
++		class_destroy(blk_snap_class);
++		blk_snap_class = NULL;
++	}
++}
+diff --git a/drivers/block/blksnap/sysfs.h b/drivers/block/blksnap/sysfs.h
+new file mode 100644
+index 000000000000..b41c301fe33b
+--- /dev/null
++++ b/drivers/block/blksnap/sysfs.h
+@@ -0,0 +1,5 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#pragma once
++
++int sysfs_init(void);
++void sysfs_done(void);
+--
+2.20.1
+
diff --git a/patches/lk5.19/0006-block-blksnap-attaching-and-detaching-the-filter-and.patch b/patches/lk5.19/0006-block-blksnap-attaching-and-detaching-the-filter-and.patch
new file mode 100644
index 00000000..1f3fa455
--- /dev/null
+++ b/patches/lk5.19/0006-block-blksnap-attaching-and-detaching-the-filter-and.patch
@@ -0,0 +1,800 @@
+From c00a9737e45f9eaf7b731206f69e321f7879abbf Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:33:06 +0200
+Subject: [PATCH 06/20] block, blksnap: attaching and detaching the filter and
+ handling a bios
+
+The struct tracker contains callback functions for handling the I/O units
+of a block device. When a write request is handled, the change block
+tracking (CBT) map functions are called and the process of copying data
+from the original block device to the change store is initiated.
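+
+Schematically, the filter contract used here looks as follows (the
+demo_* identifiers are illustrative only; the real callbacks are
+tracker_submit_bio_cb() and tracker_detach_cb() below):
+
+	static enum bdev_filter_result demo_submit_bio_cb(struct bio *bio,
+						struct bdev_filter *flt)
+	{
+		/* Inspect the bio, e.g. run the copy-on-write algorithm. */
+		return bdev_filter_pass; /* let the bio be processed */
+	}
+
+	static void demo_detach_cb(struct kref *kref)
+	{
+		/* Release the resources owned by the filter. */
+	}
+
+	static const struct bdev_filter_operations demo_fops = {
+		.submit_bio_cb = demo_submit_bio_cb,
+		.detach_cb = demo_detach_cb,
+	};
+
+A filter embeds struct bdev_filter, initializes it with
+bdev_filter_init() and registers it with bdev_filter_attach().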
+Attaching and detaching the tracker is provided by the functions +bdev_filter_*() of the kernel. + +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/tracker.c | 693 ++++++++++++++++++++++++++++++++ + drivers/block/blksnap/tracker.h | 71 ++++ + 2 files changed, 764 insertions(+) + create mode 100644 drivers/block/blksnap/tracker.c + create mode 100644 drivers/block/blksnap/tracker.h + +diff --git a/drivers/block/blksnap/tracker.c b/drivers/block/blksnap/tracker.c +new file mode 100644 +index 000000000000..705e64321cb2 +--- /dev/null ++++ b/drivers/block/blksnap/tracker.c +@@ -0,0 +1,693 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-tracker: " fmt ++#include ++#include ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "params.h" ++#include "tracker.h" ++#include "cbt_map.h" ++#include "diff_area.h" ++ ++struct tracked_device { ++ struct list_head link; ++ dev_t dev_id; ++}; ++ ++DEFINE_PERCPU_RWSEM(tracker_submit_lock); ++LIST_HEAD(tracked_device_list); ++DEFINE_SPINLOCK(tracked_device_lock); ++static refcount_t trackers_counter = REFCOUNT_INIT(1); ++ ++struct tracker_release_worker { ++ struct work_struct work; ++ struct list_head list; ++ spinlock_t lock; ++}; ++static struct tracker_release_worker tracker_release_worker; ++ ++void tracker_lock(void) ++{ ++ pr_info("Lock trackers\n"); ++ percpu_down_write(&tracker_submit_lock); ++} ++ ++void tracker_unlock(void) ++{ ++ percpu_up_write(&tracker_submit_lock); ++ pr_info("Trackers have been unlocked\n"); ++} ++ ++void tracker_free(struct tracker *tracker) ++{ ++ might_sleep(); ++ ++ pr_debug("Free tracker for device [%u:%u].\n", MAJOR(tracker->dev_id), ++ MINOR(tracker->dev_id)); ++ ++ diff_area_put(tracker->diff_area); ++ cbt_map_put(tracker->cbt_map); ++ ++ kfree(tracker); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_tracker); ++#endif ++ refcount_dec(&trackers_counter); ++} ++ ++struct tracker *tracker_get_by_dev(struct block_device *bdev) ++{ ++ struct bdev_filter *flt; ++ ++ flt = bdev_filter_get_by_altitude(bdev, bdev_filter_alt_blksnap); ++ if (IS_ERR(flt)) ++ return ERR_PTR(PTR_ERR(flt)); ++ if (!flt) ++ return NULL; ++ return container_of(flt, struct tracker, flt); ++} ++ ++static enum bdev_filter_result tracker_submit_bio_cb(struct bio *bio, ++ struct bdev_filter *flt) ++{ ++ enum bdev_filter_result ret = bdev_filter_pass; ++ struct tracker *tracker = container_of(flt, struct tracker, flt); ++ int err; ++ sector_t sector; ++ sector_t count; ++ unsigned int current_flag; ++ ++ if (bio->bi_opf & REQ_NOWAIT) { ++ if (!percpu_down_read_trylock(&tracker_submit_lock)) { ++ bio_wouldblock_error(bio); ++ return bdev_filter_skip; ++ } ++ } else ++ percpu_down_read(&tracker_submit_lock); ++ ++ if (!op_is_write(bio_op(bio))) ++ goto out; ++ ++ if (!bio->bi_iter.bi_size) ++ goto out; ++ ++ sector = bio->bi_iter.bi_sector; ++ count = (sector_t)(round_up(bio->bi_iter.bi_size, SECTOR_SIZE) / ++ SECTOR_SIZE); ++ ++ current_flag = memalloc_noio_save(); ++ err = cbt_map_set(tracker->cbt_map, sector, count); ++ memalloc_noio_restore(current_flag); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!atomic_read(&tracker->snapshot_is_taken)) ++ goto out; ++ ++ if (diff_area_is_corrupted(tracker->diff_area)) ++ goto out; ++ ++ current_flag = memalloc_noio_save(); ++ err = diff_area_copy(tracker->diff_area, sector, count, ++ !!(bio->bi_opf & REQ_NOWAIT)); ++ memalloc_noio_restore(current_flag); ++ ++ if (unlikely(err)) { 
++ if (err == -EAGAIN) { ++ bio_wouldblock_error(bio); ++ ret = bdev_filter_skip; ++ } else ++ pr_err("Failed to copy data to diff storage with error %d\n", abs(err)); ++ ++ goto out; ++ } ++ ++ /* ++ * If a new bio was created during the handling and the original bio ++ * must be processed synchronously (flag REQ_SYNC), then new bios must ++ * be sent and returned to complete the processing of the original bio. ++ */ ++ if (!bio_list_empty(current->bio_list) && (bio->bi_opf & REQ_SYNC)) ++ ret = bdev_filter_repeat; ++out: ++ percpu_up_read(&tracker_submit_lock); ++ return ret; ++} ++ ++ ++static void tracker_release_work(struct work_struct *work) ++{ ++ struct tracker *tracker = NULL; ++ struct tracker_release_worker *tracker_release = ++ container_of(work, struct tracker_release_worker, work); ++ ++ do { ++ spin_lock(&tracker_release->lock); ++ tracker = list_first_entry_or_null(&tracker_release->list, ++ struct tracker, link); ++ if (tracker) ++ list_del(&tracker->link); ++ spin_unlock(&tracker_release->lock); ++ ++ if (tracker) ++ tracker_free(tracker); ++ } while (tracker); ++} ++ ++static void tracker_detach_cb(struct kref *kref) ++{ ++ struct bdev_filter *flt = container_of(kref, struct bdev_filter, kref); ++ struct tracker *tracker = container_of(flt, struct tracker, flt); ++ ++ spin_lock(&tracker_release_worker.lock); ++ list_add_tail(&tracker->link, &tracker_release_worker.list); ++ spin_unlock(&tracker_release_worker.lock); ++ ++ queue_work(system_wq, &tracker_release_worker.work); ++} ++ ++static const struct bdev_filter_operations tracker_fops = { ++ .submit_bio_cb = tracker_submit_bio_cb, ++ .detach_cb = tracker_detach_cb ++}; ++ ++static int tracker_filter_attach(struct block_device *bdev, ++ struct tracker *tracker) ++{ ++ int ret; ++ bool is_frozen = false; ++ ++ pr_debug("Tracker attach filter\n"); ++ ++ if (freeze_bdev(bdev)) ++ pr_err("Failed to freeze device [%u:%u]\n", MAJOR(bdev->bd_dev), ++ MINOR(bdev->bd_dev)); ++ else { ++ is_frozen = true; ++ pr_debug("Device [%u:%u] was frozen\n", MAJOR(bdev->bd_dev), ++ MINOR(bdev->bd_dev)); ++ } ++ ++ ret = bdev_filter_attach(bdev, KBUILD_MODNAME, bdev_filter_alt_blksnap, ++ &tracker->flt); ++ if (is_frozen) { ++ if (thaw_bdev(bdev)) ++ pr_err("Failed to thaw device [%u:%u]\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ else ++ pr_debug("Device [%u:%u] was unfrozen\n", ++ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); ++ } ++ ++ if (ret) ++ pr_err("Failed to attach tracker to device [%u:%u]\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ ++ return ret; ++} ++ ++static int tracker_filter_detach(struct block_device *bdev) ++{ ++ int ret; ++ bool is_frozen = false; ++ ++ pr_debug("Tracker delete filter\n"); ++ if (freeze_bdev(bdev)) ++ pr_err("Failed to freeze device [%u:%u]\n", MAJOR(bdev->bd_dev), ++ MINOR(bdev->bd_dev)); ++ else { ++ is_frozen = true; ++ pr_debug("Device [%u:%u] was frozen\n", MAJOR(bdev->bd_dev), ++ MINOR(bdev->bd_dev)); ++ } ++ ++ ret = bdev_filter_detach(bdev, KBUILD_MODNAME, bdev_filter_alt_blksnap); ++ ++ if (is_frozen) { ++ if (thaw_bdev(bdev)) ++ pr_err("Failed to thaw device [%u:%u]\n", ++ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); ++ else ++ pr_debug("Device [%u:%u] was unfrozen\n", ++ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); ++ } ++ ++ if (ret) ++ pr_err("Failed to detach filter from device [%u:%u]\n", ++ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); ++ return ret; ++} ++ ++static struct tracker *tracker_new(struct block_device *bdev) ++{ ++ int ret; ++ struct tracker *tracker = 
NULL; ++ struct cbt_map *cbt_map; ++ ++ pr_debug("Creating tracker for device [%u:%u].\n", MAJOR(bdev->bd_dev), ++ MINOR(bdev->bd_dev)); ++ ++ tracker = kzalloc(sizeof(struct tracker), GFP_KERNEL); ++ if (tracker == NULL) ++ return ERR_PTR(-ENOMEM); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_tracker); ++#endif ++ refcount_inc(&trackers_counter); ++ bdev_filter_init(&tracker->flt, &tracker_fops); ++ INIT_LIST_HEAD(&tracker->link); ++ atomic_set(&tracker->snapshot_is_taken, false); ++ tracker->dev_id = bdev->bd_dev; ++ ++ pr_info("Create tracker for device [%u:%u]. Capacity 0x%llx sectors\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id), ++ (unsigned long long)bdev_nr_sectors(bdev)); ++ ++ cbt_map = cbt_map_create(bdev); ++ if (!cbt_map) { ++ pr_err("Failed to create tracker for device [%u:%u]\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ tracker->cbt_map = cbt_map; ++ ++ ret = tracker_filter_attach(bdev, tracker); ++ if (ret) { ++ pr_err("Failed to attach tracker. errno=%d\n", abs(ret)); ++ goto fail; ++ } ++ /* ++ * The filter stores a pointer to the tracker. ++ * The tracker will not be released until its filter is released. ++ */ ++ ++ pr_debug("New tracker for device [%u:%u] was created.\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ ++ return tracker; ++fail: ++ tracker_put(tracker); ++ return ERR_PTR(ret); ++} ++ ++int tracker_take_snapshot(struct tracker *tracker) ++{ ++ int ret = 0; ++ bool cbt_reset_needed = false; ++ sector_t capacity; ++ ++ if (tracker->cbt_map->is_corrupted) { ++ cbt_reset_needed = true; ++ pr_warn("Corrupted CBT table detected. CBT fault\n"); ++ } ++ ++ capacity = bdev_nr_sectors(tracker->diff_area->orig_bdev); ++ if (tracker->cbt_map->device_capacity != capacity) { ++ cbt_reset_needed = true; ++ pr_warn("Device resize detected. CBT fault\n"); ++ } ++ ++ if (cbt_reset_needed) { ++ ret = cbt_map_reset(tracker->cbt_map, capacity); ++ if (ret) { ++ pr_err("Failed to create tracker. errno=%d\n", ++ abs(ret)); ++ return ret; ++ } ++ } ++ ++ cbt_map_switch(tracker->cbt_map); ++ atomic_set(&tracker->snapshot_is_taken, true); ++ ++ return 0; ++} ++ ++void tracker_release_snapshot(struct tracker *tracker) ++{ ++ if (!tracker) ++ return; ++ ++ pr_debug("Tracker for device [%u:%u] release snapshot\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ ++ atomic_set(&tracker->snapshot_is_taken, false); ++} ++ ++int tracker_init(void) ++{ ++ INIT_WORK(&tracker_release_worker.work, tracker_release_work); ++ INIT_LIST_HEAD(&tracker_release_worker.list); ++ spin_lock_init(&tracker_release_worker.lock); ++ ++ return 0; ++} ++ ++/** ++ * tracker_wait_for_release - Waiting for all trackers are released. 
++ */ ++static void tracker_wait_for_release(void) ++{ ++ long inx = 0; ++ u64 start_waiting = jiffies_64; ++ ++ while (refcount_read(&trackers_counter) > 1) { ++ schedule_timeout_interruptible(HZ); ++ if (jiffies_64 > (start_waiting + 10*HZ)) { ++ start_waiting = jiffies_64; ++ inx++; ++ ++ if (inx <= 12) ++ pr_warn("Waiting for trackers release\n"); ++ ++ WARN_ONCE(inx > 12, "Failed to release trackers\n"); ++ } ++ } ++} ++ ++void tracker_done(void) ++{ ++ struct tracked_device *tr_dev; ++ ++ pr_debug("Cleanup trackers\n"); ++ while (true) { ++ spin_lock(&tracked_device_lock); ++ tr_dev = list_first_entry_or_null(&tracked_device_list, ++ struct tracked_device, link); ++ if (tr_dev) ++ list_del(&tr_dev->link); ++ spin_unlock(&tracked_device_lock); ++ ++ if (!tr_dev) ++ break; ++ ++ tracker_remove(tr_dev->dev_id); ++ kfree(tr_dev); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_tracked_device); ++#endif ++ } ++ ++ tracker_wait_for_release(); ++} ++ ++struct tracker *tracker_create_or_get(dev_t dev_id) ++{ ++ struct tracker *tracker; ++ struct block_device *bdev; ++ struct tracked_device *tr_dev; ++ ++ bdev = blkdev_get_by_dev(dev_id, 0, NULL); ++ if (IS_ERR(bdev)) { ++ pr_info("Cannot open device [%u:%u]\n", MAJOR(dev_id), ++ MINOR(dev_id)); ++ return ERR_PTR(PTR_ERR(bdev)); ++ } ++ ++ tracker = tracker_get_by_dev(bdev); ++ if (IS_ERR(tracker)) { ++ int err = PTR_ERR(tracker); ++ ++ pr_err("Cannot get tracker for device [%u:%u]. errno=%d\n", ++ MAJOR(dev_id), MINOR(dev_id), abs(err)); ++ goto put_bdev; ++ } ++ if (tracker) { ++ pr_debug("Device [%u:%u] is already under tracking\n", ++ MAJOR(dev_id), MINOR(dev_id)); ++ goto put_bdev; ++ } ++ ++ tr_dev = kzalloc(sizeof(struct tracked_device), GFP_KERNEL); ++ if (!tr_dev) { ++ tracker = ERR_PTR(-ENOMEM); ++ goto put_bdev; ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_tracked_device); ++#endif ++ INIT_LIST_HEAD(&tr_dev->link); ++ tr_dev->dev_id = dev_id; ++ ++ tracker = tracker_new(bdev); ++ if (IS_ERR(tracker)) { ++ int err = PTR_ERR(tracker); ++ ++ pr_err("Failed to create tracker. errno=%d\n", abs(err)); ++ kfree(tr_dev); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_tracked_device); ++#endif ++ } else { ++ /* ++ * It is normal that the new trackers filter will have ++ * a ref counter value of 2. This allows not to detach ++ * the filter when the snapshot is released. ++ */ ++ bdev_filter_get(&tracker->flt); ++ ++ spin_lock(&tracked_device_lock); ++ list_add_tail(&tr_dev->link, &tracked_device_list); ++ spin_unlock(&tracked_device_lock); ++ } ++put_bdev: ++ blkdev_put(bdev, 0); ++ return tracker; ++} ++ ++int tracker_remove(dev_t dev_id) ++{ ++ int ret; ++ struct tracker *tracker; ++ struct block_device *bdev; ++ ++ pr_info("Removing device [%u:%u] from tracking\n", MAJOR(dev_id), ++ MINOR(dev_id)); ++ ++ bdev = blkdev_get_by_dev(dev_id, 0, NULL); ++ if (IS_ERR(bdev)) { ++ pr_info("Cannot open device [%u:%u]\n", MAJOR(dev_id), ++ MINOR(dev_id)); ++ return PTR_ERR(bdev); ++ } ++ ++ tracker = tracker_get_by_dev(bdev); ++ if (IS_ERR(tracker)) { ++ ret = PTR_ERR(tracker); ++ ++ pr_err("Failed to get tracker for device [%u:%u]. 
errno=%d\n", ++ MAJOR(dev_id), MINOR(dev_id), abs(ret)); ++ goto put_bdev; ++ } ++ if (!tracker) { ++ pr_info("Unable to remove device [%u:%u] from tracking: ", ++ MAJOR(dev_id), MINOR(dev_id)); ++ pr_info("tracker not found\n"); ++ ret = -ENODATA; ++ goto put_bdev; ++ } ++ ++ if (atomic_read(&tracker->snapshot_is_taken)) { ++ pr_err("Tracker for device [%u:%u] is busy with a snapshot\n", ++ MAJOR(dev_id), MINOR(dev_id)); ++ ret = -EBUSY; ++ goto put_tracker; ++ } ++ ++ ret = tracker_filter_detach(bdev); ++ if (ret) ++ pr_err("Failed to remove tracker from device [%u:%u]\n", ++ MAJOR(dev_id), MINOR(dev_id)); ++ else { ++ struct tracked_device *tr_dev = NULL; ++ struct tracked_device *iter_tr_dev; ++ ++ spin_lock(&tracked_device_lock); ++ list_for_each_entry(iter_tr_dev, &tracked_device_list, link) { ++ if (iter_tr_dev->dev_id == dev_id) { ++ list_del(&iter_tr_dev->link); ++ tr_dev = iter_tr_dev; ++ break; ++ } ++ } ++ spin_unlock(&tracked_device_lock); ++ ++ kfree(tr_dev); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (tr_dev) ++ memory_object_dec(memory_object_tracked_device); ++#endif ++ } ++put_tracker: ++ tracker_put(tracker); ++put_bdev: ++ blkdev_put(bdev, 0); ++ return ret; ++} ++ ++int tracker_read_cbt_bitmap(dev_t dev_id, unsigned int offset, size_t length, ++ char __user *user_buff) ++{ ++ int ret; ++ struct tracker *tracker; ++ struct block_device *bdev; ++ ++ bdev = blkdev_get_by_dev(dev_id, 0, NULL); ++ if (IS_ERR(bdev)) { ++ pr_info("Cannot open device [%u:%u]\n", MAJOR(dev_id), ++ MINOR(dev_id)); ++ return PTR_ERR(bdev); ++ } ++ ++ tracker = tracker_get_by_dev(bdev); ++ if (IS_ERR(tracker)) { ++ pr_err("Cannot get tracker for device [%u:%u]\n", ++ MAJOR(dev_id), MINOR(dev_id)); ++ ret = PTR_ERR(tracker); ++ goto put_bdev; ++ } ++ if (!tracker) { ++ pr_info("Unable to read CBT bitmap for device [%u:%u]: ", ++ MAJOR(dev_id), MINOR(dev_id)); ++ pr_info("tracker not found\n"); ++ ret = -ENODATA; ++ goto put_bdev; ++ } ++ ++ if (atomic_read(&tracker->snapshot_is_taken)) { ++ ret = cbt_map_read_to_user(tracker->cbt_map, user_buff, ++ offset, length); ++ } else { ++ pr_err("Unable to read CBT bitmap for device [%u:%u]: ", ++ MAJOR(dev_id), MINOR(dev_id)); ++ pr_err("device is not captured by snapshot\n"); ++ ret = -EPERM; ++ } ++ ++ tracker_put(tracker); ++put_bdev: ++ blkdev_put(bdev, 0); ++ return ret; ++} ++ ++static inline void collect_cbt_info(dev_t dev_id, ++ struct blk_snap_cbt_info *cbt_info) ++{ ++ struct block_device *bdev; ++ struct tracker *tracker; ++ ++ bdev = blkdev_get_by_dev(dev_id, 0, NULL); ++ if (IS_ERR(bdev)) { ++ pr_err("Cannot open device [%u:%u]\n", MAJOR(dev_id), ++ MINOR(dev_id)); ++ return; ++ } ++ ++ tracker = tracker_get_by_dev(bdev); ++ if (IS_ERR_OR_NULL(tracker)) ++ goto put_bdev; ++ if (!tracker->cbt_map) ++ goto put_tracker; ++ ++ cbt_info->device_capacity = ++ (__u64)(tracker->cbt_map->device_capacity << SECTOR_SHIFT); ++ cbt_info->blk_size = (__u32)cbt_map_blk_size(tracker->cbt_map); ++ cbt_info->blk_count = (__u32)tracker->cbt_map->blk_count; ++ cbt_info->snap_number = (__u8)tracker->cbt_map->snap_number_previous; ++ uuid_copy(&cbt_info->generation_id, &tracker->cbt_map->generation_id); ++put_tracker: ++ tracker_put(tracker); ++put_bdev: ++ blkdev_put(bdev, 0); ++} ++ ++int tracker_collect(int max_count, struct blk_snap_cbt_info *cbt_info, ++ int *pcount) ++{ ++ int ret = 0; ++ int count = 0; ++ int iter = 0; ++ struct tracked_device *tr_dev; ++ ++ if (!cbt_info) { ++ /** ++ * Just calculate trackers list length. 
++	 */
++	spin_lock(&tracked_device_lock);
++	list_for_each_entry(tr_dev, &tracked_device_list, link)
++		++count;
++	spin_unlock(&tracked_device_lock);
++	goto out;
++	}
++
++	spin_lock(&tracked_device_lock);
++	list_for_each_entry(tr_dev, &tracked_device_list, link) {
++		if (count >= max_count) {
++			ret = -ENOBUFS;
++			break;
++		}
++
++		cbt_info[count].dev_id.mj = MAJOR(tr_dev->dev_id);
++		cbt_info[count].dev_id.mn = MINOR(tr_dev->dev_id);
++		++count;
++	}
++	spin_unlock(&tracked_device_lock);
++
++	if (ret)
++		return ret;
++
++	for (iter = 0; iter < count; iter++) {
++		dev_t dev_id = MKDEV(cbt_info[iter].dev_id.mj,
++				     cbt_info[iter].dev_id.mn);
++
++		collect_cbt_info(dev_id, &cbt_info[iter]);
++	}
++out:
++	*pcount = count;
++	return 0;
++}
++
++int tracker_mark_dirty_blocks(dev_t dev_id,
++			      struct blk_snap_block_range *block_ranges,
++			      unsigned int count)
++{
++	int ret = 0;
++	struct tracker *tracker;
++	struct block_device *bdev;
++
++	bdev = blkdev_get_by_dev(dev_id, 0, NULL);
++	if (IS_ERR(bdev)) {
++		pr_err("Cannot open device [%u:%u]\n", MAJOR(dev_id),
++		       MINOR(dev_id));
++		return PTR_ERR(bdev);
++	}
++
++	pr_debug("Marking [%d] dirty blocks for device [%u:%u]\n", count,
++		 MAJOR(dev_id), MINOR(dev_id));
++
++	tracker = tracker_get_by_dev(bdev);
++	if (IS_ERR(tracker)) {
++		pr_err("Failed to get tracker for device [%u:%u]\n",
++		       MAJOR(dev_id), MINOR(dev_id));
++		ret = PTR_ERR(tracker);
++		goto put_bdev;
++	}
++	if (!tracker) {
++		pr_err("Cannot find tracker for device [%u:%u]\n",
++		       MAJOR(dev_id), MINOR(dev_id));
++		ret = -ENODEV;
++		goto put_bdev;
++	}
++
++	ret = cbt_map_mark_dirty_blocks(tracker->cbt_map, block_ranges, count);
++	if (ret)
++		pr_err("Failed to set CBT table. errno=%d\n", abs(ret));
++
++	tracker_put(tracker);
++put_bdev:
++	blkdev_put(bdev, 0);
++	return ret;
++}
+diff --git a/drivers/block/blksnap/tracker.h b/drivers/block/blksnap/tracker.h
+new file mode 100644
+index 000000000000..a9b0bec7b601
+--- /dev/null
++++ b/drivers/block/blksnap/tracker.h
+@@ -0,0 +1,71 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#pragma once
++#include
++#include
++#include
++#include
++#include
++#include
++
++struct cbt_map;
++struct diff_area;
++
++/**
++ * struct tracker - Tracker for a block device.
++ *
++ * @flt:
++ *	The block device filter. Its reference counter protects the
++ *	structure from being released during the processing of an ioctl.
++ * @link:
++ *	List header.
++ * @dev_id:
++ *	Original block device ID.
++ * @snapshot_is_taken:
++ *	Indicates that a snapshot was taken for the device whose bios are
++ *	handled by this tracker.
++ * @cbt_map:
++ *	Pointer to a change block tracker map.
++ * @diff_area:
++ *	Pointer to a difference area.
++ *
++ * The main goal of the tracker is to handle bios. The tracker detects
++ * the ranges of sectors that will change and transmits them to the CBT
++ * map and to the difference area.
++ */ ++struct tracker { ++ struct bdev_filter flt; ++ struct list_head link; ++ dev_t dev_id; ++ ++ atomic_t snapshot_is_taken; ++ ++ struct cbt_map *cbt_map; ++ struct diff_area *diff_area; ++}; ++ ++void tracker_lock(void); ++void tracker_unlock(void); ++ ++static inline void tracker_put(struct tracker *tracker) ++{ ++ if (likely(tracker)) ++ bdev_filter_put(&tracker->flt); ++}; ++ ++struct tracker *tracker_get_by_dev(struct block_device *bdev); ++ ++int tracker_init(void); ++void tracker_done(void); ++ ++struct tracker *tracker_create_or_get(dev_t dev_id); ++int tracker_remove(dev_t dev_id); ++int tracker_collect(int max_count, struct blk_snap_cbt_info *cbt_info, ++ int *pcount); ++int tracker_read_cbt_bitmap(dev_t dev_id, unsigned int offset, size_t length, ++ char __user *user_buff); ++int tracker_mark_dirty_blocks(dev_t dev_id, ++ struct blk_snap_block_range *block_ranges, ++ unsigned int count); ++ ++int tracker_take_snapshot(struct tracker *tracker); ++void tracker_release_snapshot(struct tracker *tracker); +-- +2.20.1 + diff --git a/patches/lk5.19/0007-block-blksnap-map-of-change-block-tracking.patch b/patches/lk5.19/0007-block-blksnap-map-of-change-block-tracking.patch new file mode 100644 index 00000000..94d5f7c7 --- /dev/null +++ b/patches/lk5.19/0007-block-blksnap-map-of-change-block-tracking.patch @@ -0,0 +1,423 @@ +From 7c156316921f6233c8a58810dcd4fe73acbc4ee4 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:33:55 +0200 +Subject: [PATCH 07/20] block, blksnap: map of change block tracking + +Description of the struct cbt_map for storing change map data and +functions for managing this map. + +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/cbt_map.c | 280 ++++++++++++++++++++++++++++++++ + drivers/block/blksnap/cbt_map.h | 112 +++++++++++++ + 2 files changed, 392 insertions(+) + create mode 100644 drivers/block/blksnap/cbt_map.c + create mode 100644 drivers/block/blksnap/cbt_map.h + +diff --git a/drivers/block/blksnap/cbt_map.c b/drivers/block/blksnap/cbt_map.c +new file mode 100644 +index 000000000000..aac0f9236d1a +--- /dev/null ++++ b/drivers/block/blksnap/cbt_map.c +@@ -0,0 +1,280 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-cbt_map: " fmt ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "cbt_map.h" ++#include "params.h" ++ ++static inline unsigned long long count_by_shift(sector_t capacity, ++ unsigned long long shift) ++{ ++ sector_t blk_size = 1ull << (shift - SECTOR_SHIFT); ++ ++ return round_up(capacity, blk_size) / blk_size; ++} ++ ++static void cbt_map_calculate_block_size(struct cbt_map *cbt_map) ++{ ++ unsigned long long shift; ++ unsigned long long count; ++ ++ /** ++ * The size of the tracking block is calculated based on the size of the disk ++ * so that the CBT table does not exceed a reasonable size. 
++ */ ++ shift = tracking_block_minimum_shift; ++ count = count_by_shift(cbt_map->device_capacity, shift); ++ ++ while (count > tracking_block_maximum_count) { ++ shift = shift << 1; ++ count = count_by_shift(cbt_map->device_capacity, shift); ++ } ++ ++ cbt_map->blk_size_shift = shift; ++ cbt_map->blk_count = count; ++} ++ ++static int cbt_map_allocate(struct cbt_map *cbt_map) ++{ ++ pr_debug("Allocate CBT map of %zu blocks\n", cbt_map->blk_count); ++ ++ cbt_map->read_map = big_buffer_alloc(cbt_map->blk_count, GFP_KERNEL); ++ if (cbt_map->read_map != NULL) ++ big_buffer_memset(cbt_map->read_map, 0); ++ ++ cbt_map->write_map = big_buffer_alloc(cbt_map->blk_count, GFP_KERNEL); ++ if (cbt_map->write_map != NULL) ++ big_buffer_memset(cbt_map->write_map, 0); ++ ++ if ((cbt_map->read_map == NULL) || (cbt_map->write_map == NULL)) { ++ pr_err("Cannot allocate CBT map. %zu blocks are required.\n", ++ cbt_map->blk_count); ++ return -ENOMEM; ++ } ++ ++ cbt_map->snap_number_previous = 0; ++ cbt_map->snap_number_active = 1; ++ generate_random_uuid(cbt_map->generation_id.b); ++ cbt_map->is_corrupted = false; ++ ++ return 0; ++} ++ ++static void cbt_map_deallocate(struct cbt_map *cbt_map) ++{ ++ cbt_map->is_corrupted = false; ++ ++ if (cbt_map->read_map != NULL) { ++ big_buffer_free(cbt_map->read_map); ++ cbt_map->read_map = NULL; ++ } ++ ++ if (cbt_map->write_map != NULL) { ++ big_buffer_free(cbt_map->write_map); ++ cbt_map->write_map = NULL; ++ } ++} ++ ++int cbt_map_reset(struct cbt_map *cbt_map, sector_t device_capacity) ++{ ++ cbt_map_deallocate(cbt_map); ++ ++ cbt_map->device_capacity = device_capacity; ++ cbt_map_calculate_block_size(cbt_map); ++ cbt_map->is_corrupted = false; ++ ++ return cbt_map_allocate(cbt_map); ++} ++ ++static inline void cbt_map_destroy(struct cbt_map *cbt_map) ++{ ++ pr_debug("CBT map destroy\n"); ++ ++ cbt_map_deallocate(cbt_map); ++ kfree(cbt_map); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_cbt_map); ++#endif ++} ++ ++struct cbt_map *cbt_map_create(struct block_device *bdev) ++{ ++ struct cbt_map *cbt_map = NULL; ++ ++ pr_debug("CBT map create\n"); ++ ++ cbt_map = kzalloc(sizeof(struct cbt_map), GFP_KERNEL); ++ if (cbt_map == NULL) ++ return NULL; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_cbt_map); ++#endif ++ cbt_map->device_capacity = bdev_nr_sectors(bdev); ++ cbt_map_calculate_block_size(cbt_map); ++ ++ if (cbt_map_allocate(cbt_map)) { ++ cbt_map_destroy(cbt_map); ++ return NULL; ++ } ++ ++ spin_lock_init(&cbt_map->locker); ++ kref_init(&cbt_map->kref); ++ cbt_map->is_corrupted = false; ++ ++ return cbt_map; ++} ++ ++void cbt_map_destroy_cb(struct kref *kref) ++{ ++ cbt_map_destroy(container_of(kref, struct cbt_map, kref)); ++} ++ ++void cbt_map_switch(struct cbt_map *cbt_map) ++{ ++ pr_debug("CBT map switch\n"); ++ spin_lock(&cbt_map->locker); ++ ++ big_buffer_memcpy(cbt_map->read_map, cbt_map->write_map); ++ ++ cbt_map->snap_number_previous = cbt_map->snap_number_active; ++ ++cbt_map->snap_number_active; ++ if (cbt_map->snap_number_active == 256) { ++ cbt_map->snap_number_active = 1; ++ ++ big_buffer_memset(cbt_map->write_map, 0); ++ ++ generate_random_uuid(cbt_map->generation_id.b); ++ ++ pr_debug("CBT reset\n"); ++ } ++ spin_unlock(&cbt_map->locker); ++} ++ ++static inline int _cbt_map_set(struct cbt_map *cbt_map, sector_t sector_start, ++ sector_t sector_cnt, u8 snap_number, ++ struct big_buffer *map) ++{ ++ int res = 0; ++ u8 num; ++ size_t cbt_block; ++ size_t cbt_block_first = (size_t)( ++ 
sector_start >> (cbt_map->blk_size_shift - SECTOR_SHIFT)); ++ size_t cbt_block_last = ++ (size_t)((sector_start + sector_cnt - 1) >> ++ (cbt_map->blk_size_shift - SECTOR_SHIFT)); //inclusive ++ ++ for (cbt_block = cbt_block_first; cbt_block <= cbt_block_last; ++ ++cbt_block) { ++ if (unlikely(cbt_block >= cbt_map->blk_count)) { ++ pr_err("Block index is too large.\n"); ++ pr_err("Block #%zu was demanded, map size %zu blocks.\n", ++ cbt_block, cbt_map->blk_count); ++ res = -EINVAL; ++ break; ++ } ++ ++ res = big_buffer_byte_get(map, cbt_block, &num); ++ if (unlikely(res)) { ++ pr_err("CBT table out of range\n"); ++ break; ++ } ++ ++ if (num < snap_number) { ++ res = big_buffer_byte_set(map, cbt_block, snap_number); ++ if (unlikely(res)) { ++ pr_err("CBT table out of range\n"); ++ break; ++ } ++ } ++ } ++ return res; ++} ++ ++int cbt_map_set(struct cbt_map *cbt_map, sector_t sector_start, ++ sector_t sector_cnt) ++{ ++ int res; ++ ++ spin_lock(&cbt_map->locker); ++ if (unlikely(cbt_map->is_corrupted)) { ++ spin_unlock(&cbt_map->locker); ++ return -EINVAL; ++ } ++ res = _cbt_map_set(cbt_map, sector_start, sector_cnt, ++ (u8)cbt_map->snap_number_active, cbt_map->write_map); ++ if (unlikely(res)) ++ cbt_map->is_corrupted = true; ++ ++ spin_unlock(&cbt_map->locker); ++ ++ return res; ++} ++ ++int cbt_map_set_both(struct cbt_map *cbt_map, sector_t sector_start, ++ sector_t sector_cnt) ++{ ++ int res; ++ ++ spin_lock(&cbt_map->locker); ++ if (unlikely(cbt_map->is_corrupted)) { ++ spin_unlock(&cbt_map->locker); ++ return -EINVAL; ++ } ++ res = _cbt_map_set(cbt_map, sector_start, sector_cnt, ++ (u8)cbt_map->snap_number_active, cbt_map->write_map); ++ if (!res) ++ res = _cbt_map_set(cbt_map, sector_start, sector_cnt, ++ (u8)cbt_map->snap_number_previous, ++ cbt_map->read_map); ++ spin_unlock(&cbt_map->locker); ++ ++ return res; ++} ++ ++size_t cbt_map_read_to_user(struct cbt_map *cbt_map, char __user *user_buff, ++ size_t offset, size_t size) ++{ ++ size_t readed = 0; ++ size_t left_size; ++ size_t real_size = min((cbt_map->blk_count - offset), size); ++ ++ if (unlikely(cbt_map->is_corrupted)) { ++ pr_err("CBT table was corrupted\n"); ++ return -EFAULT; ++ } ++ ++ left_size = real_size - big_buffer_copy_to_user(user_buff, offset, ++ cbt_map->read_map, ++ real_size); ++ ++ if (left_size == 0) ++ readed = real_size; ++ else { ++ pr_err("Not all CBT data was read. Left [%zu] bytes\n", ++ left_size); ++ readed = real_size - left_size; ++ } ++ ++ return readed; ++} ++ ++int cbt_map_mark_dirty_blocks(struct cbt_map *cbt_map, ++ struct blk_snap_block_range *block_ranges, ++ unsigned int count) ++{ ++ int inx; ++ int ret = 0; ++ ++ for (inx = 0; inx < count; inx++) { ++ ret = cbt_map_set_both( ++ cbt_map, (sector_t)block_ranges[inx].sector_offset, ++ (sector_t)block_ranges[inx].sector_count); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} +diff --git a/drivers/block/blksnap/cbt_map.h b/drivers/block/blksnap/cbt_map.h +new file mode 100644 +index 000000000000..934f417f7da7 +--- /dev/null ++++ b/drivers/block/blksnap/cbt_map.h +@@ -0,0 +1,112 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++#include ++#include ++#include ++#include ++ ++#include "big_buffer.h" ++ ++struct blk_snap_block_range; ++ ++/** ++ * struct cbt_map - The table of changes for a block device. ++ * ++ * @kref: ++ * Reference counter. ++ * @locker: ++ * Locking for atomic modification of structure members. ++ * @blk_size_shift: ++ * The power of 2 used to specify the change tracking block size. 
++ * @blk_count: ++ * The number of change tracking blocks. ++ * @device_capacity: ++ * The actual capacity of the device. ++ * @read_map: ++ * A table of changes available for reading. This is the table that can ++ * be read after taking a snapshot. ++ * @write_map: ++ * The current table for tracking changes. ++ * @snap_number_active: ++ * The current sequential number of changes. This is the number that is written to ++ * the current table when the block data changes. ++ * @snap_number_previous: ++ * The previous sequential number of changes. This number is used to identify the ++ * blocks that were changed between the penultimate snapshot and the last snapshot. ++ * @generation_id: ++ * UUID of the generation of changes. ++ * @is_corrupted: ++ * A flag that the change tracking data is no longer reliable. ++ * ++ * The change block tracking map is a byte table. Each byte stores the ++ * sequential number of changes for one block. To determine which blocks have changed ++ * since the previous snapshot with the change number 4, it is enough to ++ * find all bytes with the number more than 4. ++ * ++ * Since one byte is allocated to track changes in one block, the change ++ * table is created again at the 255th snapshot. At the same time, a new ++ * unique generation identifier is generated. Tracking changes is ++ * possible only for tables of the same generation. ++ * ++ * There are two tables on the change block tracking map. One is ++ * available for reading, and the other is available for writing. At the moment of taking ++ * a snapshot, the tables are synchronized. The user's process, when ++ * calling the corresponding ioctl, can read the readable table. ++ * At the same time, the change tracking mechanism continues to work with ++ * the writable table. ++ * ++ * To provide the ability to mount a snapshot image as writeable, it is ++ * possible to make changes to both of these tables simultaneously. 
++ *
++ */
++struct cbt_map {
++	struct kref kref;
++
++	spinlock_t locker;
++
++	size_t blk_size_shift;
++	size_t blk_count;
++	sector_t device_capacity;
++
++	struct big_buffer *read_map;
++	struct big_buffer *write_map;
++
++	unsigned long snap_number_active;
++	unsigned long snap_number_previous;
++	uuid_t generation_id;
++
++	bool is_corrupted;
++};
++
++struct cbt_map *cbt_map_create(struct block_device *bdev);
++int cbt_map_reset(struct cbt_map *cbt_map, sector_t device_capacity);
++
++void cbt_map_destroy_cb(struct kref *kref);
++static inline void cbt_map_get(struct cbt_map *cbt_map)
++{
++	kref_get(&cbt_map->kref);
++};
++static inline void cbt_map_put(struct cbt_map *cbt_map)
++{
++	if (likely(cbt_map))
++		kref_put(&cbt_map->kref, cbt_map_destroy_cb);
++};
++
++void cbt_map_switch(struct cbt_map *cbt_map);
++int cbt_map_set(struct cbt_map *cbt_map, sector_t sector_start,
++		sector_t sector_cnt);
++int cbt_map_set_both(struct cbt_map *cbt_map, sector_t sector_start,
++		     sector_t sector_cnt);
++
++size_t cbt_map_read_to_user(struct cbt_map *cbt_map, char __user *user_buffer,
++			    size_t offset, size_t size);
++
++static inline size_t cbt_map_blk_size(struct cbt_map *cbt_map)
++{
++	return 1 << cbt_map->blk_size_shift;
++};
++
++int cbt_map_mark_dirty_blocks(struct cbt_map *cbt_map,
++			      struct blk_snap_block_range *block_ranges,
++			      unsigned int count);
+--
+2.20.1
+
diff --git a/patches/lk5.19/0008-block-blksnap-big-buffer-in-the-form-of-an-array-of-.patch b/patches/lk5.19/0008-block-blksnap-big-buffer-in-the-form-of-an-array-of-.patch
new file mode 100644
index 00000000..02b8f25a
--- /dev/null
+++ b/patches/lk5.19/0008-block-blksnap-big-buffer-in-the-form-of-an-array-of-.patch
@@ -0,0 +1,282 @@
+From 859a9e1f9c0d37bcb4e20c63fb201e424a006c08 Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:34:37 +0200
+Subject: [PATCH 08/20] block, blksnap: big buffer in the form of an array of
+ pages
+
+Description of the struct big_buffer, which is used to store
+sufficiently large amounts of data, such as a CBT map, and functions
+for working with it. On some systems, a kmalloc() allocation of several
+hundred kilobytes quite often fails with an error, while virtually
+mapped memory cannot be used when handling an I/O unit, since a page
+fault is possible there. A simple array of pages solves this problem.
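+
+The addressing scheme is simple: byte inx of the buffer lives in page
+(inx >> PAGE_SHIFT) at offset (inx & (PAGE_SIZE - 1)). A sketch of a
+helper built on this layout (shown for illustration only, it is not
+part of the patch):
+
+	static inline u8 *big_buffer_byte_addr(struct big_buffer *bbuff,
+					       size_t inx)
+	{
+		size_t page_inx = inx >> PAGE_SHIFT;
+
+		if (page_inx >= bbuff->pg_cnt)
+			return NULL; /* out of range */
+		return &bbuff->pg[page_inx][inx & (PAGE_SIZE - 1)];
+	}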
+
+Signed-off-by: Sergei Shtepa
+---
+ drivers/block/blksnap/big_buffer.c | 218 +++++++++++++++++++++++++++++
+ drivers/block/blksnap/big_buffer.h | 27 ++++
+ 2 files changed, 245 insertions(+)
+ create mode 100644 drivers/block/blksnap/big_buffer.c
+ create mode 100644 drivers/block/blksnap/big_buffer.h
+
+diff --git a/drivers/block/blksnap/big_buffer.c b/drivers/block/blksnap/big_buffer.c
+new file mode 100644
+index 000000000000..bcefc09653e4
+--- /dev/null
++++ b/drivers/block/blksnap/big_buffer.c
+@@ -0,0 +1,218 @@
++// SPDX-License-Identifier: GPL-2.0
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++#include
++#include
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++#include "memory_checker.h"
++#endif
++#include "big_buffer.h"
++
++static inline size_t page_count_calc(size_t buffer_size)
++{
++	size_t page_count = buffer_size / PAGE_SIZE;
++
++	if (buffer_size & (PAGE_SIZE - 1))
++		page_count += 1;
++	return page_count;
++}
++
++struct big_buffer *big_buffer_alloc(size_t buffer_size, int gfp_opt)
++{
++	int res = 0;
++	struct big_buffer *bbuff;
++	size_t count;
++	size_t inx;
++
++	count = page_count_calc(buffer_size);
++
++	bbuff = kzalloc(sizeof(struct big_buffer) + count * sizeof(void *),
++			gfp_opt);
++	if (bbuff == NULL)
++		return NULL;
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	memory_object_inc(memory_object_big_buffer);
++#endif
++	bbuff->pg_cnt = count;
++	for (inx = 0; inx < bbuff->pg_cnt; ++inx) {
++		struct page *pg = alloc_page(gfp_opt);
++
++		if (!pg) {
++			res = -ENOMEM;
++			break;
++		}
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++		memory_object_inc(memory_object_page);
++#endif
++		bbuff->pg[inx] = page_address(pg);
++	}
++
++	if (res) {
++		big_buffer_free(bbuff);
++		return NULL;
++	}
++
++	return bbuff;
++}
++
++void big_buffer_free(struct big_buffer *bbuff)
++{
++	size_t inx;
++	size_t count;
++
++	if (bbuff == NULL)
++		return;
++	/* Read pg_cnt only after the NULL check above. */
++	count = bbuff->pg_cnt;
++
++	for (inx = 0; inx < count; ++inx)
++		if (bbuff->pg[inx] != NULL) {
++			free_page((unsigned long)bbuff->pg[inx]);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++			memory_object_dec(memory_object_page);
++#endif
++		}
++
++	kfree(bbuff);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	memory_object_dec(memory_object_big_buffer);
++#endif
++}
++
++size_t big_buffer_copy_to_user(char __user *dst_user, size_t offset,
++			       struct big_buffer *bbuff, size_t length)
++{
++	size_t left_data_length;
++	int page_inx = offset / PAGE_SIZE;
++	size_t processed_len = 0;
++	size_t unordered = offset & (PAGE_SIZE - 1);
++
++	if (unordered) { //first
++		size_t page_len =
++			min_t(size_t, (PAGE_SIZE - unordered), length);
++
++		left_data_length =
++			copy_to_user(dst_user + processed_len,
++				     bbuff->pg[page_inx] + unordered, page_len);
++		if (left_data_length) {
++			pr_err("Failed to copy data from big_buffer to user buffer\n");
++			return processed_len;
++		}
++
++		++page_inx;
++		processed_len += page_len;
++	}
++
++	while ((processed_len < length) && (page_inx < bbuff->pg_cnt)) {
++		size_t page_len =
++			min_t(size_t, PAGE_SIZE, (length - processed_len));
++
++		left_data_length = copy_to_user(dst_user + processed_len,
++						bbuff->pg[page_inx], page_len);
++		if (left_data_length) {
++			pr_err("Failed to copy data from big_buffer to user buffer\n");
++			break;
++		}
++
++		++page_inx;
++		processed_len += page_len;
++	}
++
++	return processed_len;
++}
++
++size_t big_buffer_copy_from_user(const char __user *src_user, size_t offset,
++				 struct big_buffer *bbuff, size_t length)
++{
++	size_t left_data_length;
++	int page_inx = offset / PAGE_SIZE;
++	size_t
processed_len = 0; ++ size_t unordered = offset & (PAGE_SIZE - 1); ++ ++ if (unordered) { //first ++ size_t page_len = ++ min_t(size_t, (PAGE_SIZE - unordered), length); ++ ++ left_data_length = ++ copy_from_user(bbuff->pg[page_inx] + unordered, ++ src_user + processed_len, page_len); ++ if (left_data_length) { ++ pr_err("Failed to copy data from user buffer to big_buffer\n"); ++ return processed_len; ++ } ++ ++ ++page_inx; ++ processed_len += page_len; ++ } ++ ++ while ((processed_len < length) && (page_inx < bbuff->pg_cnt)) { ++ size_t page_len = ++ min_t(size_t, PAGE_SIZE, (length - processed_len)); ++ ++ left_data_length = ++ copy_from_user(bbuff->pg[page_inx], ++ src_user + processed_len, page_len); ++ if (left_data_length) { ++ pr_err("Failed to copy data from user buffer to big_buffer\n"); ++ break; ++ } ++ ++ ++page_inx; ++ processed_len += page_len; ++ } ++ ++ return processed_len; ++} ++ ++void *big_buffer_get_element(struct big_buffer *bbuff, size_t index, ++ size_t sizeof_element) ++{ ++ size_t elements_in_page = PAGE_SIZE / sizeof_element; ++ size_t pg_inx = index / elements_in_page; ++ size_t pg_ofs = (index - (pg_inx * elements_in_page)) * sizeof_element; ++ ++ if (pg_inx >= bbuff->pg_cnt) ++ return NULL; ++ ++ return bbuff->pg[pg_inx] + pg_ofs; ++} ++ ++void big_buffer_memset(struct big_buffer *bbuff, int value) ++{ ++ size_t inx; ++ ++ for (inx = 0; inx < bbuff->pg_cnt; ++inx) ++ memset(bbuff->pg[inx], value, PAGE_SIZE); ++} ++ ++void big_buffer_memcpy(struct big_buffer *dst, struct big_buffer *src) ++{ ++ size_t inx; ++ size_t count = min_t(size_t, dst->pg_cnt, src->pg_cnt); ++ ++ for (inx = 0; inx < count; ++inx) ++ memcpy(dst->pg[inx], src->pg[inx], PAGE_SIZE); ++} ++ ++int big_buffer_byte_get(struct big_buffer *bbuff, size_t inx, u8 *value) ++{ ++ size_t page_inx = inx >> PAGE_SHIFT; ++ size_t byte_pos = inx & (PAGE_SIZE - 1); ++ ++ if (page_inx >= bbuff->pg_cnt) ++ return -ENODATA; ++ ++ *value = bbuff->pg[page_inx][byte_pos]; ++ ++ return 0; ++} ++ ++int big_buffer_byte_set(struct big_buffer *bbuff, size_t inx, u8 value) ++{ ++ size_t page_inx = inx >> PAGE_SHIFT; ++ size_t byte_pos = inx & (PAGE_SIZE - 1); ++ ++ if (page_inx >= bbuff->pg_cnt) ++ return -ENODATA; ++ ++ bbuff->pg[page_inx][byte_pos] = value; ++ ++ return 0; ++} +diff --git a/drivers/block/blksnap/big_buffer.h b/drivers/block/blksnap/big_buffer.h +new file mode 100644 +index 000000000000..4519d5f11c8e +--- /dev/null ++++ b/drivers/block/blksnap/big_buffer.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++ ++struct big_buffer { ++ size_t pg_cnt; ++ u8 *pg[0]; ++}; ++ ++struct big_buffer *big_buffer_alloc(size_t count, int gfp_opt); ++void big_buffer_free(struct big_buffer *bbuff); ++ ++size_t big_buffer_copy_to_user(char __user *dst_user_buffer, size_t offset, ++ struct big_buffer *bbuff, size_t length); ++size_t big_buffer_copy_from_user(const char __user *src_user_buffer, ++ size_t offset, struct big_buffer *bbuff, ++ size_t length); ++ ++void *big_buffer_get_element(struct big_buffer *bbuff, size_t index, ++ size_t sizeof_element); ++ ++void big_buffer_memset(struct big_buffer *bbuff, int value); ++void big_buffer_memcpy(struct big_buffer *dst, struct big_buffer *src); ++ ++//byte access ++int big_buffer_byte_get(struct big_buffer *bbuff, size_t inx, u8 *value); ++int big_buffer_byte_set(struct big_buffer *bbuff, size_t inx, u8 value); +-- +2.20.1 + diff --git a/patches/lk5.19/0009-block-blksnap-minimum-data-storage-unit-of-the-origi.patch 
b/patches/lk5.19/0009-block-blksnap-minimum-data-storage-unit-of-the-origi.patch new file mode 100644 index 00000000..3ec28db5 --- /dev/null +++ b/patches/lk5.19/0009-block-blksnap-minimum-data-storage-unit-of-the-origi.patch @@ -0,0 +1,514 @@ +From 3ac895527d4b431079509912ec188cdd25c019ef Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:35:25 +0200 +Subject: [PATCH 09/20] block, blksnap: minimum data storage unit of the + original block device + +The struct chunk describes the minimum data storage unit of the original +block device. Functions for working with these minimal blocks implement +algorithms for reading and writing blocks. + +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/chunk.c | 352 ++++++++++++++++++++++++++++++++++ + drivers/block/blksnap/chunk.h | 129 +++++++++++++ + 2 files changed, 481 insertions(+) + create mode 100644 drivers/block/blksnap/chunk.c + create mode 100644 drivers/block/blksnap/chunk.h + +diff --git a/drivers/block/blksnap/chunk.c b/drivers/block/blksnap/chunk.c +new file mode 100644 +index 000000000000..d2bb41a9e9ea +--- /dev/null ++++ b/drivers/block/blksnap/chunk.c +@@ -0,0 +1,352 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-chunk: " fmt ++#include ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "params.h" ++#include "chunk.h" ++#include "diff_io.h" ++#include "diff_buffer.h" ++#include "diff_area.h" ++#include "diff_storage.h" ++ ++void chunk_diff_buffer_release(struct chunk *chunk) ++{ ++ if (unlikely(!chunk->diff_buffer)) ++ return; ++ ++ chunk_state_unset(chunk, CHUNK_ST_BUFFER_READY); ++ diff_buffer_release(chunk->diff_area, chunk->diff_buffer); ++ chunk->diff_buffer = NULL; ++} ++ ++void chunk_store_failed(struct chunk *chunk, int error) ++{ ++ struct diff_area *diff_area = chunk->diff_area; ++ ++ chunk_state_set(chunk, CHUNK_ST_FAILED); ++ chunk_diff_buffer_release(chunk); ++ diff_storage_free_region(chunk->diff_region); ++ chunk->diff_region = NULL; ++ ++ up(&chunk->lock); ++ if (error) ++ diff_area_set_corrupted(diff_area, error); ++}; ++ ++int chunk_schedule_storing(struct chunk *chunk, bool is_nowait) ++{ ++ struct diff_area *diff_area = chunk->diff_area; ++ ++ //pr_debug("Schedule storing chunk #%ld\n", chunk->number); ++ if (WARN(!list_is_first(&chunk->cache_link, &chunk->cache_link), ++ "The chunk already in the cache")) ++ return -EINVAL; ++ ++#ifdef CONFIG_BLK_SNAP_ALLOW_DIFF_STORAGE_IN_MEMORY ++ if (diff_area->in_memory) { ++ up(&chunk->lock); ++ return 0; ++ } ++#endif ++ if (!chunk->diff_region) { ++ struct diff_region *diff_region; ++ ++ diff_region = diff_storage_new_region( ++ diff_area->diff_storage, ++ diff_area_chunk_sectors(diff_area)); ++ if (IS_ERR(diff_region)) { ++ pr_debug("Cannot get store for chunk #%ld\n", ++ chunk->number); ++ return PTR_ERR(diff_region); ++ } ++ ++ chunk->diff_region = diff_region; ++ } ++ ++ return chunk_async_store_diff(chunk, is_nowait); ++} ++ ++void chunk_schedule_caching(struct chunk *chunk) ++{ ++ int in_cache_count = 0; ++ struct diff_area *diff_area = chunk->diff_area; ++ ++ might_sleep(); ++ ++ //pr_debug("Add chunk #%ld to cache\n", chunk->number); ++ spin_lock(&diff_area->caches_lock); ++ if (WARN(!list_is_first(&chunk->cache_link, &chunk->cache_link), ++ "The chunk already in the cache")) { ++ spin_unlock(&diff_area->caches_lock); ++ ++ chunk_store_failed(chunk, 0); ++ return; ++ } ++ ++ if (chunk_state_check(chunk, CHUNK_ST_DIRTY)) { ++ 
list_add_tail(&chunk->cache_link, ++ &diff_area->write_cache_queue); ++ in_cache_count = ++ atomic_inc_return(&diff_area->write_cache_count); ++ } else { ++ list_add_tail(&chunk->cache_link, &diff_area->read_cache_queue); ++ in_cache_count = ++ atomic_inc_return(&diff_area->read_cache_count); ++ } ++ spin_unlock(&diff_area->caches_lock); ++ ++ up(&chunk->lock); ++ ++ // Initiate the cache clearing process. ++ if ((in_cache_count > chunk_maximum_in_cache) && ++ !diff_area_is_corrupted(diff_area)) ++ queue_work(system_wq, &diff_area->cache_release_work); ++} ++ ++static void chunk_notify_load(void *ctx) ++{ ++ struct chunk *chunk = ctx; ++ int error = chunk->diff_io->error; ++ ++ diff_io_free(chunk->diff_io); ++ chunk->diff_io = NULL; ++ ++ might_sleep(); ++ ++ if (unlikely(error)) { ++ chunk_store_failed(chunk, error); ++ goto out; ++ } ++ ++ if (unlikely(chunk_state_check(chunk, CHUNK_ST_FAILED))) { ++ pr_err("Chunk in a failed state\n"); ++ up(&chunk->lock); ++ goto out; ++ } ++ ++ if (chunk_state_check(chunk, CHUNK_ST_LOADING)) { ++ int ret; ++ unsigned int current_flag; ++ ++ chunk_state_unset(chunk, CHUNK_ST_LOADING); ++ chunk_state_set(chunk, CHUNK_ST_BUFFER_READY); ++ ++ current_flag = memalloc_noio_save(); ++ ret = chunk_schedule_storing(chunk, false); ++ memalloc_noio_restore(current_flag); ++ if (ret) ++ chunk_store_failed(chunk, ret); ++ goto out; ++ } ++ ++ pr_err("invalid chunk state 0x%x\n", atomic_read(&chunk->state)); ++ up(&chunk->lock); ++out: ++ atomic_dec(&chunk->diff_area->pending_io_count); ++} ++ ++static void chunk_notify_store(void *ctx) ++{ ++ struct chunk *chunk = ctx; ++ int error = chunk->diff_io->error; ++ ++ diff_io_free(chunk->diff_io); ++ chunk->diff_io = NULL; ++ ++ might_sleep(); ++ ++ if (unlikely(error)) { ++ chunk_store_failed(chunk, error); ++ goto out; ++ } ++ ++ if (unlikely(chunk_state_check(chunk, CHUNK_ST_FAILED))) { ++ pr_err("Chunk in a failed state\n"); ++ chunk_store_failed(chunk, 0); ++ goto out; ++ } ++ if (chunk_state_check(chunk, CHUNK_ST_STORING)) { ++ chunk_state_unset(chunk, CHUNK_ST_STORING); ++ chunk_state_set(chunk, CHUNK_ST_STORE_READY); ++ ++ if (chunk_state_check(chunk, CHUNK_ST_DIRTY)) { ++ chunk_state_unset(chunk, CHUNK_ST_DIRTY); ++ chunk_diff_buffer_release(chunk); ++ } else { ++ unsigned int current_flag; ++ ++ current_flag = memalloc_noio_save(); ++ chunk_schedule_caching(chunk); ++ memalloc_noio_restore(current_flag); ++ goto out; ++ } ++ } else ++ pr_err("invalid chunk state 0x%x\n", atomic_read(&chunk->state)); ++ up(&chunk->lock); ++out: ++ atomic_dec(&chunk->diff_area->pending_io_count); ++} ++ ++struct chunk *chunk_alloc(struct diff_area *diff_area, unsigned long number) ++{ ++ struct chunk *chunk; ++ ++ chunk = kzalloc(sizeof(struct chunk), GFP_KERNEL); ++ if (!chunk) ++ return NULL; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_chunk); ++#endif ++ INIT_LIST_HEAD(&chunk->cache_link); ++ sema_init(&chunk->lock, 1); ++ chunk->diff_area = diff_area; ++ chunk->number = number; ++ atomic_set(&chunk->state, 0); ++ ++ return chunk; ++} ++ ++void chunk_free(struct chunk *chunk) ++{ ++ if (unlikely(!chunk)) ++ return; ++ ++ down(&chunk->lock); ++ chunk_diff_buffer_release(chunk); ++ diff_storage_free_region(chunk->diff_region); ++ chunk_state_set(chunk, CHUNK_ST_FAILED); ++ up(&chunk->lock); ++ ++ kfree(chunk); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_chunk); ++#endif ++} ++ ++/** ++ * chunk_async_store_diff() - Starts asynchronous storing of a chunk to the 
++ * difference storage. ++ * ++ */ ++int chunk_async_store_diff(struct chunk *chunk, bool is_nowait) ++{ ++ int ret; ++ struct diff_io *diff_io; ++ struct diff_region *region = chunk->diff_region; ++ ++ if (WARN(!list_is_first(&chunk->cache_link, &chunk->cache_link), ++ "The chunk already in the cache")) ++ return -EINVAL; ++ ++ diff_io = diff_io_new_async_write(chunk_notify_store, chunk, is_nowait); ++ if (unlikely(!diff_io)) { ++ if (is_nowait) ++ return -EAGAIN; ++ else ++ return -ENOMEM; ++ } ++ ++ WARN_ON(chunk->diff_io); ++ chunk->diff_io = diff_io; ++ chunk_state_set(chunk, CHUNK_ST_STORING); ++ atomic_inc(&chunk->diff_area->pending_io_count); ++ ++ ret = diff_io_do(chunk->diff_io, region, chunk->diff_buffer, is_nowait); ++ if (ret) { ++ atomic_dec(&chunk->diff_area->pending_io_count); ++ diff_io_free(chunk->diff_io); ++ chunk->diff_io = NULL; ++ } ++ ++ return ret; ++} ++ ++/** ++ * chunk_async_load_orig() - Starts asynchronous loading of a chunk from ++ * the original block device. ++ */ ++int chunk_async_load_orig(struct chunk *chunk, const bool is_nowait) ++{ ++ int ret; ++ struct diff_io *diff_io; ++ struct diff_region region = { ++ .bdev = chunk->diff_area->orig_bdev, ++ .sector = (sector_t)(chunk->number) * ++ diff_area_chunk_sectors(chunk->diff_area), ++ .count = chunk->sector_count, ++ }; ++ ++ diff_io = diff_io_new_async_read(chunk_notify_load, chunk, is_nowait); ++ if (unlikely(!diff_io)) { ++ if (is_nowait) ++ return -EAGAIN; ++ else ++ return -ENOMEM; ++ } ++ ++ WARN_ON(chunk->diff_io); ++ chunk->diff_io = diff_io; ++ chunk_state_set(chunk, CHUNK_ST_LOADING); ++ atomic_inc(&chunk->diff_area->pending_io_count); ++ ++ ret = diff_io_do(chunk->diff_io, ®ion, chunk->diff_buffer, is_nowait); ++ if (ret) { ++ atomic_dec(&chunk->diff_area->pending_io_count); ++ diff_io_free(chunk->diff_io); ++ chunk->diff_io = NULL; ++ } ++ return ret; ++} ++ ++/** ++ * chunk_load_orig() - Performs synchronous loading of a chunk from the ++ * original block device. ++ */ ++int chunk_load_orig(struct chunk *chunk) ++{ ++ int ret; ++ struct diff_io *diff_io; ++ struct diff_region region = { ++ .bdev = chunk->diff_area->orig_bdev, ++ .sector = (sector_t)(chunk->number) * ++ diff_area_chunk_sectors(chunk->diff_area), ++ .count = chunk->sector_count, ++ }; ++ ++ diff_io = diff_io_new_sync_read(); ++ if (unlikely(!diff_io)) ++ return -ENOMEM; ++ ++ ret = diff_io_do(diff_io, ®ion, chunk->diff_buffer, false); ++ if (!ret) ++ ret = diff_io->error; ++ ++ diff_io_free(diff_io); ++ return ret; ++} ++ ++/** ++ * chunk_load_diff() - Performs synchronous loading of a chunk from the ++ * difference storage. ++ */ ++int chunk_load_diff(struct chunk *chunk) ++{ ++ int ret; ++ struct diff_io *diff_io; ++ struct diff_region *region = chunk->diff_region; ++ ++ diff_io = diff_io_new_sync_read(); ++ if (unlikely(!diff_io)) ++ return -ENOMEM; ++ ++ ret = diff_io_do(diff_io, region, chunk->diff_buffer, false); ++ if (!ret) ++ ret = diff_io->error; ++ ++ diff_io_free(diff_io); ++ return ret; ++} +diff --git a/drivers/block/blksnap/chunk.h b/drivers/block/blksnap/chunk.h +new file mode 100644 +index 000000000000..7f4f321aceed +--- /dev/null ++++ b/drivers/block/blksnap/chunk.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++#include ++#include ++#include ++ ++struct diff_area; ++struct diff_region; ++struct diff_io; ++ ++/** ++ * enum chunk_st - Possible states for a chunk. ++ * ++ * @CHUNK_ST_FAILED: ++ * An error occurred while processing the chunk data. 
++ * @CHUNK_ST_DIRTY:
++ *	The chunk is in the dirty state. The chunk is marked dirty in case
++ *	there was a write operation to the snapshot image.
++ * @CHUNK_ST_BUFFER_READY:
++ *	The data of the chunk is ready to be read from the RAM buffer.
++ * @CHUNK_ST_STORE_READY:
++ *	The data of the chunk has been written to the difference storage.
++ * @CHUNK_ST_LOADING:
++ *	The data is being read from the original block device.
++ * @CHUNK_ST_STORING:
++ *	The data is being saved to the difference storage.
++ *
++ * Chunk life cycle.
++ * Copy-on-write when writing to the original device:
++ *	0 -> LOADING -> BUFFER_READY -> BUFFER_READY | STORING ->
++ *	BUFFER_READY | STORE_READY -> STORE_READY
++ * Write to the snapshot image:
++ *	0 -> LOADING -> BUFFER_READY | DIRTY -> DIRTY | STORING ->
++ *	BUFFER_READY | STORE_READY -> STORE_READY
++ */
++enum chunk_st {
++	CHUNK_ST_FAILED = (1 << 0),
++	CHUNK_ST_DIRTY = (1 << 1),
++	CHUNK_ST_BUFFER_READY = (1 << 2),
++	CHUNK_ST_STORE_READY = (1 << 3),
++	CHUNK_ST_LOADING = (1 << 4),
++	CHUNK_ST_STORING = (1 << 5),
++};
++
++/**
++ * struct chunk - Minimum data storage unit.
++ *
++ * @cache_link:
++ *	The list header that allows creating caches of chunks.
++ * @diff_area:
++ *	Pointer to the difference area - the storage of changes for a
++ *	specific device.
++ * @number:
++ *	Sequential number of the chunk.
++ * @sector_count:
++ *	Number of sectors in the current chunk. This is relevant primarily
++ *	for the last chunk, which may be smaller than the others.
++ * @lock:
++ *	Binary semaphore. Syncs access to the chunk's fields: state,
++ *	diff_buffer, diff_region and diff_io.
++ * @state:
++ *	Defines the state of a chunk. May contain CHUNK_ST_* bits.
++ * @diff_buffer:
++ *	Pointer to &struct diff_buffer. Describes a buffer in the memory
++ *	for storing the chunk data.
++ * @diff_region:
++ *	Pointer to &struct diff_region. Describes a copy of the chunk data
++ *	on the difference storage.
++ * @diff_io:
++ *	Provides I/O operations for a chunk.
++ *
++ * This structure describes the block of data that the module operates
++ * with when executing the copy-on-write algorithm and when performing I/O
++ * to snapshot images.
++ *
++ * If the data of the chunk has been changed or has just been read, then
++ * the chunk gets into the cache.
++ *
++ * The semaphore is locked for writing if there is no actual data in the
++ * buffer, since a block of data is being read from the original device or
++ * from the difference storage. If data is being read from or written to
++ * the diff_buffer, the semaphore must be locked.
++ */ ++struct chunk { ++ struct list_head cache_link; ++ struct diff_area *diff_area; ++ ++ unsigned long number; ++ sector_t sector_count; ++ ++ struct semaphore lock; ++ ++ atomic_t state; ++ struct diff_buffer *diff_buffer; ++ struct diff_region *diff_region; ++ struct diff_io *diff_io; ++}; ++ ++static inline void chunk_state_set(struct chunk *chunk, int st) ++{ ++ atomic_or(st, &chunk->state); ++}; ++ ++static inline void chunk_state_unset(struct chunk *chunk, int st) ++{ ++ atomic_and(~st, &chunk->state); ++}; ++ ++static inline bool chunk_state_check(struct chunk *chunk, int st) ++{ ++ return !!(atomic_read(&chunk->state) & st); ++}; ++ ++struct chunk *chunk_alloc(struct diff_area *diff_area, unsigned long number); ++void chunk_free(struct chunk *chunk); ++ ++int chunk_schedule_storing(struct chunk *chunk, bool is_nowait); ++void chunk_diff_buffer_release(struct chunk *chunk); ++void chunk_store_failed(struct chunk *chunk, int error); ++ ++void chunk_schedule_caching(struct chunk *chunk); ++ ++/* Asynchronous operations are used to implement the COW algorithm. */ ++int chunk_async_store_diff(struct chunk *chunk, bool is_nowait); ++int chunk_async_load_orig(struct chunk *chunk, const bool is_nowait); ++ ++/* Synchronous operations are used to implement reading and writing to the snapshot image. */ ++int chunk_load_orig(struct chunk *chunk); ++int chunk_load_diff(struct chunk *chunk); +-- +2.20.1 + diff --git a/patches/lk5.19/0010-block-blksnap-buffer-in-memory-for-the-minimum-data-.patch b/patches/lk5.19/0010-block-blksnap-buffer-in-memory-for-the-minimum-data-.patch new file mode 100644 index 00000000..c734d6f5 --- /dev/null +++ b/patches/lk5.19/0010-block-blksnap-buffer-in-memory-for-the-minimum-data-.patch @@ -0,0 +1,258 @@ +From 368a0d654f130c2b930fcfbd1a8c00156b56de5c Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:36:03 +0200 +Subject: [PATCH 10/20] block, blksnap: buffer in memory for the minimum data + storage unit + +The struct diff_buffer describes a buffer in memory for the minimum data +storage block of the original block device (struct chunk). +Buffer allocation and release functions allow to reduce the number of +allocations and releases of a large number of memory pages. 
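+
+This is the classic free-list pooling pattern. A minimal user-space
+sketch of the same idea (illustrative only, not the module's API; a
+fixed-size singly linked free list stands in for free_diff_buffers):
+
+	#include <stdlib.h>
+
+	struct buf {
+		struct buf *next;
+	};
+
+	struct pool {
+		struct buf *head;	/* free buffers ready for reuse */
+		int count, limit;	/* current and maximum pool size */
+		size_t buf_size;
+	};
+
+	static struct buf *pool_take(struct pool *p)
+	{
+		struct buf *b = p->head;
+
+		if (b) {			/* reuse a pooled buffer */
+			p->head = b->next;
+			p->count--;
+			return b;
+		}
+		return malloc(p->buf_size);	/* pool empty - allocate */
+	}
+
+	static void pool_put(struct pool *p, struct buf *b)
+	{
+		if (p->count >= p->limit) {	/* pool full - really free */
+			free(b);
+			return;
+		}
+		b->next = p->head;		/* keep the buffer "hot" */
+		p->head = b;
+		p->count++;
+	}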
+ +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/diff_buffer.c | 146 ++++++++++++++++++++++++++++ + drivers/block/blksnap/diff_buffer.h | 78 +++++++++++++++ + 2 files changed, 224 insertions(+) + create mode 100644 drivers/block/blksnap/diff_buffer.c + create mode 100644 drivers/block/blksnap/diff_buffer.h + +diff --git a/drivers/block/blksnap/diff_buffer.c b/drivers/block/blksnap/diff_buffer.c +new file mode 100644 +index 000000000000..8f9532a4922b +--- /dev/null ++++ b/drivers/block/blksnap/diff_buffer.c +@@ -0,0 +1,146 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-diff-buffer: " fmt ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "params.h" ++#include "diff_buffer.h" ++#include "diff_area.h" ++ ++void diff_buffer_free(struct diff_buffer *diff_buffer) ++{ ++ size_t inx = 0; ++ struct page *page; ++ ++ if (unlikely(!diff_buffer)) ++ return; ++ ++ for (inx = 0; inx < diff_buffer->page_count; inx++) { ++ page = diff_buffer->pages[inx]; ++ if (page) { ++ __free_page(page); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_page); ++#endif ++ } ++ } ++ ++ kfree(diff_buffer); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_diff_buffer); ++#endif ++} ++ ++struct diff_buffer *diff_buffer_new(size_t page_count, size_t buffer_size, ++ gfp_t gfp_mask) ++{ ++ struct diff_buffer *diff_buffer; ++ size_t inx = 0; ++ struct page *page; ++ ++ if (unlikely(page_count <= 0)) ++ return NULL; ++ ++ /* ++ * In case of overflow, it is better to get a null pointer ++ * than a pointer to some memory area. Therefore + 1. ++ */ ++ diff_buffer = kzalloc(sizeof(struct diff_buffer) + ++ (page_count + 1) * sizeof(struct page *), ++ gfp_mask); ++ if (!diff_buffer) ++ return NULL; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_diff_buffer); ++#endif ++ INIT_LIST_HEAD(&diff_buffer->link); ++ diff_buffer->size = buffer_size; ++ diff_buffer->page_count = page_count; ++ ++ for (inx = 0; inx < page_count; inx++) { ++ page = alloc_page(gfp_mask); ++ if (!page) ++ goto fail; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_page); ++#endif ++ diff_buffer->pages[inx] = page; ++ } ++ return diff_buffer; ++fail: ++ diff_buffer_free(diff_buffer); ++ return NULL; ++} ++ ++struct diff_buffer *diff_buffer_take(struct diff_area *diff_area, ++ const bool is_nowait) ++{ ++ struct diff_buffer *diff_buffer = NULL; ++ sector_t chunk_sectors; ++ size_t page_count; ++ size_t buffer_size; ++ ++ spin_lock(&diff_area->free_diff_buffers_lock); ++ diff_buffer = list_first_entry_or_null(&diff_area->free_diff_buffers, ++ struct diff_buffer, link); ++ if (diff_buffer) { ++ list_del(&diff_buffer->link); ++ atomic_dec(&diff_area->free_diff_buffers_count); ++ } ++ spin_unlock(&diff_area->free_diff_buffers_lock); ++ ++ /* Return free buffer if it was found in a pool */ ++ if (diff_buffer) ++ return diff_buffer; ++ ++ /* Allocate new buffer */ ++ chunk_sectors = diff_area_chunk_sectors(diff_area); ++ page_count = round_up(chunk_sectors, SECTOR_IN_PAGE) / SECTOR_IN_PAGE; ++ buffer_size = chunk_sectors << SECTOR_SHIFT; ++ ++ diff_buffer = ++ diff_buffer_new(page_count, buffer_size, ++ is_nowait ? 
(GFP_NOIO | GFP_NOWAIT) : GFP_NOIO); ++ if (unlikely(!diff_buffer)) { ++ if (is_nowait) ++ return ERR_PTR(-EAGAIN); ++ else ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ return diff_buffer; ++} ++ ++void diff_buffer_release(struct diff_area *diff_area, ++ struct diff_buffer *diff_buffer) ++{ ++ if (atomic_read(&diff_area->free_diff_buffers_count) > ++ free_diff_buffer_pool_size) { ++ diff_buffer_free(diff_buffer); ++ return; ++ } ++ spin_lock(&diff_area->free_diff_buffers_lock); ++ list_add_tail(&diff_buffer->link, &diff_area->free_diff_buffers); ++ atomic_inc(&diff_area->free_diff_buffers_count); ++ spin_unlock(&diff_area->free_diff_buffers_lock); ++} ++ ++void diff_buffer_cleanup(struct diff_area *diff_area) ++{ ++ struct diff_buffer *diff_buffer = NULL; ++ ++ do { ++ spin_lock(&diff_area->free_diff_buffers_lock); ++ diff_buffer = ++ list_first_entry_or_null(&diff_area->free_diff_buffers, ++ struct diff_buffer, link); ++ if (diff_buffer) { ++ list_del(&diff_buffer->link); ++ atomic_dec(&diff_area->free_diff_buffers_count); ++ } ++ spin_unlock(&diff_area->free_diff_buffers_lock); ++ ++ if (diff_buffer) ++ diff_buffer_free(diff_buffer); ++ } while (diff_buffer); ++} +diff --git a/drivers/block/blksnap/diff_buffer.h b/drivers/block/blksnap/diff_buffer.h +new file mode 100644 +index 000000000000..1d504e445d59 +--- /dev/null ++++ b/drivers/block/blksnap/diff_buffer.h +@@ -0,0 +1,78 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++#include ++#include ++#include ++ ++struct diff_area; ++ ++/** ++ * struct diff_buffer - Difference buffer. ++ * @link: ++ * The list header allows to create a pool of the diff_buffer structures. ++ * @size: ++ * Count of bytes in the buffer. ++ * @page_count: ++ * The number of pages reserved for the buffer. ++ * @pages: ++ * An array of pointers to pages. ++ * ++ * Describes the memory buffer for a chunk in the memory. ++ */ ++struct diff_buffer { ++ struct list_head link; ++ size_t size; ++ size_t page_count; ++ struct page *pages[0]; ++}; ++ ++/** ++ * struct diff_buffer_iter - Iterator for &struct diff_buffer. ++ * @page: ++ * A pointer to the current page. ++ * @offset: ++ * The offset in bytes in the current page. ++ * @bytes: ++ * The number of bytes that can be read or written from the current page. ++ * ++ * It is convenient to use when copying data from or to &struct bio_vec. ++ */ ++struct diff_buffer_iter { ++ struct page *page; ++ size_t offset; ++ size_t bytes; ++}; ++ ++#define SECTOR_IN_PAGE (1 << (PAGE_SHIFT - SECTOR_SHIFT)) ++ ++static inline bool diff_buffer_iter_get(struct diff_buffer *diff_buffer, ++ sector_t ofs, ++ struct diff_buffer_iter *iter) ++{ ++ size_t page_inx; ++ ++ if (diff_buffer->size <= (ofs << SECTOR_SHIFT)) ++ return false; ++ ++ page_inx = ofs >> (PAGE_SHIFT - SECTOR_SHIFT); ++ ++ iter->page = diff_buffer->pages[page_inx]; ++ iter->offset = (size_t)(ofs & (SECTOR_IN_PAGE - 1)) << SECTOR_SHIFT; ++ /* ++ * The size cannot exceed the size of the page, taking into account ++ * the offset in this page. ++ * But at the same time it is unacceptable to go beyond the allocated ++ * buffer. 
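++	 * For example, with 4 KiB pages (SECTOR_IN_PAGE == 8) and ofs == 9,
++	 * this gives page_inx == 1 and iter->offset == 512; for a buffer of
++	 * 12 sectors, iter->bytes == min(4096 - 512, 6144 - 4608) == 1536.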
++ */ ++ iter->bytes = min_t(size_t, (PAGE_SIZE - iter->offset), ++ (diff_buffer->size - (ofs << SECTOR_SHIFT))); ++ ++ return true; ++}; ++ ++struct diff_buffer *diff_buffer_take(struct diff_area *diff_area, ++ const bool is_nowait); ++void diff_buffer_release(struct diff_area *diff_area, ++ struct diff_buffer *diff_buffer); ++void diff_buffer_cleanup(struct diff_area *diff_area); +-- +2.20.1 + diff --git a/patches/lk5.19/0011-block-blksnap-functions-and-structures-for-performin.patch b/patches/lk5.19/0011-block-blksnap-functions-and-structures-for-performin.patch new file mode 100644 index 00000000..e5758d81 --- /dev/null +++ b/patches/lk5.19/0011-block-blksnap-functions-and-structures-for-performin.patch @@ -0,0 +1,359 @@ +From 82e8fd0f13fc708f750e21f5e078437cda4960f7 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:37:00 +0200 +Subject: [PATCH 11/20] block, blksnap: functions and structures for performing + block I/O operations + +Provides synchronous and asynchronous block I/O operations for the +buffer of the minimum data storage block (struct diff_buffer). + +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/diff_io.c | 205 ++++++++++++++++++++++++++++++++ + drivers/block/blksnap/diff_io.h | 122 +++++++++++++++++++ + 2 files changed, 327 insertions(+) + create mode 100644 drivers/block/blksnap/diff_io.c + create mode 100644 drivers/block/blksnap/diff_io.h + +diff --git a/drivers/block/blksnap/diff_io.c b/drivers/block/blksnap/diff_io.c +new file mode 100644 +index 000000000000..075f3d6bbe74 +--- /dev/null ++++ b/drivers/block/blksnap/diff_io.c +@@ -0,0 +1,205 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-diff-io: " fmt ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "diff_io.h" ++#include "diff_buffer.h" ++ ++#define SECTORS_IN_PAGE (PAGE_SIZE / SECTOR_SIZE) ++ ++struct bio_set diff_io_bioset = { 0 }; ++ ++int diff_io_init(void) ++{ ++ return bioset_init(&diff_io_bioset, 64, 0, ++ BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER); ++} ++ ++void diff_io_done(void) ++{ ++ bioset_exit(&diff_io_bioset); ++} ++ ++static void diff_io_notify_cb(struct work_struct *work) ++{ ++ struct diff_io_async *async = ++ container_of(work, struct diff_io_async, work); ++ ++ might_sleep(); ++ async->notify_cb(async->ctx); ++} ++ ++static void diff_io_endio(struct bio *bio) ++{ ++ struct diff_io *diff_io = bio->bi_private; ++ ++ cant_sleep(); ++ if (bio->bi_status != BLK_STS_OK) ++ diff_io->error = -EIO; ++ ++ if (atomic_dec_and_test(&diff_io->bio_count)) { ++ if (diff_io->is_sync_io) ++ complete(&diff_io->notify.sync.completion); ++ else ++ queue_work(system_wq, &diff_io->notify.async.work); ++ } ++ ++ bio_put(bio); ++} ++ ++static inline struct diff_io *diff_io_new(bool is_write, bool is_nowait) ++{ ++ struct diff_io *diff_io; ++ gfp_t gfp_mask = is_nowait ? 
(GFP_NOIO | GFP_NOWAIT) : GFP_NOIO; ++ ++ diff_io = kzalloc(sizeof(struct diff_io), gfp_mask); ++ if (unlikely(!diff_io)) ++ return NULL; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_diff_io); ++#endif ++ diff_io->error = 0; ++ diff_io->is_write = is_write; ++ atomic_set(&diff_io->bio_count, 0); ++ ++ return diff_io; ++} ++ ++struct diff_io *diff_io_new_sync(bool is_write) ++{ ++ struct diff_io *diff_io; ++ ++ diff_io = diff_io_new(is_write, false); ++ if (unlikely(!diff_io)) ++ return NULL; ++ ++ diff_io->is_sync_io = true; ++ init_completion(&diff_io->notify.sync.completion); ++ return diff_io; ++} ++ ++struct diff_io *diff_io_new_async(bool is_write, bool is_nowait, ++ void (*notify_cb)(void *ctx), void *ctx) ++{ ++ struct diff_io *diff_io; ++ ++ diff_io = diff_io_new(is_write, is_nowait); ++ if (unlikely(!diff_io)) ++ return NULL; ++ ++ diff_io->is_sync_io = false; ++ INIT_WORK(&diff_io->notify.async.work, diff_io_notify_cb); ++ diff_io->notify.async.ctx = ctx; ++ diff_io->notify.async.notify_cb = notify_cb; ++ return diff_io; ++} ++ ++static inline bool check_page_aligned(sector_t sector) ++{ ++ return !(sector & ((1ULL << (PAGE_SHIFT - SECTOR_SHIFT)) - 1)); ++} ++ ++static inline unsigned short calc_page_count(sector_t sectors) ++{ ++ return round_up(sectors, SECTOR_IN_PAGE) / SECTOR_IN_PAGE; ++} ++ ++int diff_io_do(struct diff_io *diff_io, struct diff_region *diff_region, ++ struct diff_buffer *diff_buffer, const bool is_nowait) ++{ ++ int ret = 0; ++ struct bio *bio = NULL; ++ struct bio *flush_bio = NULL; ++ struct page **current_page_ptr; ++ unsigned int opf; ++ gfp_t gfp; ++ unsigned short nr_iovecs; ++ sector_t processed = 0; ++ ++ if (unlikely(!check_page_aligned(diff_region->sector))) { ++ pr_err("Difference storage block should be aligned to PAGE_SIZE\n"); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ nr_iovecs = calc_page_count(diff_region->count); ++ if (unlikely(nr_iovecs > diff_buffer->page_count)) { ++ pr_err("The difference storage block is larger than the buffer size\n"); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ // Allocate both bios ++ opf = diff_io->is_write ? REQ_OP_WRITE : REQ_OP_READ; ++ gfp = GFP_NOIO | (is_nowait ? 
GFP_NOWAIT : 0);
++
++	bio = bio_alloc_bioset(diff_region->bdev, nr_iovecs,
++			       opf | REQ_SYNC | REQ_IDLE | REQ_FUA,
++			       gfp, &diff_io_bioset);
++	if (unlikely(!bio)) {
++		if (is_nowait)
++			ret = -EAGAIN;
++		else
++			ret = -ENOMEM;
++		goto fail;
++	}
++
++	flush_bio = bio_alloc_bioset(diff_region->bdev, 0,
++				     opf | REQ_SYNC | REQ_PREFLUSH,
++				     gfp, &diff_io_bioset);
++	if (unlikely(!flush_bio)) {
++		if (is_nowait)
++			ret = -EAGAIN;
++		else
++			ret = -ENOMEM;
++		goto fail;
++	}
++
++	atomic_set(&diff_io->bio_count, 2);
++
++	// Submit bio with data
++	bio_set_flag(bio, BIO_FILTERED);
++	bio->bi_end_io = diff_io_endio;
++	bio->bi_private = diff_io;
++	bio->bi_iter.bi_sector = diff_region->sector;
++
++	current_page_ptr = diff_buffer->pages;
++	while (processed < diff_region->count) {
++		sector_t bvec_len_sect;
++		unsigned int bvec_len;
++
++		bvec_len_sect = min_t(sector_t, SECTORS_IN_PAGE,
++				      diff_region->count - processed);
++		bvec_len = (unsigned int)(bvec_len_sect << SECTOR_SHIFT);
++
++		if (bio_add_page(bio, *current_page_ptr, bvec_len, 0) == 0) {
++			ret = -EFAULT;
++			goto fail;
++		}
++
++		current_page_ptr++;
++		processed += bvec_len_sect;
++	}
++	submit_bio_noacct(bio);
++
++	// Submit flush bio
++	bio_set_flag(flush_bio, BIO_FILTERED);
++	flush_bio->bi_end_io = diff_io_endio;
++	flush_bio->bi_private = diff_io;
++	flush_bio->bi_iter.bi_sector = 0;
++	submit_bio_noacct(flush_bio);
++
++	if (diff_io->is_sync_io)
++		wait_for_completion_io(&diff_io->notify.sync.completion);
++
++	return 0;
++fail:
++	if (bio)
++		bio_put(bio);
++	if (flush_bio)
++		bio_put(flush_bio);
++	return ret;
++}
+diff --git a/drivers/block/blksnap/diff_io.h b/drivers/block/blksnap/diff_io.h
+new file mode 100644
+index 000000000000..40cd3d88c838
+--- /dev/null
++++ b/drivers/block/blksnap/diff_io.h
+@@ -0,0 +1,122 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#pragma once
++#include
++#include
++
++struct diff_buffer;
++
++/**
++ * struct diff_region - Describes the location of the chunk's data on
++ *	the difference storage.
++ * @bdev:
++ *	The target block device.
++ * @sector:
++ *	The sector offset of the region's first sector.
++ * @count:
++ *	The count of sectors in the region.
++ */
++struct diff_region {
++	struct block_device *bdev;
++	sector_t sector;
++	sector_t count;
++};
++
++/**
++ * struct diff_io_sync - Structure for notification about completion of
++ *	synchronous I/O.
++ * @completion:
++ *	Indicates that the request has been processed.
++ *
++ * Allows the current thread to wait for the completion of the I/O
++ * operation.
++ */
++struct diff_io_sync {
++	struct completion completion;
++};
++
++/**
++ * struct diff_io_async - Structure for notification about completion of
++ *	asynchronous I/O.
++ * @work:
++ *	The &struct work_struct allows the completion handling to be
++ *	scheduled in a separate kernel thread.
++ * @notify_cb:
++ *	A pointer to the callback function that will be executed when
++ *	the I/O execution is completed.
++ * @ctx:
++ *	The context for the callback function &notify_cb.
++ *
++ * Allows scheduling the handling of an I/O operation's completion.
++ */
++struct diff_io_async {
++	struct work_struct work;
++	void (*notify_cb)(void *ctx);
++	void *ctx;
++};
++
++/**
++ * struct diff_io - Structure for I/O maintenance.
++ * @error:
++ *	Zero if the I/O operation is successful, or an error code if it fails.
++ * @bio_count:
++ *	The count of bio in the I/O request.
++ * @is_write:
++ *	Indicates that a write operation is being performed.
++ * @is_sync_io: ++ * Indicates that the operation is being performed synchronously. ++ * @notify: ++ * This union may contain the diff_io_sync or diff_io_async structure ++ * for synchronous or asynchronous request. ++ * ++ * The request to perform an I/O operation is executed for a region of sectors. ++ * Such a region may contain several bios. It is necessary to notify about the ++ * completion of processing of all bios. The diff_io structure allows to do it. ++ */ ++struct diff_io { ++ int error; ++ atomic_t bio_count; ++ bool is_write; ++ bool is_sync_io; ++ union { ++ struct diff_io_sync sync; ++ struct diff_io_async async; ++ } notify; ++}; ++ ++int diff_io_init(void); ++void diff_io_done(void); ++ ++static inline void diff_io_free(struct diff_io *diff_io) ++{ ++ kfree(diff_io); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (diff_io) ++ memory_object_dec(memory_object_diff_io); ++#endif ++} ++ ++struct diff_io *diff_io_new_sync(bool is_write); ++static inline struct diff_io *diff_io_new_sync_read(void) ++{ ++ return diff_io_new_sync(false); ++}; ++static inline struct diff_io *diff_io_new_sync_write(void) ++{ ++ return diff_io_new_sync(true); ++}; ++ ++struct diff_io *diff_io_new_async(bool is_write, bool is_nowait, ++ void (*notify_cb)(void *ctx), void *ctx); ++static inline struct diff_io * ++diff_io_new_async_read(void (*notify_cb)(void *ctx), void *ctx, bool is_nowait) ++{ ++ return diff_io_new_async(false, is_nowait, notify_cb, ctx); ++}; ++static inline struct diff_io * ++diff_io_new_async_write(void (*notify_cb)(void *ctx), void *ctx, bool is_nowait) ++{ ++ return diff_io_new_async(true, is_nowait, notify_cb, ctx); ++}; ++ ++int diff_io_do(struct diff_io *diff_io, struct diff_region *diff_region, ++ struct diff_buffer *diff_buffer, const bool is_nowait); +-- +2.20.1 + diff --git a/patches/lk5.19/0012-block-blksnap-storage-for-storing-difference-blocks.patch b/patches/lk5.19/0012-block-blksnap-storage-for-storing-difference-blocks.patch new file mode 100644 index 00000000..46d237b1 --- /dev/null +++ b/patches/lk5.19/0012-block-blksnap-storage-for-storing-difference-blocks.patch @@ -0,0 +1,442 @@ +From 272f1808072fb22db473b8aa8ff6ae54e63fc767 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:37:41 +0200 +Subject: [PATCH 12/20] block, blksnap: storage for storing difference blocks + +Provides management of regions of block devices available for storing +difference blocks of a snapshot. Contains lists of free and already +occupied regions. + +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/diff_storage.c | 316 +++++++++++++++++++++++++++ + drivers/block/blksnap/diff_storage.h | 94 ++++++++ + 2 files changed, 410 insertions(+) + create mode 100644 drivers/block/blksnap/diff_storage.c + create mode 100644 drivers/block/blksnap/diff_storage.h + +diff --git a/drivers/block/blksnap/diff_storage.c b/drivers/block/blksnap/diff_storage.c +new file mode 100644 +index 000000000000..7138ad53606d +--- /dev/null ++++ b/drivers/block/blksnap/diff_storage.c +@@ -0,0 +1,316 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-diff-storage: " fmt ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "params.h" ++#include "chunk.h" ++#include "diff_io.h" ++#include "diff_buffer.h" ++#include "diff_storage.h" ++ ++/** ++ * struct storage_bdev - Information about the opened block device. 
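++ * @link:
++ *	The list header links the structure into the list of opened block
++ *	devices (&diff_storage.storage_bdevs).
++ * @dev_id:
++ *	ID of the opened block device.
++ * @bdev:
++ *	A pointer to the opened block device.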
++ */ ++struct storage_bdev { ++ struct list_head link; ++ dev_t dev_id; ++ struct block_device *bdev; ++}; ++ ++/** ++ * struct storage_block - A storage unit reserved for storing differences. ++ * ++ */ ++struct storage_block { ++ struct list_head link; ++ struct block_device *bdev; ++ sector_t sector; ++ sector_t count; ++ sector_t used; ++}; ++ ++static inline void diff_storage_event_low(struct diff_storage *diff_storage) ++{ ++ struct blk_snap_event_low_free_space data = { ++ .requested_nr_sect = diff_storage_minimum, ++ }; ++ ++ diff_storage->requested += data.requested_nr_sect; ++ pr_debug( ++ "Diff storage low free space. Portion: %llu sectors, requested: %llu\n", ++ data.requested_nr_sect, diff_storage->requested); ++ event_gen(&diff_storage->event_queue, GFP_NOIO, ++ blk_snap_event_code_low_free_space, &data, sizeof(data)); ++} ++ ++struct diff_storage *diff_storage_new(void) ++{ ++ struct diff_storage *diff_storage; ++ ++ diff_storage = kzalloc(sizeof(struct diff_storage), GFP_KERNEL); ++ if (!diff_storage) ++ return NULL; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_diff_storage); ++#endif ++ kref_init(&diff_storage->kref); ++ spin_lock_init(&diff_storage->lock); ++ INIT_LIST_HEAD(&diff_storage->storage_bdevs); ++ INIT_LIST_HEAD(&diff_storage->empty_blocks); ++ INIT_LIST_HEAD(&diff_storage->filled_blocks); ++ ++ event_queue_init(&diff_storage->event_queue); ++ diff_storage_event_low(diff_storage); ++ ++ return diff_storage; ++} ++ ++static inline struct storage_block * ++first_empty_storage_block(struct diff_storage *diff_storage) ++{ ++ return list_first_entry_or_null(&diff_storage->empty_blocks, ++ struct storage_block, link); ++}; ++ ++static inline struct storage_block * ++first_filled_storage_block(struct diff_storage *diff_storage) ++{ ++ return list_first_entry_or_null(&diff_storage->filled_blocks, ++ struct storage_block, link); ++}; ++ ++static inline struct storage_bdev * ++first_storage_bdev(struct diff_storage *diff_storage) ++{ ++ return list_first_entry_or_null(&diff_storage->storage_bdevs, ++ struct storage_bdev, link); ++}; ++ ++void diff_storage_free(struct kref *kref) ++{ ++ struct diff_storage *diff_storage = ++ container_of(kref, struct diff_storage, kref); ++ struct storage_block *blk; ++ struct storage_bdev *storage_bdev; ++ ++ while ((blk = first_empty_storage_block(diff_storage))) { ++ list_del(&blk->link); ++ kfree(blk); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_storage_block); ++#endif ++ } ++ ++ while ((blk = first_filled_storage_block(diff_storage))) { ++ list_del(&blk->link); ++ kfree(blk); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_storage_block); ++#endif ++ } ++ ++ while ((storage_bdev = first_storage_bdev(diff_storage))) { ++ blkdev_put(storage_bdev->bdev, FMODE_READ | FMODE_WRITE); ++ list_del(&storage_bdev->link); ++ kfree(storage_bdev); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_storage_bdev); ++#endif ++ } ++ event_queue_done(&diff_storage->event_queue); ++ ++ kfree(diff_storage); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_diff_storage); ++#endif ++} ++ ++struct block_device *diff_storage_bdev_by_id(struct diff_storage *diff_storage, ++ dev_t dev_id) ++{ ++ struct block_device *bdev = NULL; ++ struct storage_bdev *storage_bdev; ++ ++ spin_lock(&diff_storage->lock); ++ list_for_each_entry(storage_bdev, &diff_storage->storage_bdevs, link) { ++ if (storage_bdev->dev_id == 
dev_id) { ++ bdev = storage_bdev->bdev; ++ break; ++ } ++ } ++ spin_unlock(&diff_storage->lock); ++ ++ return bdev; ++} ++ ++static inline struct block_device * ++diff_storage_add_storage_bdev(struct diff_storage *diff_storage, dev_t dev_id) ++{ ++ struct block_device *bdev; ++ struct storage_bdev *storage_bdev; ++ ++ bdev = blkdev_get_by_dev(dev_id, FMODE_READ | FMODE_WRITE, NULL); ++ if (IS_ERR(bdev)) { ++ pr_err("Failed to open device. errno=%d\n", ++ abs((int)PTR_ERR(bdev))); ++ return bdev; ++ } ++ ++ storage_bdev = kzalloc(sizeof(struct storage_bdev), GFP_KERNEL); ++ if (!storage_bdev) { ++ blkdev_put(bdev, FMODE_READ | FMODE_WRITE); ++ return ERR_PTR(-ENOMEM); ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_storage_bdev); ++#endif ++ storage_bdev->bdev = bdev; ++ storage_bdev->dev_id = dev_id; ++ INIT_LIST_HEAD(&storage_bdev->link); ++ ++ spin_lock(&diff_storage->lock); ++ list_add_tail(&storage_bdev->link, &diff_storage->storage_bdevs); ++ spin_unlock(&diff_storage->lock); ++ ++ return bdev; ++} ++ ++static inline int diff_storage_add_range(struct diff_storage *diff_storage, ++ struct block_device *bdev, ++ sector_t sector, sector_t count) ++{ ++ struct storage_block *storage_block; ++ ++ pr_debug("Add range to diff storage: [%u:%u] %llu:%llu\n", ++ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev), sector, count); ++ ++ storage_block = kzalloc(sizeof(struct storage_block), GFP_KERNEL); ++ if (!storage_block) ++ return -ENOMEM; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_storage_block); ++#endif ++ INIT_LIST_HEAD(&storage_block->link); ++ storage_block->bdev = bdev; ++ storage_block->sector = sector; ++ storage_block->count = count; ++ ++ spin_lock(&diff_storage->lock); ++ list_add_tail(&storage_block->link, &diff_storage->empty_blocks); ++ diff_storage->capacity += count; ++ spin_unlock(&diff_storage->lock); ++ ++ return 0; ++} ++ ++int diff_storage_append_block(struct diff_storage *diff_storage, dev_t dev_id, ++ struct big_buffer *ranges, ++ unsigned int range_count) ++{ ++ int ret; ++ int inx; ++ struct block_device *bdev; ++ struct blk_snap_block_range *range; ++ ++ pr_debug("Append %u blocks\n", range_count); ++ ++ bdev = diff_storage_bdev_by_id(diff_storage, dev_id); ++ if (!bdev) { ++ bdev = diff_storage_add_storage_bdev(diff_storage, dev_id); ++ if (IS_ERR(bdev)) ++ return PTR_ERR(bdev); ++ } ++ ++ for (inx = 0; inx < range_count; inx++) { ++ range = big_buffer_get_element( ++ ranges, inx, sizeof(struct blk_snap_block_range)); ++ if (unlikely(!range)) ++ return -EINVAL; ++ ++ ret = diff_storage_add_range(diff_storage, bdev, ++ range->sector_offset, ++ range->sector_count); ++ if (unlikely(ret)) ++ return ret; ++ } ++ ++ if (atomic_read(&diff_storage->low_space_flag) && ++ (diff_storage->capacity >= diff_storage->requested)) ++ atomic_set(&diff_storage->low_space_flag, 0); ++ ++ return 0; ++} ++ ++struct diff_region *diff_storage_new_region(struct diff_storage *diff_storage, ++ sector_t count) ++{ ++ int ret = 0; ++ struct diff_region *diff_region; ++ sector_t sectors_left; ++ ++ if (atomic_read(&diff_storage->overflow_flag)) ++ return ERR_PTR(-ENOSPC); ++ ++ diff_region = kzalloc(sizeof(struct diff_region), GFP_NOIO); ++ if (!diff_region) ++ return ERR_PTR(-ENOMEM); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_diff_region); ++#endif ++ spin_lock(&diff_storage->lock); ++ do { ++ struct storage_block *storage_block; ++ sector_t available; ++ ++ storage_block = 
first_empty_storage_block(diff_storage); ++ if (unlikely(!storage_block)) { ++ atomic_inc(&diff_storage->overflow_flag); ++ ret = -ENOSPC; ++ break; ++ } ++ ++ available = storage_block->count - storage_block->used; ++ if (likely(available >= count)) { ++ diff_region->bdev = storage_block->bdev; ++ diff_region->sector = ++ storage_block->sector + storage_block->used; ++ diff_region->count = count; ++ ++ storage_block->used += count; ++ diff_storage->filled += count; ++ break; ++ } ++ ++ list_del(&storage_block->link); ++ list_add_tail(&storage_block->link, ++ &diff_storage->filled_blocks); ++ /* ++ * If there is still free space in the storage block, but ++ * it is not enough to store a piece, then such a block is ++ * considered used. ++ * We believe that the storage blocks are large enough ++ * to accommodate several pieces entirely. ++ */ ++ diff_storage->filled += available; ++ } while (1); ++ sectors_left = diff_storage->requested - diff_storage->filled; ++ spin_unlock(&diff_storage->lock); ++ ++ if (ret) { ++ pr_err("Cannot get empty storage block\n"); ++ diff_storage_free_region(diff_region); ++ return ERR_PTR(ret); ++ } ++ ++ if ((sectors_left <= diff_storage_minimum) && ++ (atomic_inc_return(&diff_storage->low_space_flag) == 1)) ++ diff_storage_event_low(diff_storage); ++ ++ return diff_region; ++} +diff --git a/drivers/block/blksnap/diff_storage.h b/drivers/block/blksnap/diff_storage.h +new file mode 100644 +index 000000000000..f4385e19b6c9 +--- /dev/null ++++ b/drivers/block/blksnap/diff_storage.h +@@ -0,0 +1,94 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include "event_queue.h" ++#include "big_buffer.h" ++ ++struct diff_region; ++ ++/** ++ * struct diff_storage - Difference storage. ++ * ++ * @kref: ++ * The reference counter. ++ * @lock: ++ * Spinlock allows to guarantee the safety of linked lists. ++ * @storage_bdevs: ++ * List of opened block devices. Blocks for storing snapshot data can be ++ * located on different block devices. So, all opened block devices are ++ * located in this list. Blocks on opened block devices are allocated for ++ * storing the chunks data. ++ * @empty_blocks: ++ * List of empty blocks on storage. This list can be updated while ++ * holding a snapshot. This allows us to dynamically increase the ++ * storage size for these snapshots. ++ * @filled_blocks: ++ * List of filled blocks. When the blocks from the list of empty blocks are filled, ++ * we move them to the list of filled blocks. ++ * @capacity: ++ * Total amount of available storage space. ++ * @filled: ++ * The number of sectors already filled in. ++ * @requested: ++ * The number of sectors already requested from user space. ++ * @low_space_flag: ++ * The flag is set if the number of free regions available in the ++ * difference storage is less than the allowed minimum. ++ * @overflow_flag: ++ * The request for a free region failed due to the absence of free ++ * regions in the difference storage. ++ * @event_queue: ++ * A queue of events to pass events to user space. Diff storage and its ++ * owner can notify its snapshot about events like snapshot overflow, ++ * low free space and snapshot terminated. ++ * ++ * The difference storage manages the regions of block devices that are used ++ * to store the data of the original block devices in the snapshot. ++ * The difference storage is created one per snapshot and is used to store ++ * data from all the original snapshot block devices. 
At the same time, the ++ * difference storage itself can contain regions on various block devices. ++ */ ++struct diff_storage { ++ struct kref kref; ++ spinlock_t lock; ++ ++ struct list_head storage_bdevs; ++ struct list_head empty_blocks; ++ struct list_head filled_blocks; ++ ++ sector_t capacity; ++ sector_t filled; ++ sector_t requested; ++ ++ atomic_t low_space_flag; ++ atomic_t overflow_flag; ++ ++ struct event_queue event_queue; ++}; ++ ++struct diff_storage *diff_storage_new(void); ++void diff_storage_free(struct kref *kref); ++ ++static inline void diff_storage_get(struct diff_storage *diff_storage) ++{ ++ kref_get(&diff_storage->kref); ++}; ++static inline void diff_storage_put(struct diff_storage *diff_storage) ++{ ++ if (likely(diff_storage)) ++ kref_put(&diff_storage->kref, diff_storage_free); ++}; ++ ++int diff_storage_append_block(struct diff_storage *diff_storage, dev_t dev_id, ++ struct big_buffer *ranges, ++ unsigned int range_count); ++struct diff_region *diff_storage_new_region(struct diff_storage *diff_storage, ++ sector_t count); ++ ++static inline void diff_storage_free_region(struct diff_region *region) ++{ ++ kfree(region); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (region) ++ memory_object_dec(memory_object_diff_region); ++#endif ++} +-- +2.20.1 + diff --git a/patches/lk5.19/0013-block-blksnap-event-queue-from-the-difference-storag.patch b/patches/lk5.19/0013-block-blksnap-event-queue-from-the-difference-storag.patch new file mode 100644 index 00000000..011aa4f4 --- /dev/null +++ b/patches/lk5.19/0013-block-blksnap-event-queue-from-the-difference-storag.patch @@ -0,0 +1,189 @@ +From 3f4f51d12ed0ddb2dcb4376980cf2fb1c85b5b0a Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:38:27 +0200 +Subject: [PATCH 13/20] block, blksnap: event queue from the difference storage + +Provides transmission of events from the difference storage to the user +process. Only two events are currently defined. The first is that there +are few free regions in the difference storage. The second is that the +request for a free region for storing differences failed with an error, +since there are no more free regions left in the difference storage +(the snapshot overflow state). 
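+
+The kernel-side implementation is built on a wait queue. The same
+producer/consumer protocol can be sketched in user space with a mutex
+and a condition variable (illustrative only; none of these names belong
+to the module's interface):
+
+	#include <errno.h>
+	#include <pthread.h>
+	#include <stdlib.h>
+	#include <time.h>
+
+	struct event {
+		struct event *next;
+		int code;
+	};
+
+	struct event_queue {
+		struct event *head, *tail;
+		pthread_mutex_t lock;
+		pthread_cond_t cond;
+	};
+
+	/* Producer: append an event and wake up a waiting consumer. */
+	static int ev_gen(struct event_queue *q, int code)
+	{
+		struct event *ev = calloc(1, sizeof(*ev));
+
+		if (!ev)
+			return -ENOMEM;
+		ev->code = code;
+		pthread_mutex_lock(&q->lock);
+		if (q->tail)
+			q->tail->next = ev;
+		else
+			q->head = ev;
+		q->tail = ev;
+		pthread_mutex_unlock(&q->lock);
+		pthread_cond_signal(&q->cond);
+		return 0;
+	}
+
+	/* Consumer: block until an event arrives or the deadline expires;
+	 * a NULL result means the wait timed out. */
+	static struct event *ev_wait(struct event_queue *q,
+				     const struct timespec *deadline)
+	{
+		struct event *ev;
+
+		pthread_mutex_lock(&q->lock);
+		while (!q->head &&
+		       pthread_cond_timedwait(&q->cond, &q->lock,
+					      deadline) != ETIMEDOUT)
+			;
+		ev = q->head;
+		if (ev && !(q->head = ev->next))
+			q->tail = NULL;
+		pthread_mutex_unlock(&q->lock);
+		return ev;
+	}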
+ +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/event_queue.c | 90 +++++++++++++++++++++++++++++ + drivers/block/blksnap/event_queue.h | 64 ++++++++++++++++++++ + 2 files changed, 154 insertions(+) + create mode 100644 drivers/block/blksnap/event_queue.c + create mode 100644 drivers/block/blksnap/event_queue.h + +diff --git a/drivers/block/blksnap/event_queue.c b/drivers/block/blksnap/event_queue.c +new file mode 100644 +index 000000000000..abf0a584ae40 +--- /dev/null ++++ b/drivers/block/blksnap/event_queue.c +@@ -0,0 +1,90 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-event_queue: " fmt ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "event_queue.h" ++ ++void event_queue_init(struct event_queue *event_queue) ++{ ++ INIT_LIST_HEAD(&event_queue->list); ++ spin_lock_init(&event_queue->lock); ++ init_waitqueue_head(&event_queue->wq_head); ++} ++ ++void event_queue_done(struct event_queue *event_queue) ++{ ++ struct event *event; ++ ++ spin_lock(&event_queue->lock); ++ while (!list_empty(&event_queue->list)) { ++ event = list_first_entry(&event_queue->list, struct event, ++ link); ++ list_del(&event->link); ++ event_free(event); ++ } ++ spin_unlock(&event_queue->lock); ++} ++ ++int event_gen(struct event_queue *event_queue, gfp_t flags, int code, ++ const void *data, int data_size) ++{ ++ struct event *event; ++ ++ event = kzalloc(sizeof(struct event) + data_size, flags); ++ if (!event) ++ return -ENOMEM; ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_event); ++#endif ++ event->time = ktime_get(); ++ event->code = code; ++ event->data_size = data_size; ++ memcpy(event->data, data, data_size); ++ ++ pr_debug("Generate event: time=%lld code=%d data_size=%d\n", ++ event->time, event->code, event->data_size); ++ ++ spin_lock(&event_queue->lock); ++ list_add_tail(&event->link, &event_queue->list); ++ spin_unlock(&event_queue->lock); ++ ++ wake_up(&event_queue->wq_head); ++ return 0; ++} ++ ++struct event *event_wait(struct event_queue *event_queue, ++ unsigned long timeout_ms) ++{ ++ int ret; ++ ++ ret = wait_event_interruptible_timeout(event_queue->wq_head, ++ !list_empty(&event_queue->list), ++ timeout_ms); ++ ++ if (ret > 0) { ++ struct event *event; ++ ++ spin_lock(&event_queue->lock); ++ event = list_first_entry(&event_queue->list, struct event, ++ link); ++ list_del(&event->link); ++ spin_unlock(&event_queue->lock); ++ ++ pr_debug("Event received: time=%lld code=%d\n", event->time, ++ event->code); ++ return event; ++ } ++ if (ret == 0) ++ return ERR_PTR(-ENOENT); ++ ++ if (ret == -ERESTARTSYS) { ++ pr_debug("event waiting interrupted\n"); ++ return ERR_PTR(-EINTR); ++ } ++ ++ pr_err("Failed to wait event. errno=%d\n", abs(ret)); ++ return ERR_PTR(ret); ++} +diff --git a/drivers/block/blksnap/event_queue.h b/drivers/block/blksnap/event_queue.h +new file mode 100644 +index 000000000000..27d4b90a9f1d +--- /dev/null ++++ b/drivers/block/blksnap/event_queue.h +@@ -0,0 +1,64 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * struct event - An event to be passed to the user space. ++ * @link: ++ * The list header allows to combine events from the queue. ++ * @time: ++ * A timestamp indicates when an event occurred. ++ * @code: ++ * Event code. ++ * @data_size: ++ * The number of bytes in the event data array. ++ * @data: ++ * An array of event data. 
++ * ++ * Events can be different, so they contain different data. The size of the ++ * data array is not defined exactly, but it has limitations. The size of ++ * the event structure may exceed the PAGE_SIZE. ++ */ ++struct event { ++ struct list_head link; ++ ktime_t time; ++ int code; ++ int data_size; ++ char data[1]; /* up to PAGE_SIZE - sizeof(struct blk_snap_snapshot_event) */ ++}; ++ ++/** ++ * struct event_queue - A queue of &struct event. ++ * @list: ++ * Linked list for storing events. ++ * @lock: ++ * Spinlock allows to guarantee safety of the linked list. ++ * @wq_head: ++ * A wait queue allows to put a user thread in a waiting state until ++ * an event appears in the linked list. ++ */ ++struct event_queue { ++ struct list_head list; ++ spinlock_t lock; ++ struct wait_queue_head wq_head; ++}; ++ ++void event_queue_init(struct event_queue *event_queue); ++void event_queue_done(struct event_queue *event_queue); ++ ++int event_gen(struct event_queue *event_queue, gfp_t flags, int code, ++ const void *data, int data_size); ++struct event *event_wait(struct event_queue *event_queue, ++ unsigned long timeout_ms); ++static inline void event_free(struct event *event) ++{ ++ kfree(event); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (event) ++ memory_object_dec(memory_object_event); ++#endif ++}; +-- +2.20.1 + diff --git a/patches/lk5.19/0014-block-blksnap-owner-of-information-about-overwritten.patch b/patches/lk5.19/0014-block-blksnap-owner-of-information-about-overwritten.patch new file mode 100644 index 00000000..d94d7068 --- /dev/null +++ b/patches/lk5.19/0014-block-blksnap-owner-of-information-about-overwritten.patch @@ -0,0 +1,823 @@ +From a674dd5904d97e64073873725936a998efca6cb3 Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:39:13 +0200 +Subject: [PATCH 14/20] block, blksnap: owner of information about overwritten + blocks of the original block device + +This is perhaps the key component of the module. It stores information +about the modified blocks of the original device and the location of the +regions where these blocks are stored in the difference storage. +This information allows to restore the state of the block device at the +time of taking the snapshot and represent the snapshot image as a block +device. +When reading from a snapshot, if the block on the original device has +not yet been changed since the snapshot was taken, then the data is read +from the original block device. If the data on the original block device +has been overwritten, then the block is read from the difference storage. +Reads and writes are performed with minimal data storage blocks (struct +chunk). 
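+
+As a toy model of this algorithm in plain C (flat arrays stand in for
+the block devices and the difference storage; nothing here is the
+module's code):
+
+	#include <stdbool.h>
+	#include <stdint.h>
+	#include <string.h>
+
+	#define CHUNK_COUNT 8
+	#define CHUNK_SIZE  4
+
+	/* Toy model: "devices" are flat arrays, one byte per sector. */
+	static uint8_t original[CHUNK_COUNT * CHUNK_SIZE];
+	static uint8_t diff_store[CHUNK_COUNT * CHUNK_SIZE];
+	static bool copied[CHUNK_COUNT];  /* chunk already copied away? */
+
+	/* Write to the original device: copy the chunk away first (COW). */
+	static void original_write(unsigned int sector, uint8_t byte)
+	{
+		unsigned int nr = sector / CHUNK_SIZE;
+
+		if (!copied[nr]) {
+			memcpy(&diff_store[nr * CHUNK_SIZE],
+			       &original[nr * CHUNK_SIZE], CHUNK_SIZE);
+			copied[nr] = true;
+		}
+		original[sector] = byte;
+	}
+
+	/* Read from the snapshot image: pick the data source per chunk. */
+	static uint8_t snapshot_read(unsigned int sector)
+	{
+		unsigned int nr = sector / CHUNK_SIZE;
+
+		return copied[nr] ? diff_store[sector] : original[sector];
+	}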
+ +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/diff_area.c | 602 ++++++++++++++++++++++++++++++ + drivers/block/blksnap/diff_area.h | 179 +++++++++ + 2 files changed, 781 insertions(+) + create mode 100644 drivers/block/blksnap/diff_area.c + create mode 100644 drivers/block/blksnap/diff_area.h + +diff --git a/drivers/block/blksnap/diff_area.c b/drivers/block/blksnap/diff_area.c +new file mode 100644 +index 000000000000..2bad7f610656 +--- /dev/null ++++ b/drivers/block/blksnap/diff_area.c +@@ -0,0 +1,602 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-diff-area: " fmt ++#include ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "params.h" ++#include "chunk.h" ++#include "diff_area.h" ++#include "diff_buffer.h" ++#include "diff_storage.h" ++#include "diff_io.h" ++ ++static inline unsigned long chunk_number(struct diff_area *diff_area, ++ sector_t sector) ++{ ++ return (unsigned long)(sector >> ++ (diff_area->chunk_shift - SECTOR_SHIFT)); ++}; ++ ++static inline sector_t chunk_sector(struct chunk *chunk) ++{ ++ return (sector_t)(chunk->number) ++ << (chunk->diff_area->chunk_shift - SECTOR_SHIFT); ++} ++ ++static inline void recalculate_last_chunk_size(struct chunk *chunk) ++{ ++ sector_t capacity; ++ ++ capacity = bdev_nr_sectors(chunk->diff_area->orig_bdev); ++ if (capacity > round_down(capacity, chunk->sector_count)) ++ chunk->sector_count = ++ capacity - round_down(capacity, chunk->sector_count); ++} ++ ++static inline unsigned long long count_by_shift(sector_t capacity, ++ unsigned long long shift) ++{ ++ return round_up(capacity, 1ull << (shift - SECTOR_SHIFT)) >> ++ (shift - SECTOR_SHIFT); ++} ++ ++static void diff_area_calculate_chunk_size(struct diff_area *diff_area) ++{ ++ unsigned long long shift = chunk_minimum_shift; ++ unsigned long long count; ++ sector_t capacity; ++ sector_t min_io_sect; ++ ++ min_io_sect = ++ (sector_t)(bdev_io_min(diff_area->orig_bdev) >> SECTOR_SHIFT); ++ capacity = bdev_nr_sectors(diff_area->orig_bdev); ++ ++ count = count_by_shift(capacity, shift); ++ while ((count > chunk_maximum_count) || ++ (diff_area_chunk_sectors(diff_area) < min_io_sect)) { ++ shift = shift << 1; ++ count = count_by_shift(capacity, shift); ++ } ++ ++ diff_area->chunk_shift = shift; ++ diff_area->chunk_count = count; ++ ++ pr_info("The optimal chunk size was calculated as %llu bytes for device [%d:%d]\n", ++ (1ull << diff_area->chunk_shift), ++ MAJOR(diff_area->orig_bdev->bd_dev), ++ MINOR(diff_area->orig_bdev->bd_dev)); ++} ++ ++void diff_area_free(struct kref *kref) ++{ ++ unsigned long inx = 0; ++ u64 start_waiting; ++ struct chunk *chunk; ++ struct diff_area *diff_area = ++ container_of(kref, struct diff_area, kref); ++ ++ might_sleep(); ++ start_waiting = jiffies_64; ++ while (atomic_read(&diff_area->pending_io_count)) { ++ schedule_timeout_interruptible(1); ++ if (jiffies_64 > (start_waiting + HZ)) { ++ start_waiting = jiffies_64; ++ inx++; ++ pr_warn("Waiting for pending I/O to complete\n"); ++ if (inx > 5) { ++ pr_err("Failed to complete pending I/O\n"); ++ break; ++ } ++ } ++ } ++ ++ atomic_set(&diff_area->corrupt_flag, 1); ++ flush_work(&diff_area->cache_release_work); ++ xa_for_each(&diff_area->chunk_map, inx, chunk) ++ chunk_free(chunk); ++ xa_destroy(&diff_area->chunk_map); ++ ++ if (diff_area->orig_bdev) { ++ blkdev_put(diff_area->orig_bdev, FMODE_READ | FMODE_WRITE); ++ diff_area->orig_bdev = NULL; ++ } ++ ++ /* Clean up free_diff_buffers */ ++ 
diff_buffer_cleanup(diff_area); ++ ++ kfree(diff_area); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_diff_area); ++#endif ++} ++ ++static inline struct chunk * ++get_chunk_from_cache_and_write_lock(spinlock_t *caches_lock, ++ struct list_head *cache_queue, ++ atomic_t *cache_count) ++{ ++ struct chunk *iter; ++ struct chunk *chunk = NULL; ++ ++ spin_lock(caches_lock); ++ list_for_each_entry(iter, cache_queue, cache_link) { ++ if (!down_trylock(&iter->lock)) { ++ chunk = iter; ++ break; ++ } ++ /* ++ * If it is not possible to lock a chunk for writing, ++ * then it is currently in use, and we try to clean up the ++ * next chunk. ++ */ ++ } ++ if (likely(chunk)) { ++ atomic_dec(cache_count); ++ list_del_init(&chunk->cache_link); ++ } ++ spin_unlock(caches_lock); ++ ++ return chunk; ++} ++ ++static struct chunk * ++diff_area_get_chunk_from_cache_and_write_lock(struct diff_area *diff_area) ++{ ++ if (atomic_read(&diff_area->read_cache_count) > ++ chunk_maximum_in_cache) { ++ struct chunk *chunk = get_chunk_from_cache_and_write_lock( ++ &diff_area->caches_lock, &diff_area->read_cache_queue, ++ &diff_area->read_cache_count); ++ if (chunk) ++ return chunk; ++ } ++ ++ if (atomic_read(&diff_area->write_cache_count) > ++ chunk_maximum_in_cache) { ++ struct chunk *chunk = get_chunk_from_cache_and_write_lock( ++ &diff_area->caches_lock, &diff_area->write_cache_queue, ++ &diff_area->write_cache_count); ++ if (chunk) ++ return chunk; ++ } ++ ++ return NULL; ++} ++ ++static void diff_area_cache_release(struct diff_area *diff_area) ++{ ++ struct chunk *chunk; ++ ++ while (!diff_area_is_corrupted(diff_area) && ++ (chunk = diff_area_get_chunk_from_cache_and_write_lock( ++ diff_area))) { ++ ++ if (WARN(!chunk_state_check(chunk, CHUNK_ST_BUFFER_READY), ++ "Cannot release empty buffer for chunk #%ld", ++ chunk->number)) { ++ up(&chunk->lock); ++ continue; ++ } ++ ++ if (chunk_state_check(chunk, CHUNK_ST_DIRTY)) { ++ int ret; ++ ++ ret = chunk_schedule_storing(chunk, false); ++ if (ret) ++ chunk_store_failed(chunk, ret); ++ } else { ++ chunk_diff_buffer_release(chunk); ++ up(&chunk->lock); ++ } ++ } ++} ++ ++static void diff_area_cache_release_work(struct work_struct *work) ++{ ++ struct diff_area *diff_area = ++ container_of(work, struct diff_area, cache_release_work); ++ ++ diff_area_cache_release(diff_area); ++} ++ ++struct diff_area *diff_area_new(dev_t dev_id, struct diff_storage *diff_storage) ++{ ++ int ret = 0; ++ struct diff_area *diff_area = NULL; ++ struct block_device *bdev; ++ unsigned long number; ++ struct chunk *chunk; ++ ++ pr_debug("Open device [%u:%u]\n", MAJOR(dev_id), MINOR(dev_id)); ++ ++ bdev = blkdev_get_by_dev(dev_id, FMODE_READ | FMODE_WRITE, NULL); ++ if (IS_ERR(bdev)) { ++ pr_err("Failed to open device. 
errno=%d\n", ++ abs((int)PTR_ERR(bdev))); ++ return ERR_PTR(PTR_ERR(bdev)); ++ } ++ ++ diff_area = kzalloc(sizeof(struct diff_area), GFP_KERNEL); ++ if (!diff_area) { ++ blkdev_put(bdev, FMODE_READ | FMODE_WRITE); ++ return ERR_PTR(-ENOMEM); ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_diff_area); ++#endif ++ diff_area->orig_bdev = bdev; ++ diff_area->diff_storage = diff_storage; ++ ++ diff_area_calculate_chunk_size(diff_area); ++ pr_debug("Chunk size %llu in bytes\n", 1ULL << diff_area->chunk_shift); ++ pr_debug("Chunk count %lu\n", diff_area->chunk_count); ++ ++ kref_init(&diff_area->kref); ++ xa_init(&diff_area->chunk_map); ++ ++ if (!diff_storage->capacity) { ++#ifdef CONFIG_BLK_SNAP_ALLOW_DIFF_STORAGE_IN_MEMORY ++ diff_area->in_memory = true; ++ pr_debug("Difference storage is empty.\n"); ++ pr_debug("Only the memory cache will be used to store the snapshots difference.\n"); ++#else ++ pr_err("Difference storage is empty.\n"); ++ pr_err("In-memory difference storage is not supported"); ++ return ERR_PTR(-EFAULT); ++#endif ++ } ++ ++ spin_lock_init(&diff_area->caches_lock); ++ INIT_LIST_HEAD(&diff_area->read_cache_queue); ++ atomic_set(&diff_area->read_cache_count, 0); ++ INIT_LIST_HEAD(&diff_area->write_cache_queue); ++ atomic_set(&diff_area->write_cache_count, 0); ++ INIT_WORK(&diff_area->cache_release_work, diff_area_cache_release_work); ++ ++ spin_lock_init(&diff_area->free_diff_buffers_lock); ++ INIT_LIST_HEAD(&diff_area->free_diff_buffers); ++ atomic_set(&diff_area->free_diff_buffers_count, 0); ++ ++ atomic_set(&diff_area->corrupt_flag, 0); ++ atomic_set(&diff_area->pending_io_count, 0); ++ ++ /** ++ * Allocating all chunks in advance allows to avoid doing this in ++ * the process of filtering bio. ++ * In addition, the chunk structure has an rw semaphore that allows ++ * to lock data of a single chunk. ++ * Different threads can read, write, or dump their data to diff storage ++ * independently of each other, provided that different chunks are used. ++ */ ++ for (number = 0; number < diff_area->chunk_count; number++) { ++ chunk = chunk_alloc(diff_area, number); ++ if (!chunk) { ++ pr_err("Failed allocate chunk\n"); ++ ret = -ENOMEM; ++ break; ++ } ++ chunk->sector_count = diff_area_chunk_sectors(diff_area); ++ ++ ret = xa_insert(&diff_area->chunk_map, number, chunk, ++ GFP_KERNEL); ++ if (ret) { ++ pr_err("Failed insert chunk to chunk map\n"); ++ chunk_free(chunk); ++ break; ++ } ++ } ++ if (ret) { ++ diff_area_put(diff_area); ++ return ERR_PTR(ret); ++ } ++ ++ recalculate_last_chunk_size(chunk); ++ ++ atomic_set(&diff_area->corrupt_flag, 0); ++ ++ return diff_area; ++} ++ ++static void diff_area_take_chunk_from_cache(struct diff_area *diff_area, ++ struct chunk *chunk) ++{ ++ spin_lock(&diff_area->caches_lock); ++ if (!list_is_first(&chunk->cache_link, &chunk->cache_link)) { ++ list_del_init(&chunk->cache_link); ++ ++ if (chunk_state_check(chunk, CHUNK_ST_DIRTY)) ++ atomic_dec(&diff_area->write_cache_count); ++ else ++ atomic_dec(&diff_area->read_cache_count); ++ } ++ spin_unlock(&diff_area->caches_lock); ++} ++ ++/** ++ * diff_area_copy() - Implements the copy-on-write mechanism. 
++ * @diff_area:
++ *	Pointer to the difference area for the original block device.
++ * @sector:
++ *	The first sector of the copied range.
++ * @count:
++ *	The number of sectors in the copied range.
++ * @is_nowait:
++ *	Prohibits waiting: if true, -EAGAIN is returned instead of blocking.
++ *
++ * For each chunk that intersects the specified range, the chunk's data is
++ * read from the original device and scheduled for storing to the
++ * difference storage, unless the chunk has already been copied or
++ * overwritten in the snapshot image.
++ */
++int diff_area_copy(struct diff_area *diff_area, sector_t sector, sector_t count,
++		   const bool is_nowait)
++{
++	int ret = 0;
++	sector_t offset;
++	struct chunk *chunk;
++	struct diff_buffer *diff_buffer;
++	sector_t area_sect_first;
++	sector_t chunk_sectors = diff_area_chunk_sectors(diff_area);
++
++	area_sect_first = round_down(sector, chunk_sectors);
++	for (offset = area_sect_first; offset < (sector + count);
++	     offset += chunk_sectors) {
++		chunk = xa_load(&diff_area->chunk_map,
++				chunk_number(diff_area, offset));
++		if (!chunk) {
++			diff_area_set_corrupted(diff_area, -EINVAL);
++			return -EINVAL;
++		}
++		WARN_ON(chunk_number(diff_area, offset) != chunk->number);
++		if (is_nowait) {
++			if (down_trylock(&chunk->lock))
++				return -EAGAIN;
++		} else {
++			ret = down_killable(&chunk->lock);
++			if (unlikely(ret))
++				return ret;
++		}
++
++		if (chunk_state_check(chunk, CHUNK_ST_FAILED | CHUNK_ST_DIRTY |
++					     CHUNK_ST_STORE_READY)) {
++			/*
++			 * The chunk has already been:
++			 * - marked as failed, when the snapshot is corrupted;
++			 * - overwritten in the snapshot image;
++			 * - stored in the difference storage.
++			 */
++			up(&chunk->lock);
++			continue;
++		}
++
++		if (unlikely(chunk_state_check(
++			    chunk, CHUNK_ST_LOADING | CHUNK_ST_STORING))) {
++			pr_err("Invalid chunk state\n");
++			ret = -EFAULT;
++			goto fail_unlock_chunk;
++		}
++
++		if (chunk_state_check(chunk, CHUNK_ST_BUFFER_READY)) {
++			diff_area_take_chunk_from_cache(diff_area, chunk);
++			/**
++			 * The chunk has already been read, but now we need
++			 * to store it to the difference storage.
++			 */
++			ret = chunk_schedule_storing(chunk, is_nowait);
++			if (unlikely(ret))
++				goto fail_unlock_chunk;
++		} else {
++			diff_buffer =
++				diff_buffer_take(chunk->diff_area, is_nowait);
++			if (IS_ERR(diff_buffer)) {
++				ret = PTR_ERR(diff_buffer);
++				goto fail_unlock_chunk;
++			}
++			WARN(chunk->diff_buffer, "Chunk's buffer has been lost");
++			chunk->diff_buffer = diff_buffer;
++
++			ret = chunk_async_load_orig(chunk, is_nowait);
++			if (unlikely(ret))
++				goto fail_unlock_chunk;
++		}
++	}
++
++	return ret;
++fail_unlock_chunk:
++	WARN_ON(!chunk);
++	chunk_store_failed(chunk, ret);
++	return ret;
++}
++
++static inline void diff_area_image_put_chunk(struct chunk *chunk, bool is_write)
++{
++	if (is_write) {
++		/**
++		 * Since the chunk was taken to perform writing,
++		 * we mark it as dirty.
++		 */
++		chunk_state_set(chunk, CHUNK_ST_DIRTY);
++	}
++
++	chunk_schedule_caching(chunk);
++}
++
++void diff_area_image_ctx_done(struct diff_area_image_ctx *io_ctx)
++{
++	if (!io_ctx->chunk)
++		return;
++
++	diff_area_image_put_chunk(io_ctx->chunk, io_ctx->is_write);
++}
++
++static int diff_area_load_chunk_from_storage(struct diff_area *diff_area,
++					     struct chunk *chunk)
++{
++	struct diff_buffer *diff_buffer;
++
++	diff_buffer = diff_buffer_take(diff_area, false);
++	if (IS_ERR(diff_buffer))
++		return PTR_ERR(diff_buffer);
++
++	WARN_ON(chunk->diff_buffer);
++	chunk->diff_buffer = diff_buffer;
++
++	if (chunk_state_check(chunk, CHUNK_ST_STORE_READY))
++		return chunk_load_diff(chunk);
++
++	return chunk_load_orig(chunk);
++}
++
++static struct chunk *
++diff_area_image_context_get_chunk(struct diff_area_image_ctx *io_ctx,
++				  sector_t sector)
++{
++	int ret;
++	struct chunk *chunk;
++	struct diff_area *diff_area = io_ctx->diff_area;
++	unsigned long new_chunk_number = chunk_number(diff_area, sector);
++
++	chunk = io_ctx->chunk;
++	if (chunk) {
++		if (chunk->number == new_chunk_number)
++			return chunk;
++
++		/*
++		 * If the sector falls into a new chunk, then we release
++		 * the old chunk.
++		 */
++		diff_area_image_put_chunk(chunk, io_ctx->is_write);
++		io_ctx->chunk = NULL;
++	}
++
++	/* Take the next chunk. */
++	chunk = xa_load(&diff_area->chunk_map, new_chunk_number);
++	if (unlikely(!chunk))
++		return ERR_PTR(-EINVAL);
++
++	ret = down_killable(&chunk->lock);
++	if (ret)
++		return ERR_PTR(ret);
++
++	if (unlikely(chunk_state_check(chunk, CHUNK_ST_FAILED))) {
++		pr_err("Chunk #%ld corrupted\n", chunk->number);
++
++		pr_debug("new_chunk_number=%ld\n", new_chunk_number);
++		pr_debug("sector=%llu\n", sector);
++		pr_debug("Chunk size %llu in bytes\n",
++			 (1ULL << diff_area->chunk_shift));
++		pr_debug("Chunk count %lu\n", diff_area->chunk_count);
++
++		ret = -EIO;
++		goto fail_unlock_chunk;
++	}
++
++	/*
++	 * If there is already data in the buffer, then nothing needs to
++	 * be loaded. Otherwise, the chunk needs to be loaded from the
++	 * original device or from the difference storage.
++	 */
++	if (!chunk_state_check(chunk, CHUNK_ST_BUFFER_READY)) {
++		ret = diff_area_load_chunk_from_storage(diff_area, chunk);
++		if (unlikely(ret))
++			goto fail_unlock_chunk;
++
++		/* Set the flag that the buffer contains the required data. */
++		chunk_state_set(chunk, CHUNK_ST_BUFFER_READY);
++	} else
++		diff_area_take_chunk_from_cache(diff_area, chunk);
++
++	io_ctx->chunk = chunk;
++	return chunk;
++
++fail_unlock_chunk:
++	pr_err("Failed to load chunk #%ld\n", chunk->number);
++	up(&chunk->lock);
++	return ERR_PTR(ret);
++}
++
++static inline sector_t diff_area_chunk_start(struct diff_area *diff_area,
++					     struct chunk *chunk)
++{
++	return (sector_t)(chunk->number) << diff_area->chunk_shift;
++}
++
++/**
++ * diff_area_image_io() - Implements copying data from the chunk to bio_vec
++ *	when reading, or from bio_vec to the chunk when writing.
++ */
++blk_status_t diff_area_image_io(struct diff_area_image_ctx *io_ctx,
++				const struct bio_vec *bvec, sector_t *pos)
++{
++	unsigned int bv_len = bvec->bv_len;
++	struct iov_iter iter;
++
++	iov_iter_bvec(&iter, io_ctx->is_write ? WRITE : READ, bvec, 1,
++		      bv_len);
++
++	while (bv_len) {
++		struct diff_buffer_iter diff_buffer_iter;
++		struct chunk *chunk;
++		sector_t buff_offset;
++
++		chunk = diff_area_image_context_get_chunk(io_ctx, *pos);
++		if (IS_ERR(chunk))
++			return BLK_STS_IOERR;
++
++		buff_offset = *pos - chunk_sector(chunk);
++		while (bv_len &&
++		       diff_buffer_iter_get(chunk->diff_buffer, buff_offset,
++					    &diff_buffer_iter)) {
++			ssize_t sz;
++
++			if (io_ctx->is_write)
++				sz = copy_page_from_iter(
++					diff_buffer_iter.page,
++					diff_buffer_iter.offset,
++					diff_buffer_iter.bytes, &iter);
++			else
++				sz = copy_page_to_iter(diff_buffer_iter.page,
++						       diff_buffer_iter.offset,
++						       diff_buffer_iter.bytes,
++						       &iter);
++			if (!sz)
++				return BLK_STS_IOERR;
++
++			buff_offset += (sz >> SECTOR_SHIFT);
++			*pos += (sz >> SECTOR_SHIFT);
++			bv_len -= sz;
++		}
++	}
++
++	return BLK_STS_OK;
++}
++
++static inline void diff_area_event_corrupted(struct diff_area *diff_area,
++					     int err_code)
++{
++	struct blk_snap_event_corrupted data = {
++		.orig_dev_id.mj = MAJOR(diff_area->orig_bdev->bd_dev),
++		.orig_dev_id.mn = MINOR(diff_area->orig_bdev->bd_dev),
++		.err_code = abs(err_code),
++	};
++
++	event_gen(&diff_area->diff_storage->event_queue, GFP_NOIO,
++		  blk_snap_event_code_corrupted, &data,
++		  sizeof(struct blk_snap_event_corrupted));
++}
++
++void diff_area_set_corrupted(struct diff_area *diff_area, int err_code)
++{
++	if (atomic_inc_return(&diff_area->corrupt_flag) != 1)
++		return;
++
++	diff_area_event_corrupted(diff_area, err_code);
++
++	pr_err("Snapshot device is corrupted for [%u:%u] with error code %d\n",
++	       MAJOR(diff_area->orig_bdev->bd_dev),
++	       MINOR(diff_area->orig_bdev->bd_dev), abs(err_code));
++}
++
++void diff_area_throttling_io(struct diff_area *diff_area)
++{
++	u64 start_waiting;
++
++	start_waiting = jiffies_64;
++	while (atomic_read(&diff_area->pending_io_count)) {
++		schedule_timeout_interruptible(0);
++		if (jiffies_64 > (start_waiting + HZ / 10))
++			break;
++	}
++}
+diff --git a/drivers/block/blksnap/diff_area.h b/drivers/block/blksnap/diff_area.h
+new file mode 100644
+index 000000000000..d8f2959a979a
+--- /dev/null
++++ b/drivers/block/blksnap/diff_area.h
+@@ -0,0 +1,179 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#pragma once
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "event_queue.h"
++
++struct diff_storage;
++struct chunk;
++
++/**
++ * struct diff_area - Describes the difference area for one original device.
++ * @kref:
++ *	The reference counter. The &struct diff_area can be shared between
++ *	the &struct tracker and &struct snapimage.
++ * @orig_bdev:
++ *	A pointer to the structure of an opened block device.
++ * @diff_storage:
++ *	Pointer to the difference storage for storing the difference data.
++ * @chunk_shift:
++ *	Power of 2 used to specify the chunk size. This allows using
++ *	different chunk sizes for huge and small block devices.
++ * @chunk_count:
++ *	Count of chunks. The number of chunks into which the block device
++ *	is divided.
++ * @chunk_map:
++ *	A map of chunks.
++ * @in_memory:
++ *	A sign that the difference storage is not prepared and all
++ *	differences are stored in RAM.
++ * @caches_lock:
++ *	This spinlock guarantees consistency of the linked lists of chunk
++ *	caches.
++ * @read_cache_queue:
++ *	Queue for the read cache.
++ * @read_cache_count:
++ *	The number of chunks in the read cache.
++ * @write_cache_queue:
++ *	Queue for the write cache.
++ * @write_cache_count:
++ *	The number of chunks in the write cache.
++ * @cache_release_work:
++ *	The workqueue work item. This worker limits the number of chunks
++ *	that store their data in RAM.
++ * @free_diff_buffers_lock:
++ *	This spinlock guarantees consistency of the linked lists of
++ *	free difference buffers.
++ * @free_diff_buffers:
++ *	Linked list of free difference buffers, which reduces the number
++ *	of buffer allocation and release operations.
++ * @free_diff_buffers_count:
++ *	The number of free difference buffers in the linked list.
++ * @corrupt_flag:
++ *	The flag is set if an error occurred in the operation of the data
++ *	saving mechanism in the diff area. In this case, an error will be
++ *	generated when reading from the snapshot image.
++ * @pending_io_count:
++ *	Counter of incomplete I/O operations. It makes it possible to wait
++ *	for all I/O operations to be completed before releasing this
++ *	structure.
++ *
++ * The &struct diff_area is created for each block device in the snapshot.
++ * It is used to save the differences between the original block device and
++ * the snapshot image. That is, when writing data to the original device,
++ * the differences are copied as chunks to the difference storage.
++ * Reading and writing from the snapshot image is also performed using
++ * &struct diff_area.
++ *
++ * The xarray has a limit on the maximum size. This can be especially
++ * noticeable on 32-bit systems. This creates a limit on the size of
++ * supported disks.
++ *
++ * For example, for a 256 TiB disk with a block size of 65536 bytes, the
++ * number of elements in the chunk map will be equal to 2 to the power
++ * of 32. Therefore, the number of chunks into which the block device is
++ * divided is limited.
++ *
++ * To provide high performance, a read cache and a write cache for chunks are
++ * used. The cache algorithm is the simplest. If the data of the chunk was
++ * read to the difference buffer, then the buffer is not released immediately,
++ * but is placed at the end of the queue. The worker thread checks the number
++ * of chunks in the queue and releases a difference buffer for the first chunk
++ * in the queue, but only if the binary semaphore of the chunk is not locked.
++ * If the read thread accesses the chunk from the cache again, the chunk is
++ * moved back to the end of the queue.
++ *
++ * The linked list of difference buffers allows keeping a certain number of
++ * "hot" buffers, which reduces the number of allocations and releases
++ * of memory.
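++ *
++ * As a rough worked example (an assumption based on the default Kconfig
++ * values of this module): with the minimum chunk size of 256 KiB and a
++ * cache limit of 64 chunks, a cache keeps at most 64 * 256 KiB = 16 MiB
++ * of chunk data in RAM per block device.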
++ */
++struct diff_area {
++	struct kref kref;
++
++	struct block_device *orig_bdev;
++	struct diff_storage *diff_storage;
++
++	unsigned long long chunk_shift;
++	unsigned long chunk_count;
++	struct xarray chunk_map;
++#ifdef CONFIG_BLK_SNAP_ALLOW_DIFF_STORAGE_IN_MEMORY
++	bool in_memory;
++#endif
++	spinlock_t caches_lock;
++	struct list_head read_cache_queue;
++	atomic_t read_cache_count;
++	struct list_head write_cache_queue;
++	atomic_t write_cache_count;
++	struct work_struct cache_release_work;
++
++	spinlock_t free_diff_buffers_lock;
++	struct list_head free_diff_buffers;
++	atomic_t free_diff_buffers_count;
++
++	atomic_t corrupt_flag;
++	atomic_t pending_io_count;
++};
++
++struct diff_area *diff_area_new(dev_t dev_id,
++				struct diff_storage *diff_storage);
++void diff_area_free(struct kref *kref);
++static inline void diff_area_get(struct diff_area *diff_area)
++{
++	kref_get(&diff_area->kref);
++};
++static inline void diff_area_put(struct diff_area *diff_area)
++{
++	if (likely(diff_area))
++		kref_put(&diff_area->kref, diff_area_free);
++};
++void diff_area_set_corrupted(struct diff_area *diff_area, int err_code);
++static inline bool diff_area_is_corrupted(struct diff_area *diff_area)
++{
++	return !!atomic_read(&diff_area->corrupt_flag);
++};
++static inline sector_t diff_area_chunk_sectors(struct diff_area *diff_area)
++{
++	return (sector_t)(1ULL << (diff_area->chunk_shift - SECTOR_SHIFT));
++};
++int diff_area_copy(struct diff_area *diff_area, sector_t sector, sector_t count,
++		   const bool is_nowait);
++
++/**
++ * struct diff_area_image_ctx - The context for processing an I/O request to
++ *	the snapshot image.
++ * @diff_area:
++ *	Pointer to &struct diff_area for the current snapshot image.
++ * @is_write:
++ *	Distinguishes between the behavior of reading or writing when
++ *	processing a request.
++ * @chunk:
++ *	Current chunk.
++ */
++struct diff_area_image_ctx {
++	struct diff_area *diff_area;
++	bool is_write;
++	struct chunk *chunk;
++};
++
++static inline void diff_area_image_ctx_init(struct diff_area_image_ctx *io_ctx,
++					    struct diff_area *diff_area,
++					    bool is_write)
++{
++	io_ctx->diff_area = diff_area;
++	io_ctx->is_write = is_write;
++	io_ctx->chunk = NULL;
++};
++void diff_area_image_ctx_done(struct diff_area_image_ctx *io_ctx);
++blk_status_t diff_area_image_io(struct diff_area_image_ctx *io_ctx,
++				const struct bio_vec *bvec, sector_t *pos);
++
++/**
++ * diff_area_throttling_io - Waits until the in-flight I/O operations of the
++ *	difference area are completed, but for no longer than a tenth of a
++ *	second.
++ */
++void diff_area_throttling_io(struct diff_area *diff_area);
+--
+2.20.1
+
diff --git a/patches/lk5.19/0015-block-blksnap-snapshot-image-block-device.patch b/patches/lk5.19/0015-block-blksnap-snapshot-image-block-device.patch
new file mode 100644
index 00000000..bdc739f6
--- /dev/null
+++ b/patches/lk5.19/0015-block-blksnap-snapshot-image-block-device.patch
@@ -0,0 +1,442 @@
+From 05a59d6daa81d02ce5794826be2e200e4c03b6fe Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:41:20 +0200
+Subject: [PATCH 15/20] block, blksnap: snapshot image block device
+
+Provides the operation of the block devices of snapshot images. Read and
+write operations are redirected to the difference block regions of the
+block device (struct diff_area).
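+
+For illustration only, a minimal user-space sketch of reading from a
+snapshot image device follows. The device node name used here,
+/dev/blksnap-image0, is an assumption: the real name is built from
+BLK_SNAP_IMAGE_NAME plus the allocated minor number. Opening the node
+with O_EXCL maps to FMODE_EXCL in the kernel and rules out concurrent
+access to the image.
+
+	/* Sketch: read the first 4 KiB of a snapshot image device. */
+	#include <fcntl.h>
+	#include <stdio.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		char buf[4096];
+		ssize_t len;
+		/* The device node name is an assumed example. */
+		int fd = open("/dev/blksnap-image0", O_RDONLY | O_EXCL);
+
+		if (fd < 0) {
+			perror("open");
+			return 1;
+		}
+		len = read(fd, buf, sizeof(buf));
+		if (len < 0)
+			perror("read");
+		else
+			printf("read %zd bytes from the image\n", len);
+		close(fd);
+		return len < 0 ? 1 : 0;
+	}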
+ +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/snapimage.c | 345 ++++++++++++++++++++++++++++++ + drivers/block/blksnap/snapimage.h | 65 ++++++ + 2 files changed, 410 insertions(+) + create mode 100644 drivers/block/blksnap/snapimage.c + create mode 100644 drivers/block/blksnap/snapimage.h + +diff --git a/drivers/block/blksnap/snapimage.c b/drivers/block/blksnap/snapimage.c +new file mode 100644 +index 000000000000..df69e989056f +--- /dev/null ++++ b/drivers/block/blksnap/snapimage.c +@@ -0,0 +1,345 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME "-snapimage: " fmt ++#include ++#include ++#include ++#include ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++#include "memory_checker.h" ++#endif ++#include "snapimage.h" ++#include "diff_area.h" ++#include "chunk.h" ++#include "cbt_map.h" ++ ++#define SNAPIMAGE_MAX_DEVICES 2048 ++ ++static unsigned int _major; ++static DEFINE_IDR(_minor_idr); ++static DEFINE_SPINLOCK(_minor_lock); ++ ++static void free_minor(int minor) ++{ ++ spin_lock(&_minor_lock); ++ idr_remove(&_minor_idr, minor); ++ spin_unlock(&_minor_lock); ++} ++ ++static int new_minor(int *minor, void *ptr) ++{ ++ int ret; ++ ++ idr_preload(GFP_KERNEL); ++ spin_lock(&_minor_lock); ++ ++ ret = idr_alloc(&_minor_idr, ptr, 0, 1 << MINORBITS, GFP_NOWAIT); ++ ++ spin_unlock(&_minor_lock); ++ idr_preload_end(); ++ ++ if (ret < 0) ++ return ret; ++ ++ *minor = ret; ++ return 0; ++} ++ ++static inline void snapimage_unprepare_worker(struct snapimage *snapimage) ++{ ++ kthread_flush_worker(&snapimage->worker); ++ kthread_stop(snapimage->worker_task); ++} ++ ++static int snapimage_kthread_worker_fn(void *worker_ptr) ++{ ++ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; ++ return kthread_worker_fn(worker_ptr); ++} ++ ++static inline int snapimage_prepare_worker(struct snapimage *snapimage) ++{ ++ struct task_struct *task; ++ ++ kthread_init_worker(&snapimage->worker); ++ ++ task = kthread_run(snapimage_kthread_worker_fn, &snapimage->worker, ++ BLK_SNAP_IMAGE_NAME "%d", ++ MINOR(snapimage->image_dev_id)); ++ if (IS_ERR(task)) ++ return -ENOMEM; ++ ++ set_user_nice(task, MIN_NICE); ++ ++ snapimage->worker_task = task; ++ return 0; ++} ++ ++struct snapimage_cmd { ++ struct kthread_work work; ++}; ++ ++static void snapimage_queue_work(struct kthread_work *work) ++{ ++ struct snapimage_cmd *cmd = ++ container_of(work, struct snapimage_cmd, work); ++ struct request *rq = blk_mq_rq_from_pdu(cmd); ++ struct snapimage *snapimage = rq->q->queuedata; ++ blk_status_t status = BLK_STS_OK; ++ struct bio_vec bvec; ++ struct req_iterator iter; ++ struct diff_area_image_ctx io_ctx; ++ sector_t pos = blk_rq_pos(rq); ++ ++ diff_area_throttling_io(snapimage->diff_area); ++ diff_area_image_ctx_init(&io_ctx, snapimage->diff_area, ++ op_is_write(req_op(rq))); ++ rq_for_each_segment(bvec, rq, iter) { ++ status = diff_area_image_io(&io_ctx, &bvec, &pos); ++ if (unlikely(status != BLK_STS_OK)) ++ break; ++ } ++ diff_area_image_ctx_done(&io_ctx); ++ ++ blk_mq_end_request(rq, status); ++} ++ ++static int snapimage_init_request(struct blk_mq_tag_set *set, ++ struct request *rq, unsigned int hctx_idx, ++ unsigned int numa_node) ++{ ++ struct snapimage_cmd *cmd = blk_mq_rq_to_pdu(rq); ++ ++ kthread_init_work(&cmd->work, snapimage_queue_work); ++ return 0; ++} ++ ++static blk_status_t snapimage_queue_rq(struct blk_mq_hw_ctx *hctx, ++ const struct blk_mq_queue_data *bd) ++{ ++ int ret; ++ struct request *rq = bd->rq; ++ struct snapimage *snapimage = rq->q->queuedata; ++ struct 
snapimage_cmd *cmd = blk_mq_rq_to_pdu(rq);
++
++	/*
++	 * Cannot fall asleep in the context of this function,
++	 * as we are under an rwsem lock.
++	 */
++
++	blk_mq_start_request(rq);
++
++	if (unlikely(!snapimage->is_ready)) {
++		blk_mq_end_request(rq, BLK_STS_IOERR);
++		return BLK_STS_IOERR;
++	}
++
++	if (op_is_write(req_op(rq))) {
++		ret = cbt_map_set_both(snapimage->cbt_map, blk_rq_pos(rq),
++				       blk_rq_sectors(rq));
++		if (unlikely(ret)) {
++			blk_mq_end_request(rq, BLK_STS_IOERR);
++			return BLK_STS_IOERR;
++		}
++	}
++
++	kthread_queue_work(&snapimage->worker, &cmd->work);
++	return BLK_STS_OK;
++}
++
++static const struct blk_mq_ops mq_ops = {
++	.queue_rq = snapimage_queue_rq,
++	.init_request = snapimage_init_request,
++};
++
++static const struct block_device_operations bd_ops = {
++	.owner = THIS_MODULE,
++	//.open = snapimage_open,
++	//.ioctl = snapimage_ioctl,
++	//.release = snapimage_close,
++};
++
++static inline int snapimage_alloc_tag_set(struct snapimage *snapimage)
++{
++	struct blk_mq_tag_set *set = &snapimage->tag_set;
++
++	set->ops = &mq_ops;
++	set->nr_hw_queues = 1;
++	set->nr_maps = 1;
++	set->queue_depth = 128;
++	set->numa_node = NUMA_NO_NODE;
++	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
++
++	set->cmd_size = sizeof(struct snapimage_cmd);
++	set->driver_data = snapimage;
++
++	return blk_mq_alloc_tag_set(set);
++}
++
++void snapimage_free(struct snapimage *snapimage)
++{
++	pr_info("Snapshot image disk [%u:%u] delete\n",
++		MAJOR(snapimage->image_dev_id), MINOR(snapimage->image_dev_id));
++
++	blk_mq_freeze_queue(snapimage->disk->queue);
++	snapimage->is_ready = false;
++	blk_mq_unfreeze_queue(snapimage->disk->queue);
++
++	snapimage_unprepare_worker(snapimage);
++
++	del_gendisk(snapimage->disk);
++	blk_cleanup_disk(snapimage->disk);
++	blk_mq_free_tag_set(&snapimage->tag_set);
++
++	diff_area_put(snapimage->diff_area);
++	cbt_map_put(snapimage->cbt_map);
++
++	free_minor(MINOR(snapimage->image_dev_id));
++	kfree(snapimage);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	memory_object_dec(memory_object_snapimage);
++#endif
++}
++
++struct snapimage *snapimage_create(struct diff_area *diff_area,
++				   struct cbt_map *cbt_map)
++{
++	int ret = 0;
++	int minor;
++	struct snapimage *snapimage = NULL;
++	struct gendisk *disk;
++
++	pr_info("Create snapshot image for device [%u:%u]\n",
++		MAJOR(diff_area->orig_bdev->bd_dev),
++		MINOR(diff_area->orig_bdev->bd_dev));
++
++	snapimage = kzalloc(sizeof(struct snapimage), GFP_KERNEL);
++	if (snapimage == NULL)
++		return ERR_PTR(-ENOMEM);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	memory_object_inc(memory_object_snapimage);
++#endif
++	ret = new_minor(&minor, snapimage);
++	if (ret) {
++		pr_err("Failed to allocate minor for snapshot image device. errno=%d\n",
++		       abs(ret));
++		goto fail_free_image;
++	}
++
++	snapimage->is_ready = true;
++	snapimage->capacity = cbt_map->device_capacity;
++	snapimage->image_dev_id = MKDEV(_major, minor);
++	pr_info("Snapshot image device id [%u:%u]\n",
++		MAJOR(snapimage->image_dev_id), MINOR(snapimage->image_dev_id));
++
++	ret = snapimage_prepare_worker(snapimage);
++	if (ret) {
++		pr_err("Failed to prepare worker thread. errno=%d\n", abs(ret));
++		goto fail_free_minor;
++	}
++
++	ret = snapimage_alloc_tag_set(snapimage);
++	if (ret) {
++		pr_err("Failed to allocate tag set. errno=%d\n", abs(ret));
++		goto fail_free_worker;
++	}
++
++	disk = blk_mq_alloc_disk(&snapimage->tag_set, snapimage);
++	if (IS_ERR(disk)) {
++		ret = PTR_ERR(disk);
++		pr_err("Failed to allocate disk. errno=%d\n", abs(ret));
errno=%d\n", abs(ret)); ++ goto fail_free_tagset; ++ } ++ ++ blk_queue_max_hw_sectors(disk->queue, BLK_DEF_MAX_SECTORS); ++ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, disk->queue); ++ ++ if (snprintf(disk->disk_name, DISK_NAME_LEN, "%s%d", ++ BLK_SNAP_IMAGE_NAME, minor) < 0) { ++ pr_err("Unable to set disk name for snapshot image device: invalid minor %u\n", ++ minor); ++ ret = -EINVAL; ++ goto fail_cleanup_disk; ++ } ++ pr_info("Snapshot image disk name [%s]\n", disk->disk_name); ++ ++ disk->flags = 0; ++ //disk->flags |= GENHD_FL_HIDDEN; ++ //disk->flags |= GENHD_FL_REMOVABLE; ++#ifdef GENHD_FL_NO_PART_SCAN ++ disk->flags |= GENHD_FL_NO_PART_SCAN; ++#else ++ disk->flags |= GENHD_FL_NO_PART; ++#endif ++ ++ ++ disk->major = _major; ++ disk->first_minor = minor; ++ disk->minors = 1; // One disk has only one partition. ++ ++ disk->fops = &bd_ops; ++ disk->private_data = snapimage; ++ snapimage->disk = disk; ++ ++ set_capacity(disk, snapimage->capacity); ++ pr_debug("Snapshot image device capacity %lld bytes\n", ++ (u64)(snapimage->capacity << SECTOR_SHIFT)); ++ ++ diff_area_get(diff_area); ++ snapimage->diff_area = diff_area; ++ cbt_map_get(cbt_map); ++ snapimage->cbt_map = cbt_map; ++ ++ ret = add_disk(disk); ++ if (ret) { ++ pr_err("Failed to add disk [%s] for snapshot image device\n", ++ disk->disk_name); ++ goto fail_cleanup_disk; ++ } ++ ++ return snapimage; ++ ++fail_cleanup_disk: ++ blk_cleanup_disk(disk); ++fail_free_tagset: ++ blk_mq_free_tag_set(&snapimage->tag_set); ++fail_free_worker: ++ snapimage_unprepare_worker(snapimage); ++fail_free_minor: ++ free_minor(minor); ++fail_free_image: ++ kfree(snapimage); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_snapimage); ++#endif ++ return ERR_PTR(ret); ++} ++ ++int snapimage_init(void) ++{ ++ int mj = 0; ++ ++ mj = register_blkdev(mj, BLK_SNAP_IMAGE_NAME); ++ if (mj < 0) { ++ pr_err("Failed to register snapshot image block device. errno=%d\n", ++ abs(mj)); ++ return mj; ++ } ++ _major = mj; ++ pr_info("Snapshot image block device major %d was registered\n", ++ _major); ++ ++ return 0; ++} ++ ++void snapimage_done(void) ++{ ++ unregister_blkdev(_major, BLK_SNAP_IMAGE_NAME); ++ pr_info("Snapshot image block device [%d] was unregistered\n", _major); ++ ++ idr_destroy(&_minor_idr); ++} ++ ++int snapimage_major(void) ++{ ++ return _major; ++} +diff --git a/drivers/block/blksnap/snapimage.h b/drivers/block/blksnap/snapimage.h +new file mode 100644 +index 000000000000..e978711004f0 +--- /dev/null ++++ b/drivers/block/blksnap/snapimage.h +@@ -0,0 +1,65 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#pragma once ++#include ++#include ++#include ++#include ++ ++struct diff_area; ++struct cbt_map; ++ ++/** ++ * struct snapimage - Snapshot image block device. ++ * ++ * @image_dev_id: ++ * ID of the snapshot image block device. ++ * @capacity: ++ * The size of the snapshot image in sectors must be equal to the size ++ * of the original device at the time of taking the snapshot. ++ * @is_ready: ++ * The flag means that the snapshot image is ready for processing ++ * I/O requests. ++ * @worker: ++ * The worker thread for processing I/O requests. ++ * @worker_task: ++ * A pointer to the &struct task of the worker thread. ++ * @tag_set: ++ * Area to keep a shared tag map. ++ * @disk: ++ * A pointer to the &struct gendisk for the image block device. ++ * @diff_area: ++ * A pointer to the owned &struct diff_area. ++ * @cbt_map: ++ * A pointer to the owned &struct cbt_map. 
++ *
++ * The snapshot image is presented in the system as a block device. But
++ * when reading or writing a snapshot image, the data is redirected to
++ * the original block device or to the block device of the difference storage.
++ *
++ * The module does not prohibit reading and writing data to the snapshot
++ * from different threads in parallel. To avoid the problem with simultaneous
++ * access, it is enough to open the snapshot image block device with the
++ * FMODE_EXCL parameter.
++ */
++struct snapimage {
++	dev_t image_dev_id;
++	sector_t capacity;
++	bool is_ready;
++
++	struct kthread_worker worker;
++	struct task_struct *worker_task;
++
++	struct blk_mq_tag_set tag_set;
++	struct gendisk *disk;
++
++	struct diff_area *diff_area;
++	struct cbt_map *cbt_map;
++};
++
++int snapimage_init(void);
++void snapimage_done(void);
++int snapimage_major(void);
++
++void snapimage_free(struct snapimage *snapimage);
++struct snapimage *snapimage_create(struct diff_area *diff_area,
++				   struct cbt_map *cbt_map);
+--
+2.20.1
+
diff --git a/patches/lk5.19/0016-block-blksnap-snapshot.patch b/patches/lk5.19/0016-block-blksnap-snapshot.patch
new file mode 100644
index 00000000..16616258
--- /dev/null
+++ b/patches/lk5.19/0016-block-blksnap-snapshot.patch
@@ -0,0 +1,784 @@
+From 163d84d5b8310f68e8b00c6cab5d03c0c0f7665a Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:41:46 +0200
+Subject: [PATCH 16/20] block, blksnap: snapshot
+
+The struct snapshot combines the block devices for which a snapshot is
+created, the block devices of their snapshot images, as well as a
+difference storage.
+There may be several snapshots at the same time, but they must not
+contain common block devices. This can be used for cases when a backup
+is scheduled once an hour for some block devices, once a day for others,
+and once a week for yet others. In this case, it is possible that three
+snapshots are used at the same time.
+
+Signed-off-by: Sergei Shtepa
+---
+ drivers/block/blksnap/snapshot.c | 671 +++++++++++++++++++++++++++++++
+ drivers/block/blksnap/snapshot.h | 76 ++++
+ 2 files changed, 747 insertions(+)
+ create mode 100644 drivers/block/blksnap/snapshot.c
+ create mode 100644 drivers/block/blksnap/snapshot.h
+
+diff --git a/drivers/block/blksnap/snapshot.c b/drivers/block/blksnap/snapshot.c
+new file mode 100644
+index 000000000000..d7285b8108d0
+--- /dev/null
++++ b/drivers/block/blksnap/snapshot.c
+@@ -0,0 +1,671 @@
++// SPDX-License-Identifier: GPL-2.0
++#define pr_fmt(fmt) KBUILD_MODNAME "-snapshot: " fmt
++#include
++#include
++#include
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++#include "memory_checker.h"
++#endif
++#include "snapshot.h"
++#include "tracker.h"
++#include "diff_storage.h"
++#include "diff_area.h"
++#include "snapimage.h"
++#include "cbt_map.h"
++
++LIST_HEAD(snapshots);
++DECLARE_RWSEM(snapshots_lock);
++
++static void snapshot_release(struct snapshot *snapshot)
++{
++	int inx;
++	unsigned int current_flag;
++
++	pr_info("Release snapshot %pUb\n", &snapshot->id);
++
++	/* Destroy all snapshot images. */
++	for (inx = 0; inx < snapshot->count; ++inx) {
++		struct snapimage *snapimage = snapshot->snapimage_array[inx];
++
++		if (snapimage)
++			snapimage_free(snapimage);
++	}
++
++	/* Flush and freeze fs on each original block device.
*/ ++ for (inx = 0; inx < snapshot->count; ++inx) { ++ struct tracker *tracker = snapshot->tracker_array[inx]; ++ ++ if (!tracker || !tracker->diff_area) ++ continue; ++ ++ if (freeze_bdev(tracker->diff_area->orig_bdev)) ++ pr_err("Failed to freeze device [%u:%u]\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ } ++ ++ current_flag = memalloc_noio_save(); ++ tracker_lock(); ++ ++ /* Set tracker as available for new snapshots. */ ++ for (inx = 0; inx < snapshot->count; ++inx) ++ tracker_release_snapshot(snapshot->tracker_array[inx]); ++ ++ tracker_unlock(); ++ memalloc_noio_restore(current_flag); ++ ++ /* Thaw fs on each original block device. */ ++ for (inx = 0; inx < snapshot->count; ++inx) { ++ struct tracker *tracker = snapshot->tracker_array[inx]; ++ ++ if (!tracker || !tracker->diff_area) ++ continue; ++ ++ if (thaw_bdev(tracker->diff_area->orig_bdev)) ++ pr_err("Failed to thaw device [%u:%u]\n", ++ MAJOR(tracker->dev_id), MINOR(tracker->dev_id)); ++ } ++ ++ /* Destroy diff area for each tracker. */ ++ for (inx = 0; inx < snapshot->count; ++inx) { ++ struct tracker *tracker = snapshot->tracker_array[inx]; ++ ++ if (tracker) { ++ diff_area_put(tracker->diff_area); ++ tracker->diff_area = NULL; ++ ++ tracker_put(tracker); ++ snapshot->tracker_array[inx] = NULL; ++ } ++ } ++} ++ ++static void snapshot_free(struct kref *kref) ++{ ++ struct snapshot *snapshot = container_of(kref, struct snapshot, kref); ++ ++ if (snapshot->is_taken) ++ snapshot_release(snapshot); ++ kfree(snapshot->snapimage_array); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (snapshot->snapimage_array) ++ memory_object_dec(memory_object_snapimage_array); ++#endif ++ kfree(snapshot->tracker_array); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (snapshot->tracker_array) ++ memory_object_dec(memory_object_tracker_array); ++#endif ++ diff_storage_put(snapshot->diff_storage); ++ ++ kfree(snapshot); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_dec(memory_object_snapshot); ++#endif ++} ++ ++static inline void snapshot_get(struct snapshot *snapshot) ++{ ++ kref_get(&snapshot->kref); ++}; ++static inline void snapshot_put(struct snapshot *snapshot) ++{ ++ if (likely(snapshot)) ++ kref_put(&snapshot->kref, snapshot_free); ++}; ++ ++static struct snapshot *snapshot_new(unsigned int count) ++{ ++ int ret; ++ struct snapshot *snapshot = NULL; ++ ++ snapshot = kzalloc(sizeof(struct snapshot), GFP_KERNEL); ++ if (!snapshot) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_snapshot); ++#endif ++ snapshot->tracker_array = kcalloc(count, sizeof(void *), GFP_KERNEL); ++ if (!snapshot->tracker_array) { ++ ret = -ENOMEM; ++ goto fail_free_snapshot; ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_tracker_array); ++#endif ++ snapshot->snapimage_array = kcalloc(count, sizeof(void *), GFP_KERNEL); ++ if (!snapshot->snapimage_array) { ++ ret = -ENOMEM; ++ goto fail_free_trackers; ++ } ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ memory_object_inc(memory_object_snapimage_array); ++#endif ++ snapshot->diff_storage = diff_storage_new(); ++ if (!snapshot->diff_storage) { ++ ret = -ENOMEM; ++ goto fail_free_snapimage; ++ } ++ ++ INIT_LIST_HEAD(&snapshot->link); ++ kref_init(&snapshot->kref); ++ uuid_gen(&snapshot->id); ++ snapshot->is_taken = false; ++ ++ return snapshot; ++ ++fail_free_snapimage: ++ kfree(snapshot->snapimage_array); ++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK ++ if (snapshot->snapimage_array) ++ 
memory_object_dec(memory_object_snapimage_array);
++#endif
++fail_free_trackers:
++	kfree(snapshot->tracker_array);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	if (snapshot->tracker_array)
++		memory_object_dec(memory_object_tracker_array);
++#endif
++
++fail_free_snapshot:
++	kfree(snapshot);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	if (snapshot)
++		memory_object_dec(memory_object_snapshot);
++#endif
++
++fail:
++	return ERR_PTR(ret);
++}
++
++void snapshot_done(void)
++{
++	struct snapshot *snapshot;
++
++	pr_debug("Cleanup snapshots\n");
++	do {
++		down_write(&snapshots_lock);
++		snapshot = list_first_entry_or_null(&snapshots, struct snapshot,
++						    link);
++		if (snapshot)
++			list_del(&snapshot->link);
++		up_write(&snapshots_lock);
++
++		snapshot_put(snapshot);
++	} while (snapshot);
++}
++
++int snapshot_create(struct blk_snap_dev_t *dev_id_array, unsigned int count,
++		    uuid_t *id)
++{
++	struct snapshot *snapshot = NULL;
++	int ret;
++	unsigned int inx;
++
++	pr_info("Create snapshot for devices:\n");
++	for (inx = 0; inx < count; ++inx)
++		pr_info("\t%u:%u\n", dev_id_array[inx].mj,
++			dev_id_array[inx].mn);
++
++	snapshot = snapshot_new(count);
++	if (IS_ERR(snapshot)) {
++		pr_err("Unable to create snapshot: failed to allocate snapshot structure\n");
++		return PTR_ERR(snapshot);
++	}
++
++	ret = -ENODEV;
++	for (inx = 0; inx < count; ++inx) {
++		dev_t dev_id =
++			MKDEV(dev_id_array[inx].mj, dev_id_array[inx].mn);
++		struct tracker *tracker;
++
++		tracker = tracker_create_or_get(dev_id);
++		if (IS_ERR(tracker)) {
++			pr_err("Unable to create snapshot\n");
++			pr_err("Failed to add device [%u:%u] to snapshot tracking\n",
++			       MAJOR(dev_id), MINOR(dev_id));
++			ret = PTR_ERR(tracker);
++			goto fail;
++		}
++
++		snapshot->tracker_array[inx] = tracker;
++		snapshot->count++;
++	}
++
++	down_write(&snapshots_lock);
++	list_add_tail(&snapshot->link, &snapshots);
++	up_write(&snapshots_lock);
++
++	uuid_copy(id, &snapshot->id);
++	pr_info("Snapshot %pUb was created\n", &snapshot->id);
++	return 0;
++fail:
++	pr_err("Snapshot cannot be created\n");
++
++	snapshot_put(snapshot);
++	return ret;
++}
++
++static struct snapshot *snapshot_get_by_id(uuid_t *id)
++{
++	struct snapshot *snapshot = NULL;
++	struct snapshot *s;
++
++	down_read(&snapshots_lock);
++	if (list_empty(&snapshots))
++		goto out;
++
++	list_for_each_entry(s, &snapshots, link) {
++		if (uuid_equal(&s->id, id)) {
++			snapshot = s;
++			snapshot_get(snapshot);
++			break;
++		}
++	}
++out:
++	up_read(&snapshots_lock);
++	return snapshot;
++}
++
++int snapshot_destroy(uuid_t *id)
++{
++	struct snapshot *snapshot = NULL;
++
++	pr_info("Destroy snapshot %pUb\n", id);
++	down_write(&snapshots_lock);
++	if (!list_empty(&snapshots)) {
++		struct snapshot *s = NULL;
++
++		list_for_each_entry(s, &snapshots, link) {
++			if (uuid_equal(&s->id, id)) {
++				snapshot = s;
++				list_del(&snapshot->link);
++				break;
++			}
++		}
++	}
++	up_write(&snapshots_lock);
++
++	if (!snapshot) {
++		pr_err("Unable to destroy snapshot: cannot find snapshot by id %pUb\n",
++		       id);
++		return -ENODEV;
++	}
++	snapshot_put(snapshot);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	pr_debug("blksnap memory consumption:\n");
++	memory_object_print();
++	memory_object_max_print();
++#endif
++	return 0;
++}
++
++int snapshot_append_storage(uuid_t *id, struct blk_snap_dev_t dev_id,
++			    struct big_buffer *ranges,
++			    unsigned int range_count)
++{
++	int ret = 0;
++	struct snapshot *snapshot;
++
++	snapshot = snapshot_get_by_id(id);
++	if (!snapshot)
++		return -ESRCH;
++
++	ret = diff_storage_append_block(snapshot->diff_storage,
++					MKDEV(dev_id.mj, dev_id.mn), ranges,
++					range_count);
++	snapshot_put(snapshot);
++	return ret;
++}
++
++int snapshot_take(uuid_t *id)
++{
++	int ret = 0;
++	struct snapshot *snapshot;
++	int inx;
++	unsigned int current_flag;
++
++	snapshot = snapshot_get_by_id(id);
++	if (!snapshot)
++		return -ESRCH;
++
++	if (snapshot->is_taken) {
++		ret = -EALREADY;
++		goto out;
++	}
++
++	if (!snapshot->count) {
++		ret = -ENODEV;
++		goto out;
++	}
++
++	/* Allocate diff area for each device in the snapshot. */
++	for (inx = 0; inx < snapshot->count; inx++) {
++		struct tracker *tracker = snapshot->tracker_array[inx];
++		struct diff_area *diff_area;
++
++		if (!tracker)
++			continue;
++
++		diff_area =
++			diff_area_new(tracker->dev_id, snapshot->diff_storage);
++		if (IS_ERR(diff_area)) {
++			ret = PTR_ERR(diff_area);
++			goto fail;
++		}
++		tracker->diff_area = diff_area;
++	}
++
++	/* Try to flush and freeze file system on each original block device. */
++	for (inx = 0; inx < snapshot->count; inx++) {
++		struct tracker *tracker = snapshot->tracker_array[inx];
++
++		if (!tracker)
++			continue;
++
++		if (freeze_bdev(tracker->diff_area->orig_bdev))
++			pr_err("Failed to freeze device [%u:%u]\n",
++			       MAJOR(tracker->dev_id), MINOR(tracker->dev_id));
++	}
++
++	current_flag = memalloc_noio_save();
++	tracker_lock();
++
++	/*
++	 * Take snapshot - switch CBT tables and enable COW logic
++	 * for each tracker.
++	 */
++	for (inx = 0; inx < snapshot->count; inx++) {
++		if (!snapshot->tracker_array[inx])
++			continue;
++		ret = tracker_take_snapshot(snapshot->tracker_array[inx]);
++		if (ret) {
++			pr_err("Unable to take snapshot: failed to capture snapshot %pUb\n",
++			       &snapshot->id);
++
++			break;
++		}
++	}
++
++	if (ret) {
++		while (inx--) {
++			struct tracker *tracker = snapshot->tracker_array[inx];
++
++			if (tracker)
++				tracker_release_snapshot(tracker);
++		}
++	} else
++		snapshot->is_taken = true;
++
++	tracker_unlock();
++	memalloc_noio_restore(current_flag);
++
++	/* Thaw file systems on original block devices. */
++	for (inx = 0; inx < snapshot->count; inx++) {
++		struct tracker *tracker = snapshot->tracker_array[inx];
++
++		if (!tracker)
++			continue;
++
++		if (thaw_bdev(tracker->diff_area->orig_bdev))
++			pr_err("Failed to thaw device [%u:%u]\n",
++			       MAJOR(tracker->dev_id), MINOR(tracker->dev_id));
++	}
++
++	if (ret)
++		goto fail;
++
++	pr_info("Snapshot was taken successfully\n");
++
++	/*
++	 * Sometimes a snapshot is in a corrupted state immediately
++	 * after it is taken.
++	 */
++	for (inx = 0; inx < snapshot->count; inx++) {
++		struct tracker *tracker = snapshot->tracker_array[inx];
++
++		if (!tracker)
++			continue;
++
++		if (diff_area_is_corrupted(tracker->diff_area)) {
++			pr_err("Unable to freeze device [%u:%u]: diff area is corrupted\n",
++			       MAJOR(tracker->dev_id), MINOR(tracker->dev_id));
++			ret = -EFAULT;
++			goto fail;
++		}
++	}
++
++	/* Create all image block devices. */
++	for (inx = 0; inx < snapshot->count; inx++) {
++		struct snapimage *snapimage;
++		struct tracker *tracker = snapshot->tracker_array[inx];
++
++		if (!tracker)
++			continue;
++
++		snapimage =
++			snapimage_create(tracker->diff_area, tracker->cbt_map);
++		if (IS_ERR(snapimage)) {
++			ret = PTR_ERR(snapimage);
++			pr_err("Failed to create snapshot image for device [%u:%u] with error=%d\n",
++			       MAJOR(tracker->dev_id), MINOR(tracker->dev_id),
++			       ret);
++			break;
++		}
++		snapshot->snapimage_array[inx] = snapimage;
++	}
++
++	goto out;
++fail:
++	pr_err("Unable to take snapshot: failed to capture snapshot %pUb\n",
++	       &snapshot->id);
++
++	down_write(&snapshots_lock);
++	list_del(&snapshot->link);
++	up_write(&snapshots_lock);
++	snapshot_put(snapshot);
++out:
++	snapshot_put(snapshot);
++	return ret;
++}
++
++struct event *snapshot_wait_event(uuid_t *id, unsigned long timeout_ms)
++{
++	struct snapshot *snapshot;
++	struct event *event;
++
++	//pr_debug("Wait event\n");
++	snapshot = snapshot_get_by_id(id);
++	if (!snapshot)
++		return ERR_PTR(-ESRCH);
++
++	event = event_wait(&snapshot->diff_storage->event_queue, timeout_ms);
++
++	snapshot_put(snapshot);
++	return event;
++}
++
++static inline int uuid_copy_to_user(uuid_t __user *dst, const uuid_t *src)
++{
++	int len;
++
++	len = copy_to_user(dst, src, sizeof(uuid_t));
++	if (len)
++		return -ENODATA;
++	return 0;
++}
++
++int snapshot_collect(unsigned int *pcount, uuid_t __user *id_array)
++{
++	int ret = 0;
++	int inx = 0;
++	struct snapshot *s;
++
++	pr_debug("Collect snapshots\n");
++
++	down_read(&snapshots_lock);
++	if (list_empty(&snapshots))
++		goto out;
++
++	if (!id_array) {
++		list_for_each_entry(s, &snapshots, link)
++			inx++;
++		goto out;
++	}
++
++	list_for_each_entry(s, &snapshots, link) {
++		if (inx >= *pcount) {
++			ret = -ENODATA;
++			goto out;
++		}
++
++		ret = uuid_copy_to_user(&id_array[inx], &s->id);
++		if (ret) {
++			pr_err("Unable to collect snapshots: failed to copy data to user buffer\n");
++			goto out;
++		}
++
++		inx++;
++	}
++out:
++	up_read(&snapshots_lock);
++	*pcount = inx;
++	return ret;
++}
++
++int snapshot_collect_images(
++	uuid_t *id, struct blk_snap_image_info __user *user_image_info_array,
++	unsigned int *pcount)
++{
++	int ret = 0;
++	int inx;
++	unsigned long len;
++	struct blk_snap_image_info *image_info_array = NULL;
++	struct snapshot *snapshot;
++
++	pr_debug("Collect images for snapshots\n");
++
++	snapshot = snapshot_get_by_id(id);
++	if (!snapshot)
++		return -ESRCH;
++
++	if (!snapshot->is_taken) {
++		ret = -ENODEV;
++		goto out;
++	}
++
++	pr_debug("Found snapshot with %d devices\n", snapshot->count);
++	if (!user_image_info_array) {
++		pr_debug(
++			"Unable to collect snapshot images: user's buffer is not set\n");
++		goto out;
++	}
++
++	if (*pcount < snapshot->count) {
++		ret = -ENODATA;
++		goto out;
++	}
++
++	image_info_array =
++		kcalloc(snapshot->count, sizeof(struct blk_snap_image_info),
++			GFP_KERNEL);
++	if (!image_info_array) {
++		pr_err("Unable to collect snapshot images: not enough memory.\n");
++		ret = -ENOMEM;
++		goto out;
++	}
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	memory_object_inc(memory_object_blk_snap_image_info);
++#endif
++	for (inx = 0; inx < snapshot->count; inx++) {
++		if (snapshot->tracker_array[inx]) {
++			dev_t orig_dev_id =
++				snapshot->tracker_array[inx]->dev_id;
++
++			pr_debug("Original [%u:%u]\n",
++				 MAJOR(orig_dev_id),
++				 MINOR(orig_dev_id));
++			image_info_array[inx].orig_dev_id.mj =
++				MAJOR(orig_dev_id);
++			image_info_array[inx].orig_dev_id.mn =
++				MINOR(orig_dev_id);
++		}
++
++		if (snapshot->snapimage_array[inx]) {
++			dev_t image_dev_id =
++				snapshot->snapimage_array[inx]->image_dev_id;
++
++			pr_debug("Image [%u:%u]\n",
++				 MAJOR(image_dev_id),
++				 MINOR(image_dev_id));
++			image_info_array[inx].image_dev_id.mj =
++				MAJOR(image_dev_id);
++			image_info_array[inx].image_dev_id.mn =
++				MINOR(image_dev_id);
++		}
++	}
++
++	len = copy_to_user(user_image_info_array, image_info_array,
++			   snapshot->count *
++				   sizeof(struct blk_snap_image_info));
++	if (len != 0) {
++		pr_err("Unable to collect snapshot images: failed to copy data to user buffer\n");
++		ret = -ENODATA;
++	}
++out:
++	*pcount = snapshot->count;
++
++	kfree(image_info_array);
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++	if (image_info_array)
++		memory_object_dec(memory_object_blk_snap_image_info);
++#endif
++	snapshot_put(snapshot);
++
++	return ret;
++}
++
++int snapshot_mark_dirty_blocks(dev_t image_dev_id,
++			       struct blk_snap_block_range *block_ranges,
++			       unsigned int count)
++{
++	int ret = 0;
++	int inx = 0;
++	struct snapshot *s;
++	struct cbt_map *cbt_map = NULL;
++
++	pr_debug("Marking [%u] dirty blocks for device [%u:%u]\n", count,
++		 MAJOR(image_dev_id), MINOR(image_dev_id));
++
++	down_read(&snapshots_lock);
++	if (list_empty(&snapshots))
++		goto out;
++
++	list_for_each_entry(s, &snapshots, link) {
++		for (inx = 0; inx < s->count; inx++) {
++			if (!s->snapimage_array[inx])
++				continue;
++			if (s->snapimage_array[inx]->image_dev_id ==
++			    image_dev_id) {
++				cbt_map = s->snapimage_array[inx]->cbt_map;
++				break;
++			}
++		}
++
++		if (cbt_map)
++			break;
++	}
++	if (!cbt_map) {
++		pr_err("Cannot find snapshot image device [%u:%u]\n",
++		       MAJOR(image_dev_id), MINOR(image_dev_id));
++		ret = -ENODEV;
++		goto out;
++	}
++
++	ret = cbt_map_mark_dirty_blocks(cbt_map, block_ranges, count);
++	if (ret)
++		pr_err("Failed to set CBT table. errno=%d\n", abs(ret));
++out:
++	up_read(&snapshots_lock);
++
++	return ret;
++}
+diff --git a/drivers/block/blksnap/snapshot.h b/drivers/block/blksnap/snapshot.h
+new file mode 100644
+index 000000000000..6929d7ab62e8
+--- /dev/null
++++ b/drivers/block/blksnap/snapshot.h
+@@ -0,0 +1,76 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#pragma once
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include "big_buffer.h"
++#include "event_queue.h"
++
++struct tracker;
++struct diff_storage;
++struct snapimage;
++/**
++ * struct snapshot - Snapshot structure.
++ * @link:
++ *	The list header allows storing snapshots in a linked list.
++ * @kref:
++ *	Protects the structure from being released during the processing of
++ *	an ioctl.
++ * @id:
++ *	UUID of the snapshot.
++ * @is_taken:
++ *	Flag that the snapshot was taken.
++ * @diff_storage:
++ *	A pointer to the difference storage of this snapshot.
++ * @count:
++ *	The number of block devices in the snapshot. This number
++ *	corresponds to the size of the arrays of pointers to trackers
++ *	and snapshot images.
++ * @tracker_array:
++ *	Array of pointers to block device trackers.
++ * @snapimage_array:
++ *	Array of pointers to images of snapshots of block devices.
++ *
++ * A snapshot corresponds to a single backup session and provides snapshot
++ * images for multiple block devices. Several backup sessions can be
++ * performed at the same time, which means that several snapshots can
++ * exist at the same time. However, the original block device can only
++ * belong to one snapshot. Creating multiple snapshots from the same block
++ * device is not allowed.
++ *
++ * A UUID is used to identify the snapshot.
++ */
++struct snapshot {
++	struct list_head link;
++	struct kref kref;
++	uuid_t id;
++	bool is_taken;
++	struct diff_storage *diff_storage;
++	int count;
++	struct tracker **tracker_array;
++	struct snapimage **snapimage_array;
++};
++
++void snapshot_done(void);
++
++int snapshot_create(struct blk_snap_dev_t *dev_id_array, unsigned int count,
++		    uuid_t *id);
++int snapshot_destroy(uuid_t *id);
++int snapshot_append_storage(uuid_t *id, struct blk_snap_dev_t dev_id,
++			    struct big_buffer *ranges,
++			    unsigned int range_count);
++int snapshot_take(uuid_t *id);
++struct event *snapshot_wait_event(uuid_t *id, unsigned long timeout_ms);
++int snapshot_collect(unsigned int *pcount, uuid_t __user *id_array);
++int snapshot_collect_images(uuid_t *id,
++			    struct blk_snap_image_info __user *image_info_array,
++			    unsigned int *pcount);
++int snapshot_mark_dirty_blocks(dev_t image_dev_id,
++			       struct blk_snap_block_range *block_ranges,
++			       unsigned int count);
+--
+2.20.1
+
diff --git a/patches/lk5.19/0017-block-blksnap-debugging-mechanism-for-monitoring-mem.patch b/patches/lk5.19/0017-block-blksnap-debugging-mechanism-for-monitoring-mem.patch
new file mode 100644
index 00000000..c229dfd3
--- /dev/null
+++ b/patches/lk5.19/0017-block-blksnap-debugging-mechanism-for-monitoring-mem.patch
@@ -0,0 +1,175 @@
+From 0b32e39d05f3bfbfe88247e91ee229dad3549a68 Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:42:35 +0200
+Subject: [PATCH 17/20] block, blksnap: debugging mechanism for monitoring
+ memory consumption
+
+The kernel, of course, already contains mechanisms for detecting memory
+leaks, as well as tools for monitoring memory consumption. However, this
+mechanism turned out to be very simple and convenient for debugging, and
+therefore it was decided to keep it in the current version.
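+
+As a sketch of the accounting pattern used throughout the module (the
+helper chunk_alloc_counted() below is hypothetical and shown only for
+illustration; memory_object_inc(), memory_object_dec() and the
+memory_object_chunk counter are the real interface):
+
+	static struct chunk *chunk_alloc_counted(void)
+	{
+		struct chunk *chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+
+		if (!chunk)
+			return NULL;
+	#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
+		/* Pair every allocation with the matching counter. */
+		memory_object_inc(memory_object_chunk);
+	#endif
+		return chunk;
+	}
+
+On teardown, memory_object_print() reports every counter that did not
+return to zero, i.e. each leaked object type, and returns the total
+number of objects still allocated.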
+
+Signed-off-by: Sergei Shtepa
+---
+ drivers/block/blksnap/memory_checker.c | 100 +++++++++++++++++++++++++
+ drivers/block/blksnap/memory_checker.h | 41 ++++++++++
+ 2 files changed, 141 insertions(+)
+ create mode 100644 drivers/block/blksnap/memory_checker.c
+ create mode 100644 drivers/block/blksnap/memory_checker.h
+
+diff --git a/drivers/block/blksnap/memory_checker.c b/drivers/block/blksnap/memory_checker.c
+new file mode 100644
+index 000000000000..96988ed99f08
+--- /dev/null
++++ b/drivers/block/blksnap/memory_checker.c
+@@ -0,0 +1,100 @@
++// SPDX-License-Identifier: GPL-2.0
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++#define pr_fmt(fmt) KBUILD_MODNAME "-memory_checker: " fmt
++#include
++#include
++#include "memory_checker.h"
++
++char *memory_object_names[] = {
++	/*alloc_page*/
++	"page",
++	/*kzalloc*/
++	"cbt_map",
++	"chunk",
++	"blk_snap_snapshot_event",
++	"diff_area",
++	"big_buffer",
++	"diff_io",
++	"diff_storage",
++	"storage_bdev",
++	"storage_block",
++	"diff_region",
++	"diff_buffer",
++	"event",
++	"snapimage",
++	"snapshot",
++	"tracker",
++	"tracked_device",
++	/*kcalloc*/
++	"blk_snap_cbt_info",
++	"blk_snap_block_range",
++	"blk_snap_dev_t",
++	"tracker_array",
++	"snapimage_array",
++	"superblock_array",
++	"blk_snap_image_info",
++	/*end*/
++};
++
++static_assert(
++	sizeof(memory_object_names) == (memory_object_count * sizeof(char *)),
++	"The size of enum memory_object_type is not equal to size of memory_object_names array.");
++
++static atomic_t memory_counter[memory_object_count];
++static atomic_t memory_counter_max[memory_object_count];
++
++void memory_object_inc(enum memory_object_type type)
++{
++	int value;
++
++	if (unlikely(type >= memory_object_count))
++		return;
++
++	value = atomic_inc_return(&memory_counter[type]);
++	if (value > atomic_read(&memory_counter_max[type]))
++		atomic_inc(&memory_counter_max[type]);
++}
++
++void memory_object_dec(enum memory_object_type type)
++{
++	if (unlikely(type >= memory_object_count))
++		return;
++
++	atomic_dec(&memory_counter[type]);
++}
++
++int memory_object_print(void)
++{
++	int inx;
++	int not_free = 0;
++
++	pr_info("Objects in memory:\n");
++	for (inx = 0; inx < memory_object_count; inx++) {
++		int count = atomic_read(&memory_counter[inx]);
++
++		if (count) {
++			not_free += count;
++			pr_info("%s: %d\n", memory_object_names[inx], count);
++		}
++	}
++	if (not_free)
++		pr_info("Found %d allocated objects\n", not_free);
++	else
++		pr_info("All objects have been released\n");
++	return not_free;
++}
++
++void memory_object_max_print(void)
++{
++	int inx;
++
++	pr_info("Maximum objects in memory:\n");
++	for (inx = 0; inx < memory_object_count; inx++) {
++		int count = atomic_read(&memory_counter_max[inx]);
++
++		if (count)
++			pr_info("%s: %d\n", memory_object_names[inx], count);
++	}
++	pr_info(".\n");
++}
++#endif
+diff --git a/drivers/block/blksnap/memory_checker.h b/drivers/block/blksnap/memory_checker.h
+new file mode 100644
+index 000000000000..b19a2584d0ce
+--- /dev/null
++++ b/drivers/block/blksnap/memory_checker.h
+@@ -0,0 +1,41 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#pragma once
++#ifdef CONFIG_BLK_SNAP_DEBUG_MEMORY_LEAK
++
++enum memory_object_type {
++	/*alloc_page*/
++	memory_object_page,
++	/*kzalloc*/
++	memory_object_cbt_map,
++	memory_object_chunk,
++	memory_object_blk_snap_snapshot_event,
++	memory_object_diff_area,
++	memory_object_big_buffer,
++	memory_object_diff_io,
++	memory_object_diff_storage,
++	memory_object_storage_bdev,
++	memory_object_storage_block,
	memory_object_diff_region,
++	memory_object_diff_buffer,
++	memory_object_event,
++	memory_object_snapimage,
++	memory_object_snapshot,
++	memory_object_tracker,
++	memory_object_tracked_device,
++	/*kcalloc*/
++	memory_object_blk_snap_cbt_info,
++	memory_object_blk_snap_block_range,
++	memory_object_blk_snap_dev_t,
++	memory_object_tracker_array,
++	memory_object_snapimage_array,
++	memory_object_superblock_array,
++	memory_object_blk_snap_image_info,
++	/*end*/
++	memory_object_count
++};
++
++void memory_object_inc(enum memory_object_type type);
++void memory_object_dec(enum memory_object_type type);
++int memory_object_print(void);
++void memory_object_max_print(void);
++#endif
+--
+2.20.1
+
diff --git a/patches/lk5.19/0018-block-blksnap-Kconfig.patch b/patches/lk5.19/0018-block-blksnap-Kconfig.patch
new file mode 100644
index 00000000..942d0c75
--- /dev/null
+++ b/patches/lk5.19/0018-block-blksnap-Kconfig.patch
@@ -0,0 +1,124 @@
+From 5044218e210564c4bd9a53e967e9ade316037b1d Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:52:16 +0200
+Subject: [PATCH 18/20] block, blksnap: Kconfig
+
+The module configuration file allows setting default values for
+module parameters.
+
+Signed-off-by: Sergei Shtepa
+---
+ drivers/block/blksnap/Kconfig | 101 ++++++++++++++++++++++++++++
+ 1 file changed, 101 insertions(+)
+ create mode 100644 drivers/block/blksnap/Kconfig
+
+diff --git a/drivers/block/blksnap/Kconfig b/drivers/block/blksnap/Kconfig
+new file mode 100644
+index 000000000000..8588a89e30ad
+--- /dev/null
++++ b/drivers/block/blksnap/Kconfig
+@@ -0,0 +1,101 @@
++# SPDX-License-Identifier: GPL-2.0
++#
++# block io layer filter module configuration
++#
++#
++
++config BLK_SNAP
++	tristate "Block device snapshot and change tracker module"
++	depends on BLK_FILTER
++	help
++	  Allows creating snapshots and tracking block changes for block
++	  devices. Designed for creating backups for any block devices
++	  (without device mapper). Snapshots are temporary and are released
++	  when the backup is completed. Change block tracking allows creating
++	  incremental or differential backups.
++
++config BLK_SNAP_TRACKING_BLOCK_MINIMUM_SHIFT
++	depends on BLK_SNAP
++	int "The power of 2 for the minimum tracking block size"
++	default 16
++	help
++	  The minimum tracking block size by default is 64 KB (shift 16).
++	  It looks good for block devices of 128 GB or smaller.
++	  In this case, the block device is divided into 2097152 blocks.
++
++config BLK_SNAP_TRACKING_BLOCK_MAXIMUM_COUNT
++	depends on BLK_SNAP
++	int "The limit of the maximum number of tracking blocks"
++	default 2097152
++	help
++	  As the size of the block device grows, the size of the tracking
++	  block should also grow. For this purpose, the limit on the maximum
++	  number of blocks is set.
++
++config BLK_SNAP_CHUNK_MINIMUM_SHIFT
++	depends on BLK_SNAP
++	int "The power of 2 for the minimum snapshot chunk size"
++	default 18
++	help
++	  The minimum chunk size by default is 256 KB (shift 18).
++	  It looks good for block devices of 128 GB or smaller.
++	  In this case, the block device is divided into 524288 chunks.
++
++config BLK_SNAP_CHUNK_MAXIMUM_COUNT
++	depends on BLK_SNAP
++	int "The limit of the maximum number of snapshot chunks"
++	default 2097152
++	help
++	  As the size of the block device grows, the size of the chunk
++	  should also grow. For this purpose, the limit on the maximum number
++	  of chunks is set.
++
++config BLK_SNAP_CHUNK_MAXIMUM_IN_CACHE
++	depends on BLK_SNAP
++	int "The limit of the maximum number of chunks in the memory cache"
++	default 64
++	help
++	  Since reading and writing to snapshots is performed in large chunks,
++	  a cache is implemented to optimize reading small portions of data
++	  from the snapshot image. As the number of chunks in the cache
++	  increases, memory consumption also increases.
++	  The minimum recommended value is four.
++
++config BLK_SNAP_FREE_DIFF_BUFFER_POOL_SIZE
++	depends on BLK_SNAP
++	int "The maximum size of the free buffers pool"
++	default 128
++	help
++	  A buffer can be allocated for each chunk. After use, this buffer is
++	  not released immediately, but is sent to the pool of free buffers.
++	  However, if there are too many free buffers in the pool, they are
++	  released immediately. The maximum size of the pool is limited by
++	  this option.
++
++config BLK_SNAP_DIFF_STORAGE_MINIMUM
++	depends on BLK_SNAP
++	int "The minimum allowable size of the difference storage in sectors"
++	default 2097152
++	help
++	  When the remaining free space in the difference storage falls to
++	  this value, an event about the lack of free space is generated.
++
++config BLK_SNAP_DEBUG_MEMORY_LEAK
++	depends on BLK_SNAP
++	bool "Enable memory leak detector"
++	default n
++	help
++	  Enables debugging code to monitor memory consumption by the module.
++
++	  If unsure, say N.
++
++config BLK_SNAP_ALLOW_DIFF_STORAGE_IN_MEMORY
++	depends on BLK_SNAP
++	bool "Allow difference storage in memory"
++	default n
++	help
++	  Enables the ability to create the difference storage in memory.
++	  This feature can be useful for debugging. It can also be used on
++	  mobile phones or other devices if it is guaranteed that there will
++	  not be a large number of writes while the snapshot is held.
++
++	  If unsure, say N.
+--
+2.20.1
+
diff --git a/patches/lk5.19/0019-block-blksnap-Makefile.patch b/patches/lk5.19/0019-block-blksnap-Makefile.patch
new file mode 100644
index 00000000..4ad8c228
--- /dev/null
+++ b/patches/lk5.19/0019-block-blksnap-Makefile.patch
@@ -0,0 +1,42 @@
+From 3c35b6134d7ad657d29b33a4c16e1af59105c917 Mon Sep 17 00:00:00 2001
+From: Sergei Shtepa
+Date: Tue, 7 Jun 2022 14:53:20 +0200
+Subject: [PATCH 19/20] block, blksnap: Makefile
+
+Allows building the module.
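+
+Since the Makefile assigns the objects to obj-m, a sketch of an
+out-of-tree build from the blksnap source directory against the running
+kernel's build tree (the standard kbuild invocation; the paths are
+assumptions) would be:
+
+	make -C /lib/modules/$(uname -r)/build M=$(pwd) modules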
+ +Signed-off-by: Sergei Shtepa +--- + drivers/block/blksnap/Makefile | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + create mode 100644 drivers/block/blksnap/Makefile + +diff --git a/drivers/block/blksnap/Makefile b/drivers/block/blksnap/Makefile +new file mode 100644 +index 000000000000..18b6b9e8f944 +--- /dev/null ++++ b/drivers/block/blksnap/Makefile +@@ -0,0 +1,20 @@ ++# SPDX-License-Identifier: GPL-2.0 ++KERNEL_MODULE_NAME := blksnap ++ ++$(KERNEL_MODULE_NAME)-y += big_buffer.o ++$(KERNEL_MODULE_NAME)-y += cbt_map.o ++$(KERNEL_MODULE_NAME)-y += chunk.o ++$(KERNEL_MODULE_NAME)-y += ctrl.o ++$(KERNEL_MODULE_NAME)-y += diff_io.o ++$(KERNEL_MODULE_NAME)-y += diff_area.o ++$(KERNEL_MODULE_NAME)-y += diff_buffer.o ++$(KERNEL_MODULE_NAME)-y += diff_storage.o ++$(KERNEL_MODULE_NAME)-y += event_queue.o ++$(KERNEL_MODULE_NAME)-y += main.o ++$(KERNEL_MODULE_NAME)-y += snapimage.o ++$(KERNEL_MODULE_NAME)-y += snapshot.o ++$(KERNEL_MODULE_NAME)-y += sysfs.o ++$(KERNEL_MODULE_NAME)-y += tracker.o ++$(KERNEL_MODULE_NAME)-y += memory_checker.o ++ ++obj-m += $(KERNEL_MODULE_NAME).o +-- +2.20.1 + diff --git a/patches/lk5.19/0020-block-blksnap-adds-a-blksnap-to-the-kernel-tree.patch b/patches/lk5.19/0020-block-blksnap-adds-a-blksnap-to-the-kernel-tree.patch new file mode 100644 index 00000000..32bf8369 --- /dev/null +++ b/patches/lk5.19/0020-block-blksnap-adds-a-blksnap-to-the-kernel-tree.patch @@ -0,0 +1,36 @@ +From a203abd2b70fe3a9da925eeb9165a86cba247cbc Mon Sep 17 00:00:00 2001 +From: Sergei Shtepa +Date: Tue, 7 Jun 2022 14:53:54 +0200 +Subject: [PATCH 20/20] block, blksnap: adds a blksnap to the kernel tree + +Signed-off-by: Sergei Shtepa +--- + drivers/block/Kconfig | 2 ++ + drivers/block/Makefile | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig +index fdb81f2794cd..e8e12bed6cc4 100644 +--- a/drivers/block/Kconfig ++++ b/drivers/block/Kconfig +@@ -410,4 +410,6 @@ config BLK_DEV_RBD + + source "drivers/block/rnbd/Kconfig" + ++source "drivers/block/blksnap/Kconfig" ++ + endif # BLK_DEV +diff --git a/drivers/block/Makefile b/drivers/block/Makefile +index 934a9c7c3a7c..0d95279bbe92 100644 +--- a/drivers/block/Makefile ++++ b/drivers/block/Makefile +@@ -38,5 +38,6 @@ obj-$(CONFIG_ZRAM) += zram/ + obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ + + obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ ++obj-$(CONFIG_BLK_SNAP) += blksnap/ + + swim_mod-y := swim.o swim_asm.o +-- +2.20.1 +