From d07b7c7f210e3f92775ad93a7306812edf524bfc Mon Sep 17 00:00:00 2001 From: Chris Dunlop Date: Mon, 16 Mar 2015 12:21:21 +1100 Subject: [PATCH 01/16] Reduce size of zfs_sb_t: allocate z_hold_mtx separately zfs_sb_t has grown to the point where using kmem_zalloc() for allocations is triggering the 32k warning threshold. We can't safely convert this entire allocation to use vmem_alloc() instead of kmem_alloc() because the backing_dev_info structure is embedded here. It depends on the bit_waitqueue() function which won't behave properly when given a virtual address. Instead, use vmem_alloc() to allocate the z_hold_mtx array separately. Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlop Closes #3178 --- include/sys/zfs_vfsops.h | 2 +- module/zfs/zfs_vfsops.c | 6 ++++++ module/zfs/zfs_znode.c | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index 4b88260de2b9..c4db2a911d3e 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -92,7 +92,7 @@ typedef struct zfs_sb { uint64_t z_replay_eof; /* New end of file - replay only */ sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ #define ZFS_OBJ_MTX_SZ 256 - kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */ + kmutex_t *z_hold_mtx; /* znode hold locks */ } zfs_sb_t; #define ZFS_SUPER_MAGIC 0x2fc12fc1 diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index e98f4bf6a120..41a1c4d8849d 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -776,6 +776,9 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp) rrw_init(&zsb->z_teardown_lock, B_FALSE); rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL); + + zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ, + KM_SLEEP); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); @@ -789,6 +792,8 @@ zfs_sb_create(const char 
*osname, zfs_sb_t **zsbp) out: dmu_objset_disown(os, zsb); *zsbp = NULL; + + vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); kmem_free(zsb, sizeof (zfs_sb_t)); return (error); } @@ -892,6 +897,7 @@ zfs_sb_free(zfs_sb_t *zsb) rw_destroy(&zsb->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zsb->z_hold_mtx[i]); + vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); mutex_destroy(&zsb->z_ctldir_lock); avl_destroy(&zsb->z_ctldir_snaps); kmem_free(zsb, sizeof (zfs_sb_t)); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 3a7c30db2cfa..a96ac8338f47 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1731,6 +1731,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) list_create(&zsb->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); + zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ, + KM_SLEEP); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); @@ -1755,6 +1757,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zsb->z_hold_mtx[i]); + vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ); kmem_free(sb, sizeof (struct super_block)); kmem_free(zsb, sizeof (zfs_sb_t)); } From e89bd69775d5379f6835dade1e14005aa17f4903 Mon Sep 17 00:00:00 2001 From: Isaac Huang Date: Sat, 21 Mar 2015 23:32:54 -0600 Subject: [PATCH 02/16] zio_injection_enabled should not be a module option The zio_inject.c keeps zio_injection_enabled as a counter of fault handlers, so it should not be exported to user space as a module option. Several EXPORT_SYMBOLs are moved from zio.c to zio_inject.c, where the symbols are defined. 
Signed-off-by: Isaac Huang Signed-off-by: Brian Behlendorf Closes #3199 --- man/man5/zfs-module-parameters.5 | 11 ----------- module/zfs/zio.c | 8 -------- module/zfs/zio_inject.c | 9 +++++++-- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index fe31e292a792..9a3e2149ae80 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1413,17 +1413,6 @@ Max zio millisec delay before posting event Default value: \fB30,000\fR. .RE -.sp -.ne 2 -.na -\fBzio_injection_enabled\fR (int) -.ad -.RS 12n -Enable fault injection -.sp -Use \fB1\fR for yes and \fB0\fR for no (default). -.RE - .sp .ne 2 .na diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 49e2d93b6783..ad0064443d94 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3331,14 +3331,6 @@ zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_phys_t *zb1, } #if defined(_KERNEL) && defined(HAVE_SPL) -/* Fault injection */ -EXPORT_SYMBOL(zio_injection_enabled); -EXPORT_SYMBOL(zio_inject_fault); -EXPORT_SYMBOL(zio_inject_list_next); -EXPORT_SYMBOL(zio_clear_fault); -EXPORT_SYMBOL(zio_handle_fault_injection); -EXPORT_SYMBOL(zio_handle_device_injection); -EXPORT_SYMBOL(zio_handle_label_injection); EXPORT_SYMBOL(zio_type_name); EXPORT_SYMBOL(zio_buf_alloc); EXPORT_SYMBOL(zio_data_buf_alloc); diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index c168f3b47f2e..5afb23c595ae 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -525,6 +525,11 @@ zio_inject_fini(void) } #if defined(_KERNEL) && defined(HAVE_SPL) -module_param(zio_injection_enabled, int, 0644); -MODULE_PARM_DESC(zio_injection_enabled, "Enable fault injection"); +EXPORT_SYMBOL(zio_injection_enabled); +EXPORT_SYMBOL(zio_inject_fault); +EXPORT_SYMBOL(zio_inject_list_next); +EXPORT_SYMBOL(zio_clear_fault); +EXPORT_SYMBOL(zio_handle_fault_injection); +EXPORT_SYMBOL(zio_handle_device_injection); 
+EXPORT_SYMBOL(zio_handle_label_injection); #endif From 7b4536c710adea88f160c6f9ae140ae5279c8183 Mon Sep 17 00:00:00 2001 From: Gordan Bobic Date: Mon, 23 Mar 2015 16:17:56 +0000 Subject: [PATCH 03/16] Execute udevadm settle before trying to import pools Execute udevadm settle before trying to import pools. Otherwise the disk device nodes may not be ready before import time. This is analogous to the behavior of the init scripts and systemd units. Signed-off-by: Gordan Bobic Signed-off-by: Pavel Snajdr Signed-off-by: Brian Behlendorf Closes #3213 --- dracut/90zfs/mount-zfs.sh.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dracut/90zfs/mount-zfs.sh.in b/dracut/90zfs/mount-zfs.sh.in index ffc3f201350a..8237d76f2efe 100755 --- a/dracut/90zfs/mount-zfs.sh.in +++ b/dracut/90zfs/mount-zfs.sh.in @@ -12,6 +12,9 @@ if getargbool 0 zfs_force -y zfs.force -y zfsforce ; then ZPOOL_FORCE="-f" fi +# Delay until all required block devices are present. +udevadm settle + case "$root" in zfs:*) # We have ZFS modules loaded, so we're able to import pools now. From ded576e28fe70a40e78a90e4668de8130d599380 Mon Sep 17 00:00:00 2001 From: Tim Chase Date: Mon, 23 Mar 2015 12:10:19 -0500 Subject: [PATCH 04/16] Set the maximum ZVOL transfer size correctly ZoL had been setting max_sectors to UINT_MAX, but until Linux 3.19, the kernel artificially capped it at 1024 (BLK_DEF_MAX_SECTORS). This cap was removed in torvalds/linux@34b48db. This patch changes it to DMU_MAX_ACCESS (in sectors) and also changes the ASSERT in dmu_tx_hold_write() to allow the maximum transfer size.
Signed-off-by: Tim Chase Signed-off-by: Brian Behlendorf Closes #3212 --- module/zfs/dmu_tx.c | 2 +- module/zfs/zvol.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index cdf5a6d0fcfa..3d6dcc70f305 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -420,7 +420,7 @@ dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len) dmu_tx_hold_t *txh; ASSERT(tx->tx_txg == 0); - ASSERT(len < DMU_MAX_ACCESS); + ASSERT(len <= DMU_MAX_ACCESS); ASSERT(len == 0 || UINT64_MAX - off >= len - 1); txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 4febbb6bf1d7..d180b5b5b76f 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1389,7 +1389,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev) set_capacity(zv->zv_disk, zv->zv_volsize >> 9); - blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX); + blk_queue_max_hw_sectors(zv->zv_queue, DMU_MAX_ACCESS / 512); blk_queue_max_segments(zv->zv_queue, UINT16_MAX); blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); From 58806b4cdc32e6f4e4a214cfba3b62a24efb34b7 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Tue, 24 Mar 2015 17:00:08 -0700 Subject: [PATCH 05/16] dbuf_free_range() overzealously frees dbufs When called to free a spill block from a dnode, dbuf_free_range() has a bug that results in all dbufs for the dnode getting freed. A variety of problems may result from this bug, but a common one was a zap lookup tripping an ASSERT because the zap buffers had been zeroed out. This could happen on a dataset with xattr=sa set when extended attributes are written and removed on a directory concurrently with I/O to files in that directory. 
Signed-off-by: Ned Bass Signed-off-by: Tim Chase Signed-off-by: Brian Behlendorf Fixes #3195 Fixes #3204 Fixes #3222 --- module/zfs/dbuf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index f10a04d112a8..7a0c666395c8 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -898,9 +898,14 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) db_next = list_next(&dn->dn_dbufs, db); ASSERT(db->db_blkid != DMU_BONUS_BLKID); + /* Skip indirect blocks. */ if (db->db_level != 0) continue; - if ((db->db_blkid < start || db->db_blkid > end) && !freespill) + /* Skip direct blocks outside the range. */ + if (!freespill && (db->db_blkid < start || db->db_blkid > end)) + continue; + /* Skip all direct blocks, only free spill blocks. */ + if (freespill && (db->db_blkid != DMU_SPILL_BLKID)) continue; /* found a level 0 buffer in the range */ From 7d90f569b3f05def7cbd0a52ce8ac3040364d702 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 20 Mar 2015 15:10:24 -0700 Subject: [PATCH 06/16] Check all vdev labels in 'zpool import' When using 'zpool import' to scan for available pools prefer vdev names which reference vdevs with more valid labels. There should be two labels at the start of the device and two labels at the end of the device. If labels are missing then the device has been damaged or is in some other way incomplete. Preferring names with fully intact labels helps weed out bad paths and improves the likelihood of being able to import the pool. This behavior only applies when scanning /dev/ for valid pools. If a cache file exists the pools described by the cache file will be used. 
Signed-off-by: Brian Behlendorf Signed-off-by: Chris Dunlap Closes #3145 Closes #2844 Closes #3107 --- cmd/mount_zfs/mount_zfs.c | 2 +- cmd/zpool/zpool_vdev.c | 2 +- include/libzfs.h | 2 +- lib/libzfs/libzfs_import.c | 74 +++++++++++++++++++++++++++++--------- 4 files changed, 61 insertions(+), 19 deletions(-) diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c index b168f719a595..6cdb668f54c1 100644 --- a/cmd/mount_zfs/mount_zfs.c +++ b/cmd/mount_zfs/mount_zfs.c @@ -239,7 +239,7 @@ parse_dataset(char *dataset) if (fd < 0) goto out; - error = zpool_read_label(fd, &config); + error = zpool_read_label(fd, &config, NULL); (void) close(fd); if (error) goto out; diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 93a968dbadc7..cf6d2bfa56d5 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -597,7 +597,7 @@ is_spare(nvlist_t *config, const char *path) if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || !inuse || state != POOL_STATE_SPARE || - zpool_read_label(fd, &label) != 0) { + zpool_read_label(fd, &label, NULL) != 0) { free(name); (void) close(fd); return (B_FALSE); diff --git a/include/libzfs.h b/include/libzfs.h index 108b75f5eb3c..e6a877214a64 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -757,7 +757,7 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, /* * Label manipulation. 
*/ -extern int zpool_read_label(int, nvlist_t **); +extern int zpool_read_label(int, nvlist_t **, int *); extern int zpool_clear_label(int); /* diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 301b08883629..182168456e0c 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -88,6 +88,7 @@ typedef struct name_entry { char *ne_name; uint64_t ne_guid; uint64_t ne_order; + uint64_t ne_num_labels; struct name_entry *ne_next; } name_entry_t; @@ -173,8 +174,23 @@ fix_paths(nvlist_t *nv, name_entry_t *names) break; } - if (best == NULL || ne->ne_order < best->ne_order) + if (best == NULL) { best = ne; + continue; + } + + /* Prefer paths with more vdev labels. */ + if (ne->ne_num_labels > best->ne_num_labels) { + best = ne; + continue; + } + + /* Prefer paths earlier in the search order. */ + if (best->ne_num_labels == best->ne_num_labels && + ne->ne_order < best->ne_order) { + best = ne; + continue; + } } } @@ -200,7 +216,7 @@ fix_paths(nvlist_t *nv, name_entry_t *names) */ static int add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, - int order, nvlist_t *config) + int order, int num_labels, nvlist_t *config) { uint64_t pool_guid, vdev_guid, top_guid, txg, state; pool_entry_t *pe; @@ -226,6 +242,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, } ne->ne_guid = vdev_guid; ne->ne_order = order; + ne->ne_num_labels = num_labels; ne->ne_next = pl->names; pl->names = ne; return (0); @@ -328,6 +345,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, ne->ne_guid = vdev_guid; ne->ne_order = order; + ne->ne_num_labels = num_labels; ne->ne_next = pl->names; pl->names = ne; @@ -843,15 +861,17 @@ label_offset(uint64_t size, int l) /* * Given a file descriptor, read the label information and return an nvlist - * describing the configuration, if there is one. + * describing the configuration, if there is one.
The number of valid + * labels found will be returned in num_labels when non-NULL. */ int -zpool_read_label(int fd, nvlist_t **config) +zpool_read_label(int fd, nvlist_t **config, int *num_labels) { struct stat64 statbuf; - int l; + int l, count = 0; vdev_label_t *label; - uint64_t state, txg, size; + nvlist_t *expected_config = NULL; + uint64_t expected_guid = 0, size; *config = NULL; @@ -863,6 +883,8 @@ zpool_read_label(int fd, nvlist_t **config) return (-1); for (l = 0; l < VDEV_LABELS; l++) { + uint64_t state, guid, txg; + if (pread64(fd, label, sizeof (vdev_label_t), label_offset(size, l)) != sizeof (vdev_label_t)) continue; @@ -871,6 +893,12 @@ zpool_read_label(int fd, nvlist_t **config) sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) continue; + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID, + &guid) != 0 || guid == 0) { + nvlist_free(*config); + continue; + } + if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, &state) != 0 || state > POOL_STATE_L2CACHE) { nvlist_free(*config); @@ -884,12 +912,24 @@ zpool_read_label(int fd, nvlist_t **config) continue; } - free(label); - return (0); + if (expected_guid) { + if (expected_guid == guid) + count++; + + nvlist_free(*config); + } else { + expected_config = *config; + expected_guid = guid; + count++; + } } + if (num_labels != NULL) + *num_labels = count; + free(label); - *config = NULL; + *config = expected_config; + return (0); } @@ -937,7 +977,7 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools) blkid_dev dev; const char *devname; nvlist_t *config; - int fd, err; + int fd, err, num_labels; err = blkid_get_cache(&cache, NULL); if (err != 0) { @@ -972,7 +1012,7 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools) if ((fd = open64(devname, O_RDONLY)) < 0) continue; - err = zpool_read_label(fd, &config); + err = zpool_read_label(fd, &config, &num_labels); (void) close(fd); if (err != 0) { @@ -981,7 +1021,8 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, 
pool_list_t *pools) } if (config != NULL) { - err = add_config(hdl, pools, devname, 0, config); + err = add_config(hdl, pools, devname, 0, + num_labels, config); if (err != 0) goto err_blkid3; } @@ -1017,7 +1058,7 @@ zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { static nvlist_t * zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) { - int i, dirs = iarg->paths; + int i, num_labels, dirs = iarg->paths; DIR *dirp = NULL; struct dirent64 *dp; char path[MAXPATHLEN]; @@ -1143,7 +1184,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) if ((fd = openat64(dfd, name, O_RDONLY)) < 0) continue; - if ((zpool_read_label(fd, &config)) != 0) { + if ((zpool_read_label(fd, &config, &num_labels))) { (void) close(fd); (void) no_memory(hdl); goto error; @@ -1177,7 +1218,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) } /* use the non-raw path for the config */ (void) strlcpy(end, name, pathleft); - if (add_config(hdl, &pools, path, i+1, config)) + if (add_config(hdl, &pools, path, i+1, + num_labels, config)) goto error; } } @@ -1461,7 +1503,7 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, *inuse = B_FALSE; - if (zpool_read_label(fd, &config) != 0) { + if (zpool_read_label(fd, &config, NULL) != 0) { (void) no_memory(hdl); return (-1); } From d820d2e9cf27678dd07b86f8fdd4635162371a37 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 27 Mar 2015 14:30:23 -0700 Subject: [PATCH 07/16] Remove rpm/fedora directory Originally it was thought that custom spec files might be required for Fedora. Happily that has turned out not to be the case. Since this directory just contains symlinks to the generic spec files it can be removed.
Signed-off-by: Brian Behlendorf --- config/zfs-build.m4 | 2 +- configure.ac | 4 ---- rpm/Makefile.am | 2 +- rpm/fedora/.gitignore | 3 --- rpm/fedora/Makefile.am | 1 - rpm/fedora/zfs-dkms.spec.in | 1 - rpm/fedora/zfs-kmod.spec.in | 1 - rpm/fedora/zfs.spec.in | 1 - 8 files changed, 2 insertions(+), 13 deletions(-) delete mode 100644 rpm/fedora/.gitignore delete mode 100644 rpm/fedora/Makefile.am delete mode 120000 rpm/fedora/zfs-dkms.spec.in delete mode 120000 rpm/fedora/zfs-kmod.spec.in delete mode 120000 rpm/fedora/zfs.spec.in diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index 4c8bddf2f297..73ba979d10d1 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -151,7 +151,7 @@ AC_DEFUN([ZFS_AC_RPM], [ RPM_SPEC_DIR="rpm/generic" AC_ARG_WITH([spec], AS_HELP_STRING([--with-spec=SPEC], - [Spec files 'generic|fedora']), + [Spec files 'generic']), [RPM_SPEC_DIR="rpm/$withval"]) AC_MSG_CHECKING([whether spec files are available]) diff --git a/configure.ac b/configure.ac index e0829205afbf..854af771c076 100644 --- a/configure.ac +++ b/configure.ac @@ -134,10 +134,6 @@ AC_CONFIG_FILES([ scripts/zpool-config/Makefile scripts/common.sh rpm/Makefile - rpm/fedora/Makefile - rpm/fedora/zfs.spec - rpm/fedora/zfs-kmod.spec - rpm/fedora/zfs-dkms.spec rpm/generic/Makefile rpm/generic/zfs.spec rpm/generic/zfs-kmod.spec diff --git a/rpm/Makefile.am b/rpm/Makefile.am index e41cdda2e999..b564eeefd9f4 100644 --- a/rpm/Makefile.am +++ b/rpm/Makefile.am @@ -1 +1 @@ -SUBDIRS = fedora generic +SUBDIRS = generic diff --git a/rpm/fedora/.gitignore b/rpm/fedora/.gitignore deleted file mode 100644 index 7f5daafdd6d4..000000000000 --- a/rpm/fedora/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/zfs-dkms.spec -/zfs-kmod.spec -/zfs.spec diff --git a/rpm/fedora/Makefile.am b/rpm/fedora/Makefile.am deleted file mode 100644 index 89b13640d622..000000000000 --- a/rpm/fedora/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST = zfs.spec.in zfs-kmod.spec.in zfs-dkms.spec.in diff --git 
a/rpm/fedora/zfs-dkms.spec.in b/rpm/fedora/zfs-dkms.spec.in deleted file mode 120000 index ffa051baaf03..000000000000 --- a/rpm/fedora/zfs-dkms.spec.in +++ /dev/null @@ -1 +0,0 @@ -../generic/zfs-dkms.spec.in \ No newline at end of file diff --git a/rpm/fedora/zfs-kmod.spec.in b/rpm/fedora/zfs-kmod.spec.in deleted file mode 120000 index af19ecde0058..000000000000 --- a/rpm/fedora/zfs-kmod.spec.in +++ /dev/null @@ -1 +0,0 @@ -../generic/zfs-kmod.spec.in \ No newline at end of file diff --git a/rpm/fedora/zfs.spec.in b/rpm/fedora/zfs.spec.in deleted file mode 120000 index 4c8079166ff8..000000000000 --- a/rpm/fedora/zfs.spec.in +++ /dev/null @@ -1 +0,0 @@ -../generic/zfs.spec.in \ No newline at end of file From ee2ca1db28a0910770b8bd504cf021199f583047 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 25 Mar 2015 16:59:17 -0700 Subject: [PATCH 08/16] Add RHEL style kmod packages Provide a Redhat specific zfs-kmod.spec file which uses the old style kmods (not kmods2) packaging. By using the provided kmodtool script packages can be built which support weak modules. This allows for the kernel to be updated without having to rebuild the ZFS kernel modules. 
Packages for RHEL/Centos/SL/TOSS which use this spec file can be built as follows: $ ./configure --with-spec=redhat $ make rpms Signed-off-by: Brian Behlendorf --- config/zfs-build.m4 | 2 +- configure.ac | 4 ++ rpm/Makefile.am | 2 +- rpm/redhat/.gitignore | 3 ++ rpm/redhat/Makefile.am | 1 + rpm/redhat/zfs-dkms.spec.in | 1 + rpm/redhat/zfs-kmod.spec.in | 86 +++++++++++++++++++++++++++++++++++++ rpm/redhat/zfs.spec.in | 1 + 8 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 rpm/redhat/.gitignore create mode 100644 rpm/redhat/Makefile.am create mode 120000 rpm/redhat/zfs-dkms.spec.in create mode 100644 rpm/redhat/zfs-kmod.spec.in create mode 120000 rpm/redhat/zfs.spec.in diff --git a/config/zfs-build.m4 b/config/zfs-build.m4 index 73ba979d10d1..8d3a373047ec 100644 --- a/config/zfs-build.m4 +++ b/config/zfs-build.m4 @@ -151,7 +151,7 @@ AC_DEFUN([ZFS_AC_RPM], [ RPM_SPEC_DIR="rpm/generic" AC_ARG_WITH([spec], AS_HELP_STRING([--with-spec=SPEC], - [Spec files 'generic']), + [Spec files 'generic|redhat']), [RPM_SPEC_DIR="rpm/$withval"]) AC_MSG_CHECKING([whether spec files are available]) diff --git a/configure.ac b/configure.ac index 854af771c076..63d0073e9a13 100644 --- a/configure.ac +++ b/configure.ac @@ -134,6 +134,10 @@ AC_CONFIG_FILES([ scripts/zpool-config/Makefile scripts/common.sh rpm/Makefile + rpm/redhat/Makefile + rpm/redhat/zfs.spec + rpm/redhat/zfs-kmod.spec + rpm/redhat/zfs-dkms.spec rpm/generic/Makefile rpm/generic/zfs.spec rpm/generic/zfs-kmod.spec diff --git a/rpm/Makefile.am b/rpm/Makefile.am index b564eeefd9f4..f2cf72cef13c 100644 --- a/rpm/Makefile.am +++ b/rpm/Makefile.am @@ -1 +1 @@ -SUBDIRS = generic +SUBDIRS = generic redhat diff --git a/rpm/redhat/.gitignore b/rpm/redhat/.gitignore new file mode 100644 index 000000000000..7f5daafdd6d4 --- /dev/null +++ b/rpm/redhat/.gitignore @@ -0,0 +1,3 @@ +/zfs-dkms.spec +/zfs-kmod.spec +/zfs.spec diff --git a/rpm/redhat/Makefile.am b/rpm/redhat/Makefile.am new file mode 100644 index
000000000000..89b13640d622 --- /dev/null +++ b/rpm/redhat/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST = zfs.spec.in zfs-kmod.spec.in zfs-dkms.spec.in diff --git a/rpm/redhat/zfs-dkms.spec.in b/rpm/redhat/zfs-dkms.spec.in new file mode 120000 index 000000000000..ffa051baaf03 --- /dev/null +++ b/rpm/redhat/zfs-dkms.spec.in @@ -0,0 +1 @@ +../generic/zfs-dkms.spec.in \ No newline at end of file diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in new file mode 100644 index 000000000000..ecc809bc6520 --- /dev/null +++ b/rpm/redhat/zfs-kmod.spec.in @@ -0,0 +1,86 @@ +%bcond_with debug +%bcond_with debug_dmu_tx + +Name: @PACKAGE@-kmod +Version: @VERSION@ +Release: @RELEASE@%{?dist} + +Summary: Kernel module(s) +Group: System Environment/Kernel +License: @ZFS_META_LICENSE@ +URL: http://zfsonlinux.org/ +BuildRequires: %kernel_module_package_buildreqs +BuildRequires: kmod-spl-devel = %{version} +Source0: @PACKAGE@-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) + +# Additional dependency information for the kmod sub-package must be specified +# by generating a preamble text file which kmodtool can append to the spec file. +%(/bin/echo -e "\ +Requires: spl-kmod\n\ +Requires: @PACKAGE@ = %{version}\n\ +Conflicts: @PACKAGE@-dkms\n\n" > %{_sourcedir}/kmod-preamble) + +%description +This package contains the ZFS kernel modules. + +%define kmod_name @PACKAGE@ +%define debug_package %{nil} + +%kernel_module_package -n %{kmod_name} -p %{_sourcedir}/kmod-preamble + +%define ksrc %{_usrsrc}/kernels/%{kverrel} +%define kobj %{ksrc} +%define splsrc %{_usrsrc}/spl-%{version} +%define splobj %{splsrc}/%{kverrel} + +%package -n kmod-%{kmod_name}-devel +Summary: ZFS kernel module(s) devel common +Group: System Environment/Kernel +Requires: kmod-spl-devel = %{version} + +%description -n kmod-%{kmod_name}-devel +This package provides the header files and objects to build kernel modules +which depend on the spl kernel module. 
+ +%prep +if ! [ -d "%{ksrc}" ]; then + echo "Kernel build directory isn't set properly, cannot continue" + exit 1 +fi + +%if %{with debug} +%define debug --enable-debug +%else +%define debug --disable-debug +%endif + +%if %{with debug_dmu_tx} +%define debug_dmu_tx --enable-debug-dmu-tx +%else +%define debug_dmu_tx --disable-debug-dmu-tx +%endif + +%setup -n %{kmod_name}-%{version} +%build +%configure \ + --with-config=kernel \ + --with-linux=%{ksrc} \ + --with-linux-obj=%{kobj} \ + --with-spl="%{splsrc}" \ + --with-spl-obj="%{splobj}" \ + %{debug} \ + %{debug_dmu_tx} +make %{?_smp_mflags} + +%install +make install \ + DESTDIR=${RPM_BUILD_ROOT} \ + INSTALL_MOD_DIR=extra/%{kmod_name} +%{__rm} -f %{buildroot}/lib/modules/%{kverrel}/modules.* + +%clean +rm -rf $RPM_BUILD_ROOT + +%files -n kmod-%{kmod_name}-devel +%{_usrsrc}/%{kmod_name}-%{version} diff --git a/rpm/redhat/zfs.spec.in b/rpm/redhat/zfs.spec.in new file mode 120000 index 000000000000..4c8079166ff8 --- /dev/null +++ b/rpm/redhat/zfs.spec.in @@ -0,0 +1 @@ +../generic/zfs.spec.in \ No newline at end of file From 95a6990d9a77a56eb97b76f2880f95f0f42f4fe0 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Tue, 24 Mar 2015 17:22:21 -0700 Subject: [PATCH 09/16] Add NULL guard in zfs_zrlock_class event class The owner field could be NULL in some cases, so add a guard. Shorten __entry field names to fit assignment statements in 80 columns. 
Signed-off-by: Ned Bass Signed-off-by: Brian Behlendorf Fixes #3220 --- include/sys/trace_zrlock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/sys/trace_zrlock.h b/include/sys/trace_zrlock.h index 3653e3e8d293..8c811c2aa0b3 100644 --- a/include/sys/trace_zrlock.h +++ b/include/sys/trace_zrlock.h @@ -42,28 +42,28 @@ DECLARE_EVENT_CLASS(zfs_zrlock_class, TP_PROTO(zrlock_t *zrl, uint32_t n), TP_ARGS(zrl, n), TP_STRUCT__entry( - __field(int32_t, zr_refcount) + __field(int32_t, refcount) #ifdef ZFS_DEBUG - __field(pid_t, zr_owner_pid) - __field(const char *, zr_caller) + __field(pid_t, owner_pid) + __field(const char *, caller) #endif __field(uint32_t, n) ), TP_fast_assign( - __entry->zr_refcount = zrl->zr_refcount; + __entry->refcount = zrl->zr_refcount; #ifdef ZFS_DEBUG - __entry->zr_owner_pid = zrl->zr_owner->pid; - __entry->zr_caller = zrl->zr_caller; + __entry->owner_pid = zrl->zr_owner ? zrl->zr_owner->pid : 0; + __entry->caller = zrl->zr_caller; #endif - __entry->n = n; + __entry->n = n; ), #ifdef ZFS_DEBUG TP_printk("zrl { refcount %d owner_pid %d caller %s } n %u", - __entry->zr_refcount, __entry->zr_owner_pid, __entry->zr_caller, + __entry->refcount, __entry->owner_pid, __entry->caller, __entry->n) #else TP_printk("zrl { refcount %d } n %u", - __entry->zr_refcount, __entry->n) + __entry->refcount, __entry->n) #endif ); From 9540be9b23fd6f8b5bf7d81853c251010d9b7205 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Thu, 26 Mar 2015 12:10:26 -0700 Subject: [PATCH 10/16] zpool import should honor overlay property Make the 'zpool import' command honor the overlay property to allow filesystems to be mounted on a non-empty directory. As it stands now this property is only checked by the 'zfs mount' command. Move the check into 'zfs_mount()' in libzfs so the property is honored for all callers.
Signed-off-by: Ned Bass Signed-off-by: Brian Behlendorf Closes #3227 --- cmd/zfs/zfs_main.c | 14 -------------- lib/libzfs/libzfs_mount.c | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 83f02666d247..84073435e2d7 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -5641,7 +5641,6 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, char mountpoint[ZFS_MAXPROPLEN]; char shareopts[ZFS_MAXPROPLEN]; char smbshareopts[ZFS_MAXPROPLEN]; - char overlay[ZFS_MAXPROPLEN]; const char *cmdname = op == OP_SHARE ? "share" : "mount"; struct mnttab mnt; uint64_t zoned, canmount; @@ -5748,19 +5747,6 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, return (0); } - /* - * Overlay mounts are disabled by default but may be enabled - * via the 'overlay' property or the 'zfs mount -O' option. - */ - if (!(flags & MS_OVERLAY)) { - if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay, - sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) { - if (strcmp(overlay, "on") == 0) { - flags |= MS_OVERLAY; - } - } - } - /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. If the diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index ac3b6822608c..0e3332e0e3ed 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -388,6 +388,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) struct stat buf; char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX]; + char overlay[ZFS_MAXPROPLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; int remount = 0, rc; @@ -441,6 +442,19 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) } } + /* + * Overlay mounts are disabled by default but may be enabled + * via the 'overlay' property or the 'zfs mount -O' option. 
+ */ + if (!(flags & MS_OVERLAY)) { + if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay, + sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) { + if (strcmp(overlay, "on") == 0) { + flags |= MS_OVERLAY; + } + } + } + /* * Determine if the mountpoint is empty. If so, refuse to perform the * mount. We don't perform this check if 'remount' is From a4069eef2e403a3b2a307b23b7500e2adc6ecae5 Mon Sep 17 00:00:00 2001 From: Prakash Surya Date: Fri, 27 Mar 2015 13:03:22 +1100 Subject: [PATCH 11/16] Illumos 5695 - dmu_sync'ed holes do not retain birth time 5695 dmu_sync'ed holes do not retain birth time Reviewed by: Matthew Ahrens Reviewed by: George Wilson Reviewed by: Christopher Siden Reviewed by: Bayard Bell Approved by: Dan McDonald References: https://www.illumos.org/issues/5695 https://github.com/illumos/illumos-gate/commit/70163ac Ported-by: Chris Dunlop Signed-off-by: Brian Behlendorf Closes #3229 --- cmd/zdb/zdb.c | 4 +++- include/sys/spa.h | 13 +++++++------ module/zfs/dmu.c | 14 +++++++++++++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index b0d7170b92b2..1d76f2a7da3e 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1115,7 +1115,9 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) if (BP_IS_HOLE(bp)) { (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), "B=%llu", + buflen - strlen(blkbuf), + "%llxL B=%llu", + (u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)bp->blk_birth); } else { (void) snprintf(blkbuf + strlen(blkbuf), diff --git a/include/sys/spa.h b/include/sys/spa.h index 2f73793fedb2..83b6723a4f73 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -501,12 +501,13 @@ _NOTE(CONSTCOND) } while (0) if (bp == NULL) { \ len += func(buf + len, size - len, ""); \ } else if (BP_IS_HOLE(bp)) { \ - len += func(buf + len, size - len, ""); \ - if (bp->blk_birth > 0) { \ - len += func(buf + len, size - len, \ - " birth=%lluL", \ - (u_longlong_t)bp->blk_birth); \ - } \ + len += 
func(buf + len, size - len, \ + "HOLE [L%llu %s] " \ + "size=%llxL birth=%lluL", \ + (u_longlong_t)BP_GET_LEVEL(bp), \ + type, \ + (u_longlong_t)BP_GET_LSIZE(bp), \ + (u_longlong_t)bp->blk_birth); \ } else if (BP_IS_EMBEDDED(bp)) { \ len = func(buf + len, size - len, \ "EMBEDDED [L%llu %s] et=%u %s " \ diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 1501ae8046ad..3b7bbefc2f73 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1472,7 +1472,19 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) dr->dt.dl.dr_overridden_by = *zio->io_bp; dr->dt.dl.dr_override_state = DR_OVERRIDDEN; dr->dt.dl.dr_copies = zio->io_prop.zp_copies; - if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by)) + + /* + * Old style holes are filled with all zeros, whereas + * new-style holes maintain their lsize, type, level, + * and birth time (see zio_write_compress). While we + * need to reset the BP_SET_LSIZE() call that happened + * in dmu_sync_ready for old style holes, we do *not* + * want to wipe out the information contained in new + * style holes. Thus, only zero out the block pointer if + * it's an old style hole. + */ + if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by) && + dr->dt.dl.dr_overridden_by.blk_birth == 0) BP_ZERO(&dr->dt.dl.dr_overridden_by); } else { dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; From ee2f17aa2af3d8620470c3bbbe6f9dac9449d546 Mon Sep 17 00:00:00 2001 From: Chris Dunlop Date: Fri, 27 Mar 2015 15:04:12 +1100 Subject: [PATCH 12/16] Align code with Illumos Align code in traverse_visitbp() with that in Illumos in preparation for applying Illumos-5694. No functional change: use a temporary variable pd to replace multiple occurrences of td->td_pfd. This increases our stack use slightly more than normal because the function is called recursively.
Signed-off-by: Chris Dunlop Signed-off-by: Brian Behlendorf Issue #3230 --- module/zfs/dmu_traverse.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index b5c1ec758f8b..a8481aee61ee 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -213,6 +213,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, { int err = 0; arc_buf_t *buf = NULL; + prefetch_data_t *pd = td->td_pfd; switch (resume_skip_check(td, dnp, zb)) { case RESUME_SKIP_ALL: @@ -249,16 +250,14 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, return (0); } - if (td->td_pfd != NULL && !td->td_pfd->pd_exited && - prefetch_needed(td->td_pfd, bp)) { - mutex_enter(&td->td_pfd->pd_mtx); - ASSERT(td->td_pfd->pd_blks_fetched >= 0); - while (td->td_pfd->pd_blks_fetched == 0 && - !td->td_pfd->pd_exited) - cv_wait(&td->td_pfd->pd_cv, &td->td_pfd->pd_mtx); - td->td_pfd->pd_blks_fetched--; - cv_broadcast(&td->td_pfd->pd_cv); - mutex_exit(&td->td_pfd->pd_mtx); + if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) { + mutex_enter(&pd->pd_mtx); + ASSERT(pd->pd_blks_fetched >= 0); + while (pd->pd_blks_fetched == 0 && !pd->pd_exited) + cv_wait(&pd->pd_cv, &pd->pd_mtx); + pd->pd_blks_fetched--; + cv_broadcast(&pd->pd_cv); + mutex_exit(&pd->pd_mtx); } if (BP_IS_HOLE(bp)) { From b738bc5a0f8ccd0281ed06831c34fbe31d2b2138 Mon Sep 17 00:00:00 2001 From: George Wilson Date: Fri, 27 Mar 2015 15:31:52 +1100 Subject: [PATCH 13/16] Illumos 5694 - traverse_prefetcher does not prefetch enough 5694 traverse_prefetcher does not prefetch enough Reviewed by: Matthew Ahrens Reviewed by: Alex Reece Reviewed by: Christopher Siden Reviewed by: Josef 'Jeff' Sipek Reviewed by: Bayard Bell Approved by: Garrett D'Amore References: https://www.illumos.org/issues/5694 https://github.com/illumos/illumos-gate/commit/34d7ce05 Ported-by: Chris Dunlop Signed-off-by: Brian Behlendorf Closes #3230 --- 
man/man5/zfs-module-parameters.5 | 4 ++-- module/zfs/dmu_traverse.c | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 9a3e2149ae80..783d3532ea8d 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1058,10 +1058,10 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable. .sp .ne 2 .na -\fBzfs_pd_blks_max\fR (int) +\fBzfs_pd_bytes_max\fR (int) .ad .RS 12n -Max number of blocks to prefetch +The number of bytes which should be prefetched. .sp Default value: \fB100\fR. .RE diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index a8481aee61ee..9280a89b2f85 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -38,13 +38,12 @@ #include #include -int zfs_pd_blks_max = 100; +int32_t zfs_pd_bytes_max = 50 * 1024 * 1024; /* 50MB */ typedef struct prefetch_data { kmutex_t pd_mtx; kcondvar_t pd_cv; - int pd_blks_max; - int pd_blks_fetched; + int32_t pd_bytes_fetched; int pd_flags; boolean_t pd_cancel; boolean_t pd_exited; @@ -251,11 +250,12 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) { + uint64_t size = BP_GET_LSIZE(bp); mutex_enter(&pd->pd_mtx); - ASSERT(pd->pd_blks_fetched >= 0); - while (pd->pd_blks_fetched == 0 && !pd->pd_exited) + ASSERT(pd->pd_bytes_fetched >= 0); + while (pd->pd_bytes_fetched < size && !pd->pd_exited) cv_wait(&pd->pd_cv, &pd->pd_mtx); - pd->pd_blks_fetched--; + pd->pd_bytes_fetched -= size; cv_broadcast(&pd->pd_cv); mutex_exit(&pd->pd_mtx); } @@ -452,7 +452,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, prefetch_data_t *pfd = arg; uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; - ASSERT(pfd->pd_blks_fetched >= 0); + ASSERT(pfd->pd_bytes_fetched >= 0); if (pfd->pd_cancel) return (SET_ERROR(EINTR)); @@ -460,9 +460,9 @@ traverse_prefetcher(spa_t *spa, zilog_t 
*zilog, const blkptr_t *bp, return (0); mutex_enter(&pfd->pd_mtx); - while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max) + while (!pfd->pd_cancel && pfd->pd_bytes_fetched >= zfs_pd_bytes_max) cv_wait(&pfd->pd_cv, &pfd->pd_mtx); - pfd->pd_blks_fetched++; + pfd->pd_bytes_fetched += BP_GET_LSIZE(bp); cv_broadcast(&pfd->pd_cv); mutex_exit(&pfd->pd_mtx); @@ -531,7 +531,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, td->td_flags = flags; td->td_paused = B_FALSE; - pd->pd_blks_max = zfs_pd_blks_max; pd->pd_flags = flags; mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL); @@ -661,6 +660,6 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, EXPORT_SYMBOL(traverse_dataset); EXPORT_SYMBOL(traverse_pool); -module_param(zfs_pd_blks_max, int, 0644); -MODULE_PARM_DESC(zfs_pd_blks_max, "Max number of blocks to prefetch"); +module_param(zfs_pd_bytes_max, int, 0644); +MODULE_PARM_DESC(zfs_pd_bytes_max, "Max number of bytes to prefetch"); #endif From 0f7d2a4b3d2d7fc0975a7ef53bd3c4700d47c51b Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Fri, 27 Mar 2015 17:11:50 +1100 Subject: [PATCH 14/16] Illumos 5693 - ztest fails in dbuf_verify: buf[i] == 0, due to dedup and bp_override 5693 ztest fails in dbuf_verify: buf[i] == 0, due to dedup and bp_override Reviewed by: George Wilson Reviewed by: Christopher Siden Reviewed by: Bayard Bell Approved by: Dan McDonald References: https://www.illumos.org/issues/5693 https://github.com/illumos/illumos-gate/commit/7f7ace3 Ported-by: Chris Dunlop Signed-off-by: Brian Behlendorf Closes #3231 --- module/zfs/zio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index ad0064443d94..066f04f1864c 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1043,8 +1043,6 @@ zio_write_bp_init(zio_t *zio) zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; return (ZIO_PIPELINE_CONTINUE); } - zio->io_bp_override = NULL; -
BP_ZERO(bp); } if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) { From 74aa2ba259e61512bd029c9e8f857f0611a80bbd Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 31 Mar 2015 11:51:37 -0700 Subject: [PATCH 15/16] Update zfs_pd_bytes_max default in zfs-module-parameters(5) Commit b738bc5 should have updated the default value of zfs_pd_bytes_max in the zfs-module-parameters(5) man page. The correct default value is 50*1024*1024. Signed-off-by: Brian Behlendorf --- man/man5/zfs-module-parameters.5 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 783d3532ea8d..007cb1c71470 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1063,7 +1063,7 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable. .RS 12n The number of bytes which should be prefetched. .sp -Default value: \fB100\fR. +Default value: \fB52,428,800\fR. .RE .sp From 40d06e3c78c23b199dfd9284809e710fab549391 Mon Sep 17 00:00:00 2001 From: Tim Chase Date: Mon, 30 Mar 2015 22:43:29 -0500 Subject: [PATCH 16/16] Mark all ZPL and ioctl functions as PF_FSTRANS Prevent deadlocks by disabling direct reclaim during all ZPL and ioctl calls as well as the l2arc and adapt ARC threads. This obviates the need for MUTEX_FSTRANS so its previous uses and definition have been eliminated.
Signed-off-by: Tim Chase Signed-off-by: Brian Behlendorf Closes #3225 --- include/sys/zfs_context.h | 1 - module/zfs/arc.c | 10 ++++++++-- module/zfs/dbuf.c | 2 +- module/zfs/zfs_ioctl.c | 5 +++++ module/zfs/zfs_znode.c | 18 +++--------------- module/zfs/zpl_file.c | 40 +++++++++++++++++++++++++++++++++++++++ module/zfs/zpl_inode.c | 39 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 96 insertions(+), 19 deletions(-) diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index b8eff58bc615..3dc54f1d7d90 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -273,7 +273,6 @@ typedef struct kmutex { } kmutex_t; #define MUTEX_DEFAULT 0 -#define MUTEX_FSTRANS MUTEX_DEFAULT #define MUTEX_HELD(m) ((m)->m_owner == curthread) #define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m)) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index bdf116c35c52..421c81e1cfe9 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -933,7 +933,7 @@ buf_init(void) for (i = 0; i < BUF_LOCKS; i++) { mutex_init(&buf_hash_table.ht_locks[i].ht_lock, - NULL, MUTEX_FSTRANS, NULL); + NULL, MUTEX_DEFAULT, NULL); } } @@ -2412,9 +2412,11 @@ static void arc_adapt_thread(void) { callb_cpr_t cpr; + fstrans_cookie_t cookie; CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG); + cookie = spl_fstrans_mark(); mutex_enter(&arc_reclaim_thr_lock); while (arc_thread_exit == 0) { #ifndef _KERNEL @@ -2485,6 +2487,7 @@ arc_adapt_thread(void) arc_thread_exit = 0; cv_broadcast(&arc_reclaim_thr_cv); CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */ + spl_fstrans_unmark(cookie); thread_exit(); } @@ -5376,11 +5379,13 @@ l2arc_feed_thread(void) uint64_t size, wrote; clock_t begin, next = ddi_get_lbolt(); boolean_t headroom_boost = B_FALSE; + fstrans_cookie_t cookie; CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG); mutex_enter(&l2arc_feed_thr_lock); + cookie = spl_fstrans_mark(); while (l2arc_thread_exit == 0) { CALLB_CPR_SAFE_BEGIN(&cpr); (void) 
cv_timedwait_interruptible(&l2arc_feed_thr_cv, @@ -5454,6 +5459,7 @@ l2arc_feed_thread(void) next = l2arc_write_interval(begin, size, wrote); spa_config_exit(spa, SCL_L2ARC, dev); } + spl_fstrans_unmark(cookie); l2arc_thread_exit = 0; cv_broadcast(&l2arc_feed_thr_cv); @@ -5570,7 +5576,7 @@ l2arc_init(void) mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL); mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_FSTRANS, NULL); + mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL); l2arc_dev_list = &L2ARC_dev_list; diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 7a0c666395c8..ed6a8fd2a4dc 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -331,7 +331,7 @@ dbuf_init(void) 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); for (i = 0; i < DBUF_MUTEXES; i++) - mutex_init(&h->hash_mutexes[i], NULL, MUTEX_FSTRANS, NULL); + mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); dbuf_stats_init(h); } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index cd7697058983..39783e1091a4 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -5733,6 +5733,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) const zfs_ioc_vec_t *vec; char *saved_poolname = NULL; nvlist_t *innvl = NULL; + fstrans_cookie_t cookie; vecnum = cmd - ZFS_IOC_FIRST; if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) @@ -5827,7 +5828,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) } outnvl = fnvlist_alloc(); + cookie = spl_fstrans_mark(); error = vec->zvec_func(zc->zc_name, innvl, outnvl); + spl_fstrans_unmark(cookie); if (error == 0 && vec->zvec_allow_log && spa_open(zc->zc_name, &spa, FTAG) == 0) { @@ -5855,7 +5858,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) nvlist_free(outnvl); } else { + cookie = 
spl_fstrans_mark(); error = vec->zvec_legacy_func(zc); + spl_fstrans_unmark(cookie); } out: diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index a96ac8338f47..c931a72c3aea 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1097,23 +1097,13 @@ zfs_zinactive(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); uint64_t z_id = zp->z_id; - boolean_t drop_mutex = 0; ASSERT(zp->z_sa_hdl); /* * Don't allow a zfs_zget() while were trying to release this znode. - * - * Linux allows direct memory reclaim which means that any KM_SLEEP - * allocation may trigger inode eviction. This can lead to a deadlock - * through the ->shrink_icache_memory()->evict()->zfs_inactive()-> - * zfs_zinactive() call path. To avoid this deadlock the process - * must not reacquire the mutex when it is already holding it. */ - if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) { - ZFS_OBJ_HOLD_ENTER(zsb, z_id); - drop_mutex = 1; - } + ZFS_OBJ_HOLD_ENTER(zsb, z_id); mutex_enter(&zp->z_lock); @@ -1124,8 +1114,7 @@ zfs_zinactive(znode_t *zp) if (zp->z_unlinked) { mutex_exit(&zp->z_lock); - if (drop_mutex) - ZFS_OBJ_HOLD_EXIT(zsb, z_id); + ZFS_OBJ_HOLD_EXIT(zsb, z_id); zfs_rmnode(zp); return; @@ -1134,8 +1123,7 @@ zfs_zinactive(znode_t *zp) mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); - if (drop_mutex) - ZFS_OBJ_HOLD_EXIT(zsb, z_id); + ZFS_OBJ_HOLD_EXIT(zsb, z_id); } static inline int diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c index 571e04315e9d..66db113064c3 100644 --- a/module/zfs/zpl_file.c +++ b/module/zfs/zpl_file.c @@ -35,13 +35,16 @@ zpl_open(struct inode *ip, struct file *filp) { cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; error = generic_file_open(ip, filp); if (error) return (error); crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -53,12 +56,15 @@ zpl_release(struct inode *ip, struct file *filp) { cred_t *cr = CRED(); int error; + 
fstrans_cookie_t cookie; + cookie = spl_fstrans_mark(); if (ITOZ(ip)->z_atime_dirty) zfs_mark_inode_dirty(ip); crhold(cr); error = -zfs_close(ip, filp->f_flags, cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -71,9 +77,12 @@ zpl_iterate(struct file *filp, struct dir_context *ctx) struct dentry *dentry = filp->f_path.dentry; cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_readdir(dentry->d_inode, ctx, cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -106,9 +115,12 @@ zpl_fsync(struct file *filp, struct dentry *dentry, int datasync) { cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_fsync(dentry->d_inode, datasync, cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -134,9 +146,12 @@ zpl_fsync(struct file *filp, int datasync) struct inode *inode = filp->f_mapping->host; cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_fsync(inode, datasync, cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -162,13 +177,16 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) struct inode *inode = filp->f_mapping->host; cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return (error); crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_fsync(inode, datasync, cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -193,6 +211,7 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, ssize_t read; uio_t uio; int error; + fstrans_cookie_t cookie; uio.uio_iov = (struct iovec *)iovp; uio.uio_resid = count; @@ -201,7 +220,9 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, uio.uio_limit = MAXOFFSET_T; uio.uio_segflg 
= segment; + cookie = spl_fstrans_mark(); error = -zfs_read(ip, &uio, flags, cr); + spl_fstrans_unmark(cookie); if (error < 0) return (error); @@ -271,6 +292,7 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, ssize_t wrote; uio_t uio; int error; + fstrans_cookie_t cookie; if (flags & O_APPEND) *ppos = i_size_read(ip); @@ -282,7 +304,9 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, uio.uio_limit = MAXOFFSET_T; uio.uio_segflg = segment; + cookie = spl_fstrans_mark(); error = -zfs_write(ip, &uio, flags, cr); + spl_fstrans_unmark(cookie); if (error < 0) return (error); @@ -347,13 +371,17 @@ static loff_t zpl_llseek(struct file *filp, loff_t offset, int whence) { #if defined(SEEK_HOLE) && defined(SEEK_DATA) + fstrans_cookie_t cookie; + if (whence == SEEK_DATA || whence == SEEK_HOLE) { struct inode *ip = filp->f_mapping->host; loff_t maxbytes = ip->i_sb->s_maxbytes; loff_t error; spl_inode_lock(ip); + cookie = spl_fstrans_mark(); error = -zfs_holey(ip, whence, &offset); + spl_fstrans_unmark(cookie); if (error == 0) error = lseek_execute(filp, ip, offset, maxbytes); spl_inode_unlock(ip); @@ -414,9 +442,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) struct inode *ip = filp->f_mapping->host; znode_t *zp = ITOZ(ip); int error; + fstrans_cookie_t cookie; + cookie = spl_fstrans_mark(); error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start, (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags); + spl_fstrans_unmark(cookie); if (error) return (error); @@ -446,12 +477,15 @@ zpl_readpage(struct file *filp, struct page *pp) struct inode *ip; struct page *pl[1]; int error = 0; + fstrans_cookie_t cookie; ASSERT(PageLocked(pp)); ip = pp->mapping->host; pl[0] = pp; + cookie = spl_fstrans_mark(); error = -zfs_getpage(ip, pl, 1); + spl_fstrans_unmark(cookie); if (error) { SetPageError(pp); @@ -569,6 +603,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) cred_t *cr 
= CRED(); flock64_t bf; loff_t olen; + fstrans_cookie_t cookie; if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return (error); @@ -593,7 +628,9 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) bf.l_len = len; bf.l_pid = 0; + cookie = spl_fstrans_mark(); error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr); + spl_fstrans_unmark(cookie); spl_inode_unlock(ip); crfree(cr); @@ -663,6 +700,7 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg) xvattr_t xva; xoptattr_t *xoap; int error; + fstrans_cookie_t cookie; if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags))) return (-EFAULT); @@ -697,7 +735,9 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg) xoap->xoa_nodump = B_TRUE; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr); + spl_fstrans_unmark(cookie); crfree(cr); return (error); diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index c009807cb422..fc6231b88485 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -40,12 +40,15 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) cred_t *cr = CRED(); struct inode *ip; int error; + fstrans_cookie_t cookie; if (dlen(dentry) > ZFS_MAXNAMELEN) return (ERR_PTR(-ENAMETOOLONG)); crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL); + spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); crfree(cr); @@ -95,12 +98,15 @@ zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, struct inode *ip; vattr_t *vap; int error; + fstrans_cookie_t cookie; crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode, cr); + cookie = spl_fstrans_mark(); error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL); + spl_fstrans_unmark(cookie); if (error == 0) { VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name)); VERIFY0(zpl_init_acl(ip, dir)); @@ -122,6 +128,7 @@ 
zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, struct inode *ip; vattr_t *vap; int error; + fstrans_cookie_t cookie; /* * We currently expect Linux to supply rdev=0 for all sockets @@ -135,7 +142,9 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, zpl_vap_init(vap, dir, mode, cr); vap->va_rdev = rdev; + cookie = spl_fstrans_mark(); error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL); + spl_fstrans_unmark(cookie); if (error == 0) { VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name)); VERIFY0(zpl_init_acl(ip, dir)); @@ -154,9 +163,12 @@ zpl_unlink(struct inode *dir, struct dentry *dentry) { cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_remove(dir, dname(dentry), cr); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -170,12 +182,15 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) vattr_t *vap; struct inode *ip; int error; + fstrans_cookie_t cookie; crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, mode | S_IFDIR, cr); + cookie = spl_fstrans_mark(); error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL); + spl_fstrans_unmark(cookie); if (error == 0) { VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name)); VERIFY0(zpl_init_acl(ip, dir)); @@ -194,9 +209,12 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry) { cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -208,6 +226,7 @@ zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap; int error; + fstrans_cookie_t cookie; /* * Ensure MNT_SHRINKABLE is set on snapshots to ensure they are @@ -220,7 +239,9 @@ zpl_getattr(struct vfsmount *mnt, struct 
dentry *dentry, struct kstat *stat) if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE))) mnt->mnt_flags |= MNT_SHRINKABLE; + cookie = spl_fstrans_mark(); error = -zfs_getattr_fast(dentry->d_inode, stat); + spl_fstrans_unmark(cookie); ASSERT3S(error, <=, 0); return (error); @@ -233,6 +254,7 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) cred_t *cr = CRED(); vattr_t *vap; int error; + fstrans_cookie_t cookie; error = inode_change_ok(ip, ia); if (error) @@ -249,7 +271,9 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) vap->va_mtime = ia->ia_mtime; vap->va_ctime = ia->ia_ctime; + cookie = spl_fstrans_mark(); error = -zfs_setattr(ip, vap, 0, cr); + spl_fstrans_unmark(cookie); if (!error && (ia->ia_valid & ATTR_MODE)) error = zpl_chmod_acl(ip); @@ -266,9 +290,12 @@ zpl_rename(struct inode *sdip, struct dentry *sdentry, { cred_t *cr = CRED(); int error; + fstrans_cookie_t cookie; crhold(cr); + cookie = spl_fstrans_mark(); error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0); + spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -282,12 +309,15 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) vattr_t *vap; struct inode *ip; int error; + fstrans_cookie_t cookie; crhold(cr); vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr); + cookie = spl_fstrans_mark(); error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0); + spl_fstrans_unmark(cookie); if (error == 0) { VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name)); d_instantiate(dentry, ip); @@ -309,6 +339,7 @@ zpl_follow_link(struct dentry *dentry, struct nameidata *nd) uio_t uio; char *link; int error; + fstrans_cookie_t cookie; crhold(cr); @@ -320,7 +351,9 @@ zpl_follow_link(struct dentry *dentry, struct nameidata *nd) uio.uio_resid = (MAXPATHLEN - 1); uio.uio_segflg = UIO_SYSSPACE; + cookie = spl_fstrans_mark(); error = -zfs_readlink(ip, &uio, cr); + spl_fstrans_unmark(cookie); 
if (error) { kmem_free(link, MAXPATHLEN); nd_set_link(nd, ERR_PTR(error)); @@ -347,6 +380,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) cred_t *cr = CRED(); struct inode *ip = old_dentry->d_inode; int error; + fstrans_cookie_t cookie; if (ip->i_nlink >= ZFS_LINK_MAX) return (-EMLINK); @@ -355,7 +389,9 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) ip->i_ctime = CURRENT_TIME_SEC; igrab(ip); /* Use ihold() if available */ + cookie = spl_fstrans_mark(); error = -zfs_link(dir, ip, dname(dentry), cr); + spl_fstrans_unmark(cookie); if (error) { iput(ip); goto out; @@ -375,6 +411,7 @@ zpl_truncate_range(struct inode *ip, loff_t start, loff_t end) { cred_t *cr = CRED(); flock64_t bf; + fstrans_cookie_t cookie; ASSERT3S(start, <=, end); @@ -392,7 +429,9 @@ zpl_truncate_range(struct inode *ip, loff_t start, loff_t end) bf.l_start = start; bf.l_len = end - start; bf.l_pid = 0; + cookie = spl_fstrans_mark(); zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr); + spl_fstrans_unmark(cookie); crfree(cr); }