From d07b7c7f210e3f92775ad93a7306812edf524bfc Mon Sep 17 00:00:00 2001
From: Chris Dunlop <chris@onthe.net.au>
Date: Mon, 16 Mar 2015 12:21:21 +1100
Subject: [PATCH 01/16] Reduce size of zfs_sb_t: allocate z_hold_mtx separately

zfs_sb_t has grown to the point where using kmem_zalloc() for allocations
is triggering the 32k warning threshold.

We can't safely convert this entire allocation to use vmem_alloc() instead
of kmem_alloc() because the backing_dev_info structure is embedded here.
It depends on the bit_waitqueue() function which won't behave properly
when given a virtual address.

Instead, use vmem_alloc() to allocate the z_hold_mtx array separately.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chris Dunlop <chris@onthe.net.au>
Closes #3178
---
 include/sys/zfs_vfsops.h | 2 +-
 module/zfs/zfs_vfsops.c  | 6 ++++++
 module/zfs/zfs_znode.c   | 3 +++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index 4b88260de2b9..c4db2a911d3e 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -92,7 +92,7 @@ typedef struct zfs_sb {
 	uint64_t	z_replay_eof;	/* New end of file - replay only */
 	sa_attr_type_t	*z_attr_table;	/* SA attr mapping->id */
 #define	ZFS_OBJ_MTX_SZ	256
-	kmutex_t	z_hold_mtx[ZFS_OBJ_MTX_SZ];	/* znode hold locks */
+	kmutex_t	*z_hold_mtx;	/* znode hold locks */
 } zfs_sb_t;
 
 #define	ZFS_SUPER_MAGIC	0x2fc12fc1
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index e98f4bf6a120..41a1c4d8849d 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -776,6 +776,9 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
 	rrw_init(&zsb->z_teardown_lock, B_FALSE);
 	rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
 	rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
+
+	zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ,
+	    KM_SLEEP);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
@@ -789,6 +792,8 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
 out:
 	dmu_objset_disown(os, zsb);
 	*zsbp = NULL;
+
+	vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
 	kmem_free(zsb, sizeof (zfs_sb_t));
 	return (error);
 }
@@ -892,6 +897,7 @@ zfs_sb_free(zfs_sb_t *zsb)
 	rw_destroy(&zsb->z_fuid_lock);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_destroy(&zsb->z_hold_mtx[i]);
+	vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
 	mutex_destroy(&zsb->z_ctldir_lock);
 	avl_destroy(&zsb->z_ctldir_snaps);
 	kmem_free(zsb, sizeof (zfs_sb_t));
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 3a7c30db2cfa..a96ac8338f47 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1731,6 +1731,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	list_create(&zsb->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 
+	zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ,
+	    KM_SLEEP);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
@@ -1755,6 +1757,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_destroy(&zsb->z_hold_mtx[i]);
 
+	vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
 	kmem_free(sb, sizeof (struct super_block));
 	kmem_free(zsb, sizeof (zfs_sb_t));
 }

From e89bd69775d5379f6835dade1e14005aa17f4903 Mon Sep 17 00:00:00 2001
From: Isaac Huang <he.huang@intel.com>
Date: Sat, 21 Mar 2015 23:32:54 -0600
Subject: [PATCH 02/16] zio_injection_enabled should not be a module option

The zio_inject.c keeps zio_injection_enabled as a counter of
fault handlers, so it should not be exported to user space as
a module option.

Several EXPORT_SYMBOLs are moved from zio.c to zio_inject.c,
where the symbols are defined.

Signed-off-by: Isaac Huang <he.huang@intel.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3199
---
 man/man5/zfs-module-parameters.5 | 11 -----------
 module/zfs/zio.c                 |  8 --------
 module/zfs/zio_inject.c          |  9 +++++++--
 3 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index fe31e292a792..9a3e2149ae80 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1413,17 +1413,6 @@ Max zio millisec delay before posting event
 Default value: \fB30,000\fR.
 .RE
 
-.sp
-.ne 2
-.na
-\fBzio_injection_enabled\fR (int)
-.ad
-.RS 12n
-Enable fault injection
-.sp
-Use \fB1\fR for yes and \fB0\fR for no (default).
-.RE
-
 .sp
 .ne 2
 .na
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 49e2d93b6783..ad0064443d94 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -3331,14 +3331,6 @@ zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_phys_t *zb1,
 }
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
-/* Fault injection */
-EXPORT_SYMBOL(zio_injection_enabled);
-EXPORT_SYMBOL(zio_inject_fault);
-EXPORT_SYMBOL(zio_inject_list_next);
-EXPORT_SYMBOL(zio_clear_fault);
-EXPORT_SYMBOL(zio_handle_fault_injection);
-EXPORT_SYMBOL(zio_handle_device_injection);
-EXPORT_SYMBOL(zio_handle_label_injection);
 EXPORT_SYMBOL(zio_type_name);
 EXPORT_SYMBOL(zio_buf_alloc);
 EXPORT_SYMBOL(zio_data_buf_alloc);
diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c
index c168f3b47f2e..5afb23c595ae 100644
--- a/module/zfs/zio_inject.c
+++ b/module/zfs/zio_inject.c
@@ -525,6 +525,11 @@ zio_inject_fini(void)
 }
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
-module_param(zio_injection_enabled, int, 0644);
-MODULE_PARM_DESC(zio_injection_enabled, "Enable fault injection");
+EXPORT_SYMBOL(zio_injection_enabled);
+EXPORT_SYMBOL(zio_inject_fault);
+EXPORT_SYMBOL(zio_inject_list_next);
+EXPORT_SYMBOL(zio_clear_fault);
+EXPORT_SYMBOL(zio_handle_fault_injection);
+EXPORT_SYMBOL(zio_handle_device_injection);
+EXPORT_SYMBOL(zio_handle_label_injection);
 #endif

From 7b4536c710adea88f160c6f9ae140ae5279c8183 Mon Sep 17 00:00:00 2001
From: Gordan Bobic <gordan@steel.shatteredsilicon.net>
Date: Mon, 23 Mar 2015 16:17:56 +0000
Subject: [PATCH 03/16] Execute udevadm settle before trying to import pools

Execute udevadm settle before trying to import pools.  Otherwise the
disk device nodes may not be ready before import time.  This is
analogous to the behavior of the init scripts and systemd units.

Signed-off-by: Gordan Bobic <gordan@steel.shatteredsilicon.net>
Signed-off-by: Pavel Snajdr <snajpa@snajpa.net>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3213
---
 dracut/90zfs/mount-zfs.sh.in | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dracut/90zfs/mount-zfs.sh.in b/dracut/90zfs/mount-zfs.sh.in
index ffc3f201350a..8237d76f2efe 100755
--- a/dracut/90zfs/mount-zfs.sh.in
+++ b/dracut/90zfs/mount-zfs.sh.in
@@ -12,6 +12,9 @@ if getargbool 0 zfs_force -y zfs.force -y zfsforce ; then
 	ZPOOL_FORCE="-f"
 fi
 
+# Delay until all required block devices are present.
+udevadm settle
+
 case "$root" in
 	zfs:*)
 		# We have ZFS modules loaded, so we're able to import pools now.

From ded576e28fe70a40e78a90e4668de8130d599380 Mon Sep 17 00:00:00 2001
From: Tim Chase <tim@chase2k.com>
Date: Mon, 23 Mar 2015 12:10:19 -0500
Subject: [PATCH 04/16] Set the maximum ZVOL transfer size correctly

ZoL had been setting max_sectors to UINT_MAX, but until Linux 3.19
the kernel artificially capped it at 1024 (BLK_DEF_MAX_SECTORS).
This cap was removed in torvalds/linux@34b48db.  This patch changes
it to DMU_MAX_ACCESS (in sectors) and also changes the ASSERT in
dmu_tx_hold_write() to allow the maximum transfer size.

Signed-off-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3212
---
 module/zfs/dmu_tx.c | 2 +-
 module/zfs/zvol.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index cdf5a6d0fcfa..3d6dcc70f305 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -420,7 +420,7 @@ dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
 	dmu_tx_hold_t *txh;
 
 	ASSERT(tx->tx_txg == 0);
-	ASSERT(len < DMU_MAX_ACCESS);
+	ASSERT(len <= DMU_MAX_ACCESS);
 	ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
 
 	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 4febbb6bf1d7..d180b5b5b76f 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -1389,7 +1389,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
 
 	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
 
-	blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX);
+	blk_queue_max_hw_sectors(zv->zv_queue, DMU_MAX_ACCESS / 512);
 	blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
 	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
 	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);

From 58806b4cdc32e6f4e4a214cfba3b62a24efb34b7 Mon Sep 17 00:00:00 2001
From: Ned Bass <bass6@llnl.gov>
Date: Tue, 24 Mar 2015 17:00:08 -0700
Subject: [PATCH 05/16] dbuf_free_range() overzealously frees dbufs

When called to free a spill block from a dnode, dbuf_free_range() has a
bug that results in all dbufs for the dnode getting freed.  A variety of
problems may result from this bug, but a common one was a zap lookup
tripping an ASSERT because the zap buffers had been zeroed out.  This
could happen on a dataset with xattr=sa set when extended attributes are
written and removed on a directory concurrently with I/O to files in
that directory.

Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Fixes #3195
Fixes #3204
Fixes #3222
---
 module/zfs/dbuf.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index f10a04d112a8..7a0c666395c8 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -898,9 +898,14 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
 		db_next = list_next(&dn->dn_dbufs, db);
 		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 
+		/* Skip indirect blocks. */
 		if (db->db_level != 0)
 			continue;
-		if ((db->db_blkid < start || db->db_blkid > end) && !freespill)
+		/* Skip direct blocks outside the range. */
+		if (!freespill && (db->db_blkid < start || db->db_blkid > end))
+			continue;
+		/* Skip all direct blocks, only free spill blocks. */
+		if (freespill && (db->db_blkid != DMU_SPILL_BLKID))
 			continue;
 
 		/* found a level 0 buffer in the range */

From 7d90f569b3f05def7cbd0a52ce8ac3040364d702 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Fri, 20 Mar 2015 15:10:24 -0700
Subject: [PATCH 06/16] Check all vdev labels in 'zpool import'

When using 'zpool import' to scan for available pools prefer vdev names
which reference vdevs with more valid labels.  There should be two labels
at the start of the device and two labels at the end of the device.  If
labels are missing then the device has been damaged or is in some other
way incomplete.  Preferring names with fully intact labels helps weed out
bad paths and improves the likelihood of being able to import the pool.

This behavior only applies when scanning /dev/ for valid pools.  If a
cache file exists the pools described by the cache file will be used.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chris Dunlap <cdunlap@llnl.gov>
Closes #3145
Closes #2844
Closes #3107
---
 cmd/mount_zfs/mount_zfs.c  |  2 +-
 cmd/zpool/zpool_vdev.c     |  2 +-
 include/libzfs.h           |  2 +-
 lib/libzfs/libzfs_import.c | 74 +++++++++++++++++++++++++++++---------
 4 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/cmd/mount_zfs/mount_zfs.c b/cmd/mount_zfs/mount_zfs.c
index b168f719a595..6cdb668f54c1 100644
--- a/cmd/mount_zfs/mount_zfs.c
+++ b/cmd/mount_zfs/mount_zfs.c
@@ -239,7 +239,7 @@ parse_dataset(char *dataset)
 		if (fd < 0)
 			goto out;
 
-		error = zpool_read_label(fd, &config);
+		error = zpool_read_label(fd, &config, NULL);
 		(void) close(fd);
 		if (error)
 			goto out;
diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
index 93a968dbadc7..cf6d2bfa56d5 100644
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@@ -597,7 +597,7 @@ is_spare(nvlist_t *config, const char *path)
 	if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
 	    !inuse ||
 	    state != POOL_STATE_SPARE ||
-	    zpool_read_label(fd, &label) != 0) {
+	    zpool_read_label(fd, &label, NULL) != 0) {
 		free(name);
 		(void) close(fd);
 		return (B_FALSE);
diff --git a/include/libzfs.h b/include/libzfs.h
index 108b75f5eb3c..e6a877214a64 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -757,7 +757,7 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
 /*
  * Label manipulation.
  */
-extern int zpool_read_label(int, nvlist_t **);
+extern int zpool_read_label(int, nvlist_t **, int *);
 extern int zpool_clear_label(int);
 
 /*
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 301b08883629..182168456e0c 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -88,6 +88,7 @@ typedef struct name_entry {
 	char			*ne_name;
 	uint64_t		ne_guid;
 	uint64_t		ne_order;
+	uint64_t		ne_num_labels;
 	struct name_entry	*ne_next;
 } name_entry_t;
 
@@ -173,8 +174,23 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
 				break;
 			}
 
-			if (best == NULL || ne->ne_order < best->ne_order)
+			if (best == NULL) {
 				best = ne;
+				continue;
+			}
+
+			/* Prefer paths with more vdev labels. */
+			if (ne->ne_num_labels > best->ne_num_labels) {
+				best = ne;
+				continue;
+			}
+
+			/* Prefer paths earlier in the search order. */
+			if (ne->ne_num_labels == best->ne_num_labels &&
+			    ne->ne_order < best->ne_order) {
+				best = ne;
+				continue;
+			}
 		}
 	}
 
@@ -200,7 +216,7 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
  */
 static int
 add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
-    int order, nvlist_t *config)
+    int order, int num_labels, nvlist_t *config)
 {
 	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
 	pool_entry_t *pe;
@@ -226,6 +242,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
 		}
 		ne->ne_guid = vdev_guid;
 		ne->ne_order = order;
+		ne->ne_num_labels = num_labels;
 		ne->ne_next = pl->names;
 		pl->names = ne;
 		return (0);
@@ -328,6 +345,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
 
 	ne->ne_guid = vdev_guid;
 	ne->ne_order = order;
+	ne->ne_num_labels = num_labels;
 	ne->ne_next = pl->names;
 	pl->names = ne;
 
@@ -843,15 +861,17 @@ label_offset(uint64_t size, int l)
 
 /*
  * Given a file descriptor, read the label information and return an nvlist
- * describing the configuration, if there is one.
+ * describing the configuration, if there is one.  The number of valid
+ * labels found will be returned in num_labels when non-NULL.
  */
 int
-zpool_read_label(int fd, nvlist_t **config)
+zpool_read_label(int fd, nvlist_t **config, int *num_labels)
 {
 	struct stat64 statbuf;
-	int l;
+	int l, count = 0;
 	vdev_label_t *label;
-	uint64_t state, txg, size;
+	nvlist_t *expected_config = NULL;
+	uint64_t expected_guid = 0, size;
 
 	*config = NULL;
 
@@ -863,6 +883,8 @@ zpool_read_label(int fd, nvlist_t **config)
 		return (-1);
 
 	for (l = 0; l < VDEV_LABELS; l++) {
+		uint64_t state, guid, txg;
+
 		if (pread64(fd, label, sizeof (vdev_label_t),
 		    label_offset(size, l)) != sizeof (vdev_label_t))
 			continue;
@@ -871,6 +893,12 @@ zpool_read_label(int fd, nvlist_t **config)
 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
 			continue;
 
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
+		    &guid) != 0 || guid == 0) {
+			nvlist_free(*config);
+			continue;
+		}
+
 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
 			nvlist_free(*config);
@@ -884,12 +912,24 @@ zpool_read_label(int fd, nvlist_t **config)
 			continue;
 		}
 
-		free(label);
-		return (0);
+		if (expected_guid) {
+			if (expected_guid == guid)
+				count++;
+
+			nvlist_free(*config);
+		} else {
+			expected_config = *config;
+			expected_guid = guid;
+			count++;
+		}
 	}
 
+	if (num_labels != NULL)
+		*num_labels = count;
+
 	free(label);
-	*config = NULL;
+	*config = expected_config;
+
 	return (0);
 }
 
@@ -937,7 +977,7 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
 	blkid_dev dev;
 	const char *devname;
 	nvlist_t *config;
-	int fd, err;
+	int fd, err, num_labels;
 
 	err = blkid_get_cache(&cache, NULL);
 	if (err != 0) {
@@ -972,7 +1012,7 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
 		if ((fd = open64(devname, O_RDONLY)) < 0)
 			continue;
 
-		err = zpool_read_label(fd, &config);
+		err = zpool_read_label(fd, &config, &num_labels);
 		(void) close(fd);
 
 		if (err != 0) {
@@ -981,7 +1021,8 @@ zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
 		}
 
 		if (config != NULL) {
-			err = add_config(hdl, pools, devname, 0, config);
+			err = add_config(hdl, pools, devname, 0,
+			    num_labels, config);
 			if (err != 0)
 				goto err_blkid3;
 		}
@@ -1017,7 +1058,7 @@ zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
 static nvlist_t *
 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 {
-	int i, dirs = iarg->paths;
+	int i, num_labels, dirs = iarg->paths;
 	DIR *dirp = NULL;
 	struct dirent64 *dp;
 	char path[MAXPATHLEN];
@@ -1143,7 +1184,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
 				continue;
 
-			if ((zpool_read_label(fd, &config)) != 0) {
+			if ((zpool_read_label(fd, &config, &num_labels))) {
 				(void) close(fd);
 				(void) no_memory(hdl);
 				goto error;
@@ -1177,7 +1218,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 				}
 				/* use the non-raw path for the config */
 				(void) strlcpy(end, name, pathleft);
-				if (add_config(hdl, &pools, path, i+1, config))
+				if (add_config(hdl, &pools, path, i+1,
+				    num_labels, config))
 					goto error;
 			}
 		}
@@ -1461,7 +1503,7 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
 
 	*inuse = B_FALSE;
 
-	if (zpool_read_label(fd, &config) != 0) {
+	if (zpool_read_label(fd, &config, NULL) != 0) {
 		(void) no_memory(hdl);
 		return (-1);
 	}

From d820d2e9cf27678dd07b86f8fdd4635162371a37 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Fri, 27 Mar 2015 14:30:23 -0700
Subject: [PATCH 07/16] Remove rpm/fedora directory

Originally it was thought that custom spec files might be required
for Fedora.  Happily that has turned out not to be the case.  Since
this directory just contains symlinks to the generic spec files it
can be removed.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 config/zfs-build.m4         | 2 +-
 configure.ac                | 4 ----
 rpm/Makefile.am             | 2 +-
 rpm/fedora/.gitignore       | 3 ---
 rpm/fedora/Makefile.am      | 1 -
 rpm/fedora/zfs-dkms.spec.in | 1 -
 rpm/fedora/zfs-kmod.spec.in | 1 -
 rpm/fedora/zfs.spec.in      | 1 -
 8 files changed, 2 insertions(+), 13 deletions(-)
 delete mode 100644 rpm/fedora/.gitignore
 delete mode 100644 rpm/fedora/Makefile.am
 delete mode 120000 rpm/fedora/zfs-dkms.spec.in
 delete mode 120000 rpm/fedora/zfs-kmod.spec.in
 delete mode 120000 rpm/fedora/zfs.spec.in

diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
index 4c8bddf2f297..73ba979d10d1 100644
--- a/config/zfs-build.m4
+++ b/config/zfs-build.m4
@@ -151,7 +151,7 @@ AC_DEFUN([ZFS_AC_RPM], [
 	RPM_SPEC_DIR="rpm/generic"
 	AC_ARG_WITH([spec],
 		AS_HELP_STRING([--with-spec=SPEC],
-		[Spec files 'generic|fedora']),
+		[Spec files 'generic']),
 		[RPM_SPEC_DIR="rpm/$withval"])
 
 	AC_MSG_CHECKING([whether spec files are available])
diff --git a/configure.ac b/configure.ac
index e0829205afbf..854af771c076 100644
--- a/configure.ac
+++ b/configure.ac
@@ -134,10 +134,6 @@ AC_CONFIG_FILES([
 	scripts/zpool-config/Makefile
 	scripts/common.sh
 	rpm/Makefile
-	rpm/fedora/Makefile
-	rpm/fedora/zfs.spec
-	rpm/fedora/zfs-kmod.spec
-	rpm/fedora/zfs-dkms.spec
 	rpm/generic/Makefile
 	rpm/generic/zfs.spec
 	rpm/generic/zfs-kmod.spec
diff --git a/rpm/Makefile.am b/rpm/Makefile.am
index e41cdda2e999..b564eeefd9f4 100644
--- a/rpm/Makefile.am
+++ b/rpm/Makefile.am
@@ -1 +1 @@
-SUBDIRS = fedora generic
+SUBDIRS = generic
diff --git a/rpm/fedora/.gitignore b/rpm/fedora/.gitignore
deleted file mode 100644
index 7f5daafdd6d4..000000000000
--- a/rpm/fedora/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-/zfs-dkms.spec
-/zfs-kmod.spec
-/zfs.spec
diff --git a/rpm/fedora/Makefile.am b/rpm/fedora/Makefile.am
deleted file mode 100644
index 89b13640d622..000000000000
--- a/rpm/fedora/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-EXTRA_DIST = zfs.spec.in zfs-kmod.spec.in zfs-dkms.spec.in
diff --git a/rpm/fedora/zfs-dkms.spec.in b/rpm/fedora/zfs-dkms.spec.in
deleted file mode 120000
index ffa051baaf03..000000000000
--- a/rpm/fedora/zfs-dkms.spec.in
+++ /dev/null
@@ -1 +0,0 @@
-../generic/zfs-dkms.spec.in
\ No newline at end of file
diff --git a/rpm/fedora/zfs-kmod.spec.in b/rpm/fedora/zfs-kmod.spec.in
deleted file mode 120000
index af19ecde0058..000000000000
--- a/rpm/fedora/zfs-kmod.spec.in
+++ /dev/null
@@ -1 +0,0 @@
-../generic/zfs-kmod.spec.in
\ No newline at end of file
diff --git a/rpm/fedora/zfs.spec.in b/rpm/fedora/zfs.spec.in
deleted file mode 120000
index 4c8079166ff8..000000000000
--- a/rpm/fedora/zfs.spec.in
+++ /dev/null
@@ -1 +0,0 @@
-../generic/zfs.spec.in
\ No newline at end of file

From ee2ca1db28a0910770b8bd504cf021199f583047 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Wed, 25 Mar 2015 16:59:17 -0700
Subject: [PATCH 08/16] Add RHEL style kmod packages

Provide a Redhat specific zfs-kmod.spec file which uses the old style
kmods (not kmods2) packaging.  By using the provided kmodtool script
packages can be built which support weak modules.  This allows for the
kernel to be updated without having to rebuild the ZFS kernel modules.

Packages for RHEL/Centos/SL/TOSS which use this spec file can be built
as follows:

$ ./configure --with-spec=redhat
$ make rpms

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 config/zfs-build.m4         |  2 +-
 configure.ac                |  4 ++
 rpm/Makefile.am             |  2 +-
 rpm/redhat/.gitignore       |  3 ++
 rpm/redhat/Makefile.am      |  1 +
 rpm/redhat/zfs-dkms.spec.in |  1 +
 rpm/redhat/zfs-kmod.spec.in | 86 +++++++++++++++++++++++++++++++++++++
 rpm/redhat/zfs.spec.in      |  1 +
 8 files changed, 98 insertions(+), 2 deletions(-)
 create mode 100644 rpm/redhat/.gitignore
 create mode 100644 rpm/redhat/Makefile.am
 create mode 120000 rpm/redhat/zfs-dkms.spec.in
 create mode 100644 rpm/redhat/zfs-kmod.spec.in
 create mode 120000 rpm/redhat/zfs.spec.in

diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
index 73ba979d10d1..8d3a373047ec 100644
--- a/config/zfs-build.m4
+++ b/config/zfs-build.m4
@@ -151,7 +151,7 @@ AC_DEFUN([ZFS_AC_RPM], [
 	RPM_SPEC_DIR="rpm/generic"
 	AC_ARG_WITH([spec],
 		AS_HELP_STRING([--with-spec=SPEC],
-		[Spec files 'generic']),
+		[Spec files 'generic|redhat']),
 		[RPM_SPEC_DIR="rpm/$withval"])
 
 	AC_MSG_CHECKING([whether spec files are available])
diff --git a/configure.ac b/configure.ac
index 854af771c076..63d0073e9a13 100644
--- a/configure.ac
+++ b/configure.ac
@@ -134,6 +134,10 @@ AC_CONFIG_FILES([
 	scripts/zpool-config/Makefile
 	scripts/common.sh
 	rpm/Makefile
+	rpm/redhat/Makefile
+	rpm/redhat/zfs.spec
+	rpm/redhat/zfs-kmod.spec
+	rpm/redhat/zfs-dkms.spec
 	rpm/generic/Makefile
 	rpm/generic/zfs.spec
 	rpm/generic/zfs-kmod.spec
diff --git a/rpm/Makefile.am b/rpm/Makefile.am
index b564eeefd9f4..f2cf72cef13c 100644
--- a/rpm/Makefile.am
+++ b/rpm/Makefile.am
@@ -1 +1 @@
-SUBDIRS = generic
+SUBDIRS = generic redhat
diff --git a/rpm/redhat/.gitignore b/rpm/redhat/.gitignore
new file mode 100644
index 000000000000..7f5daafdd6d4
--- /dev/null
+++ b/rpm/redhat/.gitignore
@@ -0,0 +1,3 @@
+/zfs-dkms.spec
+/zfs-kmod.spec
+/zfs.spec
diff --git a/rpm/redhat/Makefile.am b/rpm/redhat/Makefile.am
new file mode 100644
index 000000000000..89b13640d622
--- /dev/null
+++ b/rpm/redhat/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST = zfs.spec.in zfs-kmod.spec.in zfs-dkms.spec.in
diff --git a/rpm/redhat/zfs-dkms.spec.in b/rpm/redhat/zfs-dkms.spec.in
new file mode 120000
index 000000000000..ffa051baaf03
--- /dev/null
+++ b/rpm/redhat/zfs-dkms.spec.in
@@ -0,0 +1 @@
+../generic/zfs-dkms.spec.in
\ No newline at end of file
diff --git a/rpm/redhat/zfs-kmod.spec.in b/rpm/redhat/zfs-kmod.spec.in
new file mode 100644
index 000000000000..ecc809bc6520
--- /dev/null
+++ b/rpm/redhat/zfs-kmod.spec.in
@@ -0,0 +1,86 @@
+%bcond_with     debug
+%bcond_with     debug_dmu_tx
+
+Name:           @PACKAGE@-kmod
+Version:        @VERSION@
+Release:        @RELEASE@%{?dist}
+
+Summary:        Kernel module(s)
+Group:          System Environment/Kernel
+License:        @ZFS_META_LICENSE@
+URL:            http://zfsonlinux.org/
+BuildRequires:  %kernel_module_package_buildreqs
+BuildRequires:  kmod-spl-devel = %{version}
+Source0:        @PACKAGE@-%{version}.tar.gz
+BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
+
+# Additional dependency information for the kmod sub-package must be specified
+# by generating a preamble text file which kmodtool can append to the spec file.
+%(/bin/echo -e "\
+Requires:       spl-kmod\n\
+Requires:       @PACKAGE@ = %{version}\n\
+Conflicts:      @PACKAGE@-dkms\n\n" > %{_sourcedir}/kmod-preamble)
+
+%description
+This package contains the ZFS kernel modules.
+
+%define kmod_name @PACKAGE@
+%define debug_package %{nil}
+
+%kernel_module_package -n %{kmod_name} -p %{_sourcedir}/kmod-preamble
+
+%define ksrc %{_usrsrc}/kernels/%{kverrel}
+%define kobj %{ksrc}
+%define splsrc %{_usrsrc}/spl-%{version}
+%define splobj %{splsrc}/%{kverrel}
+
+%package -n kmod-%{kmod_name}-devel
+Summary:        ZFS kernel module(s) devel common
+Group:          System Environment/Kernel
+Requires:       kmod-spl-devel = %{version}
+
+%description -n  kmod-%{kmod_name}-devel
+This package provides the header files and objects to build kernel modules
+which depend on the spl kernel module.
+
+%prep
+if ! [ -d "%{ksrc}"  ]; then
+        echo "Kernel build directory isn't set properly, cannot continue"
+        exit 1
+fi
+
+%if %{with debug}
+%define debug --enable-debug
+%else
+%define debug --disable-debug
+%endif
+
+%if %{with debug_dmu_tx}
+%define debug_dmu_tx --enable-debug-dmu-tx
+%else
+%define debug_dmu_tx --disable-debug-dmu-tx
+%endif
+
+%setup -n %{kmod_name}-%{version}
+%build
+%configure \
+        --with-config=kernel \
+        --with-linux=%{ksrc} \
+        --with-linux-obj=%{kobj} \
+        --with-spl="%{splsrc}" \
+        --with-spl-obj="%{splobj}" \
+        %{debug} \
+        %{debug_dmu_tx}
+make %{?_smp_mflags}
+
+%install
+make install \
+        DESTDIR=${RPM_BUILD_ROOT} \
+        INSTALL_MOD_DIR=extra/%{kmod_name}
+%{__rm} -f %{buildroot}/lib/modules/%{kverrel}/modules.*
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files -n kmod-%{kmod_name}-devel
+%{_usrsrc}/%{kmod_name}-%{version}
diff --git a/rpm/redhat/zfs.spec.in b/rpm/redhat/zfs.spec.in
new file mode 120000
index 000000000000..4c8079166ff8
--- /dev/null
+++ b/rpm/redhat/zfs.spec.in
@@ -0,0 +1 @@
+../generic/zfs.spec.in
\ No newline at end of file

From 95a6990d9a77a56eb97b76f2880f95f0f42f4fe0 Mon Sep 17 00:00:00 2001
From: Ned Bass <bass6@llnl.gov>
Date: Tue, 24 Mar 2015 17:22:21 -0700
Subject: [PATCH 09/16] Add NULL guard in zfs_zrlock_class event class

The owner field could be NULL in some cases, so add a guard.  Shorten
__entry field names to fit assignment statements in 80 columns.

Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Fixes #3220
---
 include/sys/trace_zrlock.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/sys/trace_zrlock.h b/include/sys/trace_zrlock.h
index 3653e3e8d293..8c811c2aa0b3 100644
--- a/include/sys/trace_zrlock.h
+++ b/include/sys/trace_zrlock.h
@@ -42,28 +42,28 @@ DECLARE_EVENT_CLASS(zfs_zrlock_class,
 	TP_PROTO(zrlock_t *zrl, uint32_t n),
 	TP_ARGS(zrl, n),
 	TP_STRUCT__entry(
-	    __field(int32_t,		zr_refcount)
+	    __field(int32_t,		refcount)
 #ifdef	ZFS_DEBUG
-	    __field(pid_t,		zr_owner_pid)
-	    __field(const char *,	zr_caller)
+	    __field(pid_t,		owner_pid)
+	    __field(const char *,	caller)
 #endif
 	    __field(uint32_t,		n)
 	),
 	TP_fast_assign(
-	    __entry->zr_refcount	= zrl->zr_refcount;
+	    __entry->refcount	= zrl->zr_refcount;
 #ifdef	ZFS_DEBUG
-	    __entry->zr_owner_pid	= zrl->zr_owner->pid;
-	    __entry->zr_caller		= zrl->zr_caller;
+	    __entry->owner_pid	= zrl->zr_owner ? zrl->zr_owner->pid : 0;
+	    __entry->caller	= zrl->zr_caller;
 #endif
-	    __entry->n			= n;
+	    __entry->n		= n;
 	),
 #ifdef	ZFS_DEBUG
 	TP_printk("zrl { refcount %d owner_pid %d caller %s } n %u",
-	    __entry->zr_refcount, __entry->zr_owner_pid, __entry->zr_caller,
+	    __entry->refcount, __entry->owner_pid, __entry->caller,
 	    __entry->n)
 #else
 	TP_printk("zrl { refcount %d } n %u",
-	    __entry->zr_refcount, __entry->n)
+	    __entry->refcount, __entry->n)
 #endif
 );
 

From 9540be9b23fd6f8b5bf7d81853c251010d9b7205 Mon Sep 17 00:00:00 2001
From: Ned Bass <bass6@llnl.gov>
Date: Thu, 26 Mar 2015 12:10:26 -0700
Subject: [PATCH 10/16] zpool import should honor overlay property

Make the 'zpool import' command honor the overlay property to allow
filesystems to be mounted on a non-empty directory. As it stands now
this property is only checked by the 'zfs mount' command.  Move the
check into 'zfs_mount()' in libzfs so the property is honored for all
callers.

Signed-off-by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3227
---
 cmd/zfs/zfs_main.c        | 14 --------------
 lib/libzfs/libzfs_mount.c | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 83f02666d247..84073435e2d7 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -5641,7 +5641,6 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
 	char mountpoint[ZFS_MAXPROPLEN];
 	char shareopts[ZFS_MAXPROPLEN];
 	char smbshareopts[ZFS_MAXPROPLEN];
-	char overlay[ZFS_MAXPROPLEN];
 	const char *cmdname = op == OP_SHARE ? "share" : "mount";
 	struct mnttab mnt;
 	uint64_t zoned, canmount;
@@ -5748,19 +5747,6 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
 		return (0);
 	}
 
-	/*
-	 * Overlay mounts are disabled by default but may be enabled
-	 * via the 'overlay' property or the 'zfs mount -O' option.
-	 */
-	if (!(flags & MS_OVERLAY)) {
-		if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
-			    sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {
-			if (strcmp(overlay, "on") == 0) {
-				flags |= MS_OVERLAY;
-			}
-		}
-	}
-
 	/*
 	 * At this point, we have verified that the mountpoint and/or
 	 * shareopts are appropriate for auto management. If the
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
index ac3b6822608c..0e3332e0e3ed 100644
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -388,6 +388,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 	struct stat buf;
 	char mountpoint[ZFS_MAXPROPLEN];
 	char mntopts[MNT_LINE_MAX];
+	char overlay[ZFS_MAXPROPLEN];
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	int remount = 0, rc;
 
@@ -441,6 +442,19 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 		}
 	}
 
+	/*
+	 * Overlay mounts are disabled by default but may be enabled
+	 * via the 'overlay' property or the 'zfs mount -O' option.
+	 */
+	if (!(flags & MS_OVERLAY)) {
+		if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
+			    sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {
+			if (strcmp(overlay, "on") == 0) {
+				flags |= MS_OVERLAY;
+			}
+		}
+	}
+
 	/*
 	 * Determine if the mountpoint is empty.  If so, refuse to perform the
 	 * mount.  We don't perform this check if 'remount' is

From a4069eef2e403a3b2a307b23b7500e2adc6ecae5 Mon Sep 17 00:00:00 2001
From: Prakash Surya <prakash.surya@delphix.com>
Date: Fri, 27 Mar 2015 13:03:22 +1100
Subject: [PATCH 11/16] Illumos 5695 - dmu_sync'ed holes do not retain birth
 time

5695 dmu_sync'ed holes do not retain birth time
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5695
  https://github.com/illumos/illumos-gate/commit/70163ac

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3229
---
 cmd/zdb/zdb.c     |  4 +++-
 include/sys/spa.h | 13 +++++++------
 module/zfs/dmu.c  | 14 +++++++++++++-
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index b0d7170b92b2..1d76f2a7da3e 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -1115,7 +1115,9 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
 
 	if (BP_IS_HOLE(bp)) {
 		(void) snprintf(blkbuf + strlen(blkbuf),
-		    buflen - strlen(blkbuf), "B=%llu",
+		    buflen - strlen(blkbuf),
+		    "%llxL B=%llu",
+		    (u_longlong_t)BP_GET_LSIZE(bp),
 		    (u_longlong_t)bp->blk_birth);
 	} else {
 		(void) snprintf(blkbuf + strlen(blkbuf),
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 2f73793fedb2..83b6723a4f73 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -501,12 +501,13 @@ _NOTE(CONSTCOND) } while (0)
 	if (bp == NULL) {						\
 		len += func(buf + len, size - len, "<NULL>");		\
 	} else if (BP_IS_HOLE(bp)) {					\
-		len += func(buf + len, size - len, "<hole>");		\
-		if (bp->blk_birth > 0) {				\
-			len += func(buf + len, size - len,		\
-			    " birth=%lluL",				\
-			    (u_longlong_t)bp->blk_birth);		\
-		}							\
+		len += func(buf + len, size - len,			\
+		    "HOLE [L%llu %s] "					\
+		    "size=%llxL birth=%lluL",				\
+		    (u_longlong_t)BP_GET_LEVEL(bp),			\
+		    type,						\
+		    (u_longlong_t)BP_GET_LSIZE(bp),			\
+		    (u_longlong_t)bp->blk_birth);			\
 	} else if (BP_IS_EMBEDDED(bp)) {				\
 		len = func(buf + len, size - len,			\
 		    "EMBEDDED [L%llu %s] et=%u %s "			\
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 1501ae8046ad..3b7bbefc2f73 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1472,7 +1472,19 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
 		dr->dt.dl.dr_overridden_by = *zio->io_bp;
 		dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
 		dr->dt.dl.dr_copies = zio->io_prop.zp_copies;
-		if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by))
+
+		/*
+		 * Old style holes are filled with all zeros, whereas
+		 * new-style holes maintain their lsize, type, level,
+		 * and birth time (see zio_write_compress). While we
+		 * need to reset the BP_SET_LSIZE() call that happened
+		 * in dmu_sync_ready for old style holes, we do *not*
+		 * want to wipe out the information contained in new
+		 * style holes. Thus, only zero out the block pointer if
+		 * it's an old style hole.
+		 */
+		if (BP_IS_HOLE(&dr->dt.dl.dr_overridden_by) &&
+		    dr->dt.dl.dr_overridden_by.blk_birth == 0)
 			BP_ZERO(&dr->dt.dl.dr_overridden_by);
 	} else {
 		dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;

From ee2f17aa2af3d8620470c3bbbe6f9dac9449d546 Mon Sep 17 00:00:00 2001
From: Chris Dunlop <chris@onthe.net.au>
Date: Fri, 27 Mar 2015 15:04:12 +1100
Subject: [PATCH 12/16] Align code with Illumos

Align code in traverse_visitbp() with that in Illumos in preparation for
applying Illumos-5694.

No functional change: use a temporary variable pd to replace multiple
occurrences of td->td_pfd.  This increases our stack use slightly more
than normal because the function is called recursively.

Signed-off-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #3230
---
 module/zfs/dmu_traverse.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index b5c1ec758f8b..a8481aee61ee 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -213,6 +213,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 {
 	int err = 0;
 	arc_buf_t *buf = NULL;
+	prefetch_data_t *pd = td->td_pfd;
 
 	switch (resume_skip_check(td, dnp, zb)) {
 	case RESUME_SKIP_ALL:
@@ -249,16 +250,14 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 		return (0);
 	}
 
-	if (td->td_pfd != NULL && !td->td_pfd->pd_exited &&
-	    prefetch_needed(td->td_pfd, bp)) {
-		mutex_enter(&td->td_pfd->pd_mtx);
-		ASSERT(td->td_pfd->pd_blks_fetched >= 0);
-		while (td->td_pfd->pd_blks_fetched == 0 &&
-		    !td->td_pfd->pd_exited)
-			cv_wait(&td->td_pfd->pd_cv, &td->td_pfd->pd_mtx);
-		td->td_pfd->pd_blks_fetched--;
-		cv_broadcast(&td->td_pfd->pd_cv);
-		mutex_exit(&td->td_pfd->pd_mtx);
+	if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) {
+		mutex_enter(&pd->pd_mtx);
+		ASSERT(pd->pd_blks_fetched >= 0);
+		while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
+			cv_wait(&pd->pd_cv, &pd->pd_mtx);
+		pd->pd_blks_fetched--;
+		cv_broadcast(&pd->pd_cv);
+		mutex_exit(&pd->pd_mtx);
 	}
 
 	if (BP_IS_HOLE(bp)) {

From b738bc5a0f8ccd0281ed06831c34fbe31d2b2138 Mon Sep 17 00:00:00 2001
From: George Wilson <george.wilson@delphix.com>
Date: Fri, 27 Mar 2015 15:31:52 +1100
Subject: [PATCH 13/16] Illumos 5694 - traverse_prefetcher does not prefetch
 enough

5694 traverse_prefetcher does not prefetch enough
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Approved by: Garrett D'Amore <garrett@damore.org>

References:
  https://www.illumos.org/issues/5694
  https://github.com/illumos/illumos-gate/commit/34d7ce05

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3230
---
 man/man5/zfs-module-parameters.5 |  4 ++--
 module/zfs/dmu_traverse.c        | 23 +++++++++++------------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 9a3e2149ae80..783d3532ea8d 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1058,10 +1058,10 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable.
 .sp
 .ne 2
 .na
-\fBzfs_pd_blks_max\fR (int)
+\fBzfs_pd_bytes_max\fR (int)
 .ad
 .RS 12n
-Max number of blocks to prefetch
+The number of bytes which should be prefetched.
 .sp
 Default value: \fB100\fR.
 .RE
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index a8481aee61ee..9280a89b2f85 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -38,13 +38,12 @@
 #include <sys/callb.h>
 #include <sys/zfeature.h>
 
-int zfs_pd_blks_max = 100;
+int32_t zfs_pd_bytes_max = 50 * 1024 * 1024;	/* 50MB */
 
 typedef struct prefetch_data {
 	kmutex_t pd_mtx;
 	kcondvar_t pd_cv;
-	int pd_blks_max;
-	int pd_blks_fetched;
+	int32_t pd_bytes_fetched;
 	int pd_flags;
 	boolean_t pd_cancel;
 	boolean_t pd_exited;
@@ -251,11 +250,12 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 	}
 
 	if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) {
+		uint64_t size = BP_GET_LSIZE(bp);
 		mutex_enter(&pd->pd_mtx);
-		ASSERT(pd->pd_blks_fetched >= 0);
-		while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
+		ASSERT(pd->pd_bytes_fetched >= 0);
+		while (pd->pd_bytes_fetched < size && !pd->pd_exited)
 			cv_wait(&pd->pd_cv, &pd->pd_mtx);
-		pd->pd_blks_fetched--;
+		pd->pd_bytes_fetched -= size;
 		cv_broadcast(&pd->pd_cv);
 		mutex_exit(&pd->pd_mtx);
 	}
@@ -452,7 +452,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 	prefetch_data_t *pfd = arg;
 	uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
 
-	ASSERT(pfd->pd_blks_fetched >= 0);
+	ASSERT(pfd->pd_bytes_fetched >= 0);
 	if (pfd->pd_cancel)
 		return (SET_ERROR(EINTR));
 
@@ -460,9 +460,9 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 		return (0);
 
 	mutex_enter(&pfd->pd_mtx);
-	while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max)
+	while (!pfd->pd_cancel && pfd->pd_bytes_fetched >= zfs_pd_bytes_max)
 		cv_wait(&pfd->pd_cv, &pfd->pd_mtx);
-	pfd->pd_blks_fetched++;
+	pfd->pd_bytes_fetched += BP_GET_LSIZE(bp);
 	cv_broadcast(&pfd->pd_cv);
 	mutex_exit(&pfd->pd_mtx);
 
@@ -531,7 +531,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
 	td->td_flags = flags;
 	td->td_paused = B_FALSE;
 
-	pd->pd_blks_max = zfs_pd_blks_max;
 	pd->pd_flags = flags;
 	mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);
@@ -661,6 +660,6 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
 EXPORT_SYMBOL(traverse_dataset);
 EXPORT_SYMBOL(traverse_pool);
 
-module_param(zfs_pd_blks_max, int, 0644);
-MODULE_PARM_DESC(zfs_pd_blks_max, "Max number of blocks to prefetch");
+module_param(zfs_pd_bytes_max, int, 0644);
+MODULE_PARM_DESC(zfs_pd_bytes_max, "Max number of bytes to prefetch");
 #endif

From 0f7d2a4b3d2d7fc0975a7ef53bd3c4700d47c51b Mon Sep 17 00:00:00 2001
From: Matthew Ahrens <mahrens@delphix.com>
Date: Fri, 27 Mar 2015 17:11:50 +1100
Subject: [PATCH 14/16] Illumos 5693 - ztest fails in dbuf_verify: buf[i] == 0,
 due to dedup and bp_override

5693 ztest fails in dbuf_verify: buf[i] == 0, due to dedup and bp_override
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Bayard Bell <buffer.g.overflow@gmail.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5693
  https://github.com/illumos/illumos-gate/commit/7f7ace3

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3231
---
 module/zfs/zio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index ad0064443d94..066f04f1864c 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1043,8 +1043,6 @@ zio_write_bp_init(zio_t *zio)
 			zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
 			return (ZIO_PIPELINE_CONTINUE);
 		}
-		zio->io_bp_override = NULL;
-		BP_ZERO(bp);
 	}
 
 	if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {

From 74aa2ba259e61512bd029c9e8f857f0611a80bbd Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Tue, 31 Mar 2015 11:51:37 -0700
Subject: [PATCH 15/16] Update zfs_pd_bytes_max default in
 zfs-module-parameters(5)

Commit b738bc5 should have updated the default value of zfs_pd_bytes_max
in the zfs-module-parameters(5) man page.  The correct default value is
50*1024*1024.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 man/man5/zfs-module-parameters.5 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 783d3532ea8d..007cb1c71470 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1063,7 +1063,7 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable.
 .RS 12n
 The number of bytes which should be prefetched.
 .sp
-Default value: \fB100\fR.
+Default value: \fB52,428,800\fR.
 .RE
 
 .sp

From 40d06e3c78c23b199dfd9284809e710fab549391 Mon Sep 17 00:00:00 2001
From: Tim Chase <tim@chase2k.com>
Date: Mon, 30 Mar 2015 22:43:29 -0500
Subject: [PATCH 16/16] Mark all ZPL and ioctl functions as PF_FSTRANS

Prevent deadlocks by disabling direct reclaim during all ZPL and ioctl
calls as well as the l2arc and adapt ARC threads.

This obviates the need for MUTEX_FSTRANS so its previous uses and
definition have been eliminated.

Signed-off-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3225
---
 include/sys/zfs_context.h |  1 -
 module/zfs/arc.c          | 10 ++++++++--
 module/zfs/dbuf.c         |  2 +-
 module/zfs/zfs_ioctl.c    |  5 +++++
 module/zfs/zfs_znode.c    | 18 +++---------------
 module/zfs/zpl_file.c     | 40 +++++++++++++++++++++++++++++++++++++++
 module/zfs/zpl_inode.c    | 39 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index b8eff58bc615..3dc54f1d7d90 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -273,7 +273,6 @@ typedef struct kmutex {
 } kmutex_t;
 
 #define	MUTEX_DEFAULT	0
-#define	MUTEX_FSTRANS	MUTEX_DEFAULT
 #define	MUTEX_HELD(m)	((m)->m_owner == curthread)
 #define	MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))
 
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index bdf116c35c52..421c81e1cfe9 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -933,7 +933,7 @@ buf_init(void)
 
 	for (i = 0; i < BUF_LOCKS; i++) {
 		mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
-		    NULL, MUTEX_FSTRANS, NULL);
+		    NULL, MUTEX_DEFAULT, NULL);
 	}
 }
 
@@ -2412,9 +2412,11 @@ static void
 arc_adapt_thread(void)
 {
 	callb_cpr_t		cpr;
+	fstrans_cookie_t	cookie;
 
 	CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
 
+	cookie = spl_fstrans_mark();
 	mutex_enter(&arc_reclaim_thr_lock);
 	while (arc_thread_exit == 0) {
 #ifndef _KERNEL
@@ -2485,6 +2487,7 @@ arc_adapt_thread(void)
 	arc_thread_exit = 0;
 	cv_broadcast(&arc_reclaim_thr_cv);
 	CALLB_CPR_EXIT(&cpr);		/* drops arc_reclaim_thr_lock */
+	spl_fstrans_unmark(cookie);
 	thread_exit();
 }
 
@@ -5376,11 +5379,13 @@ l2arc_feed_thread(void)
 	uint64_t size, wrote;
 	clock_t begin, next = ddi_get_lbolt();
 	boolean_t headroom_boost = B_FALSE;
+	fstrans_cookie_t cookie;
 
 	CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
 
 	mutex_enter(&l2arc_feed_thr_lock);
 
+	cookie = spl_fstrans_mark();
 	while (l2arc_thread_exit == 0) {
 		CALLB_CPR_SAFE_BEGIN(&cpr);
 		(void) cv_timedwait_interruptible(&l2arc_feed_thr_cv,
@@ -5454,6 +5459,7 @@ l2arc_feed_thread(void)
 		next = l2arc_write_interval(begin, size, wrote);
 		spa_config_exit(spa, SCL_L2ARC, dev);
 	}
+	spl_fstrans_unmark(cookie);
 
 	l2arc_thread_exit = 0;
 	cv_broadcast(&l2arc_feed_thr_cv);
@@ -5570,7 +5576,7 @@ l2arc_init(void)
 	mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
 	mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_FSTRANS, NULL);
+	mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);
 
 	l2arc_dev_list = &L2ARC_dev_list;
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 7a0c666395c8..ed6a8fd2a4dc 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -331,7 +331,7 @@ dbuf_init(void)
 	    0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
 
 	for (i = 0; i < DBUF_MUTEXES; i++)
-		mutex_init(&h->hash_mutexes[i], NULL, MUTEX_FSTRANS, NULL);
+		mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
 
 	dbuf_stats_init(h);
 }
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index cd7697058983..39783e1091a4 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -5733,6 +5733,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 	const zfs_ioc_vec_t *vec;
 	char *saved_poolname = NULL;
 	nvlist_t *innvl = NULL;
+	fstrans_cookie_t cookie;
 
 	vecnum = cmd - ZFS_IOC_FIRST;
 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
@@ -5827,7 +5828,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 		}
 
 		outnvl = fnvlist_alloc();
+		cookie = spl_fstrans_mark();
 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
+		spl_fstrans_unmark(cookie);
 
 		if (error == 0 && vec->zvec_allow_log &&
 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
@@ -5855,7 +5858,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 
 		nvlist_free(outnvl);
 	} else {
+		cookie = spl_fstrans_mark();
 		error = vec->zvec_legacy_func(zc);
+		spl_fstrans_unmark(cookie);
 	}
 
 out:
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index a96ac8338f47..c931a72c3aea 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1097,23 +1097,13 @@ zfs_zinactive(znode_t *zp)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	uint64_t z_id = zp->z_id;
-	boolean_t drop_mutex = 0;
 
 	ASSERT(zp->z_sa_hdl);
 
 	/*
 	 * Don't allow a zfs_zget() while were trying to release this znode.
-	 *
-	 * Linux allows direct memory reclaim which means that any KM_SLEEP
-	 * allocation may trigger inode eviction.  This can lead to a deadlock
-	 * through the ->shrink_icache_memory()->evict()->zfs_inactive()->
-	 * zfs_zinactive() call path.  To avoid this deadlock the process
-	 * must not reacquire the mutex when it is already holding it.
 	 */
-	if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
-		ZFS_OBJ_HOLD_ENTER(zsb, z_id);
-		drop_mutex = 1;
-	}
+	ZFS_OBJ_HOLD_ENTER(zsb, z_id);
 
 	mutex_enter(&zp->z_lock);
 
@@ -1124,8 +1114,7 @@ zfs_zinactive(znode_t *zp)
 	if (zp->z_unlinked) {
 		mutex_exit(&zp->z_lock);
 
-		if (drop_mutex)
-			ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+		ZFS_OBJ_HOLD_EXIT(zsb, z_id);
 
 		zfs_rmnode(zp);
 		return;
@@ -1134,8 +1123,7 @@ zfs_zinactive(znode_t *zp)
 	mutex_exit(&zp->z_lock);
 	zfs_znode_dmu_fini(zp);
 
-	if (drop_mutex)
-		ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+	ZFS_OBJ_HOLD_EXIT(zsb, z_id);
 }
 
 static inline int
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 571e04315e9d..66db113064c3 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -35,13 +35,16 @@ zpl_open(struct inode *ip, struct file *filp)
 {
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	error = generic_file_open(ip, filp);
 	if (error)
 		return (error);
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -53,12 +56,15 @@ zpl_release(struct inode *ip, struct file *filp)
 {
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
+	cookie = spl_fstrans_mark();
 	if (ITOZ(ip)->z_atime_dirty)
 		zfs_mark_inode_dirty(ip);
 
 	crhold(cr);
 	error = -zfs_close(ip, filp->f_flags, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -71,9 +77,12 @@ zpl_iterate(struct file *filp, struct dir_context *ctx)
 	struct dentry *dentry = filp->f_path.dentry;
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_readdir(dentry->d_inode, ctx, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -106,9 +115,12 @@ zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_fsync(dentry->d_inode, datasync, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -134,9 +146,12 @@ zpl_fsync(struct file *filp, int datasync)
 	struct inode *inode = filp->f_mapping->host;
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_fsync(inode, datasync, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -162,13 +177,16 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	struct inode *inode = filp->f_mapping->host;
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
 	if (error)
 		return (error);
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_fsync(inode, datasync, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -193,6 +211,7 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
 	ssize_t read;
 	uio_t uio;
 	int error;
+	fstrans_cookie_t cookie;
 
 	uio.uio_iov = (struct iovec *)iovp;
 	uio.uio_resid = count;
@@ -201,7 +220,9 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
 	uio.uio_limit = MAXOFFSET_T;
 	uio.uio_segflg = segment;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_read(ip, &uio, flags, cr);
+	spl_fstrans_unmark(cookie);
 	if (error < 0)
 		return (error);
 
@@ -271,6 +292,7 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
 	ssize_t wrote;
 	uio_t uio;
 	int error;
+	fstrans_cookie_t cookie;
 
 	if (flags & O_APPEND)
 		*ppos = i_size_read(ip);
@@ -282,7 +304,9 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
 	uio.uio_limit = MAXOFFSET_T;
 	uio.uio_segflg = segment;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_write(ip, &uio, flags, cr);
+	spl_fstrans_unmark(cookie);
 	if (error < 0)
 		return (error);
 
@@ -347,13 +371,17 @@ static loff_t
 zpl_llseek(struct file *filp, loff_t offset, int whence)
 {
 #if defined(SEEK_HOLE) && defined(SEEK_DATA)
+	fstrans_cookie_t cookie;
+
 	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
 		struct inode *ip = filp->f_mapping->host;
 		loff_t maxbytes = ip->i_sb->s_maxbytes;
 		loff_t error;
 
 		spl_inode_lock(ip);
+		cookie = spl_fstrans_mark();
 		error = -zfs_holey(ip, whence, &offset);
+		spl_fstrans_unmark(cookie);
 		if (error == 0)
 			error = lseek_execute(filp, ip, offset, maxbytes);
 		spl_inode_unlock(ip);
@@ -414,9 +442,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct inode *ip = filp->f_mapping->host;
 	znode_t *zp = ITOZ(ip);
 	int error;
+	fstrans_cookie_t cookie;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
 	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
+	spl_fstrans_unmark(cookie);
 	if (error)
 		return (error);
 
@@ -446,12 +477,15 @@ zpl_readpage(struct file *filp, struct page *pp)
 	struct inode *ip;
 	struct page *pl[1];
 	int error = 0;
+	fstrans_cookie_t cookie;
 
 	ASSERT(PageLocked(pp));
 	ip = pp->mapping->host;
 	pl[0] = pp;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_getpage(ip, pl, 1);
+	spl_fstrans_unmark(cookie);
 
 	if (error) {
 		SetPageError(pp);
@@ -569,6 +603,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 	cred_t *cr = CRED();
 	flock64_t bf;
 	loff_t olen;
+	fstrans_cookie_t cookie;
 
 	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 		return (error);
@@ -593,7 +628,9 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 	bf.l_len = len;
 	bf.l_pid = 0;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
+	spl_fstrans_unmark(cookie);
 	spl_inode_unlock(ip);
 
 	crfree(cr);
@@ -663,6 +700,7 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
 	xvattr_t	xva;
 	xoptattr_t	*xoap;
 	int		error;
+	fstrans_cookie_t cookie;
 
 	if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
 		return (-EFAULT);
@@ -697,7 +735,9 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
 		xoap->xoa_nodump = B_TRUE;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
 	return (error);
diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
index c009807cb422..fc6231b88485 100644
--- a/module/zfs/zpl_inode.c
+++ b/module/zfs/zpl_inode.c
@@ -40,12 +40,15 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	cred_t *cr = CRED();
 	struct inode *ip;
 	int error;
+	fstrans_cookie_t cookie;
 
 	if (dlen(dentry) > ZFS_MAXNAMELEN)
 		return (ERR_PTR(-ENAMETOOLONG));
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL);
+	spl_fstrans_unmark(cookie);
 	ASSERT3S(error, <=, 0);
 	crfree(cr);
 
@@ -95,12 +98,15 @@ zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
 	struct inode *ip;
 	vattr_t *vap;
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, mode, cr);
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
+	spl_fstrans_unmark(cookie);
 	if (error == 0) {
 		VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
 		VERIFY0(zpl_init_acl(ip, dir));
@@ -122,6 +128,7 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
 	struct inode *ip;
 	vattr_t *vap;
 	int error;
+	fstrans_cookie_t cookie;
 
 	/*
 	 * We currently expect Linux to supply rdev=0 for all sockets
@@ -135,7 +142,9 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
 	zpl_vap_init(vap, dir, mode, cr);
 	vap->va_rdev = rdev;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
+	spl_fstrans_unmark(cookie);
 	if (error == 0) {
 		VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
 		VERIFY0(zpl_init_acl(ip, dir));
@@ -154,9 +163,12 @@ zpl_unlink(struct inode *dir, struct dentry *dentry)
 {
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_remove(dir, dname(dentry), cr);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -170,12 +182,15 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
 	vattr_t *vap;
 	struct inode *ip;
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
+	spl_fstrans_unmark(cookie);
 	if (error == 0) {
 		VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
 		VERIFY0(zpl_init_acl(ip, dir));
@@ -194,9 +209,12 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry)
 {
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -208,6 +226,7 @@ zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 	boolean_t issnap = ITOZSB(dentry->d_inode)->z_issnap;
 	int error;
+	fstrans_cookie_t cookie;
 
 	/*
 	 * Ensure MNT_SHRINKABLE is set on snapshots to ensure they are
@@ -220,7 +239,9 @@ zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	if (unlikely(issnap && !(mnt->mnt_flags & MNT_SHRINKABLE)))
 		mnt->mnt_flags |= MNT_SHRINKABLE;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_getattr_fast(dentry->d_inode, stat);
+	spl_fstrans_unmark(cookie);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
@@ -233,6 +254,7 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	cred_t *cr = CRED();
 	vattr_t *vap;
 	int error;
+	fstrans_cookie_t cookie;
 
 	error = inode_change_ok(ip, ia);
 	if (error)
@@ -249,7 +271,9 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	vap->va_mtime = ia->ia_mtime;
 	vap->va_ctime = ia->ia_ctime;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_setattr(ip, vap, 0, cr);
+	spl_fstrans_unmark(cookie);
 	if (!error && (ia->ia_valid & ATTR_MODE))
 		error = zpl_chmod_acl(ip);
 
@@ -266,9 +290,12 @@ zpl_rename(struct inode *sdip, struct dentry *sdentry,
 {
 	cred_t *cr = CRED();
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
+	cookie = spl_fstrans_mark();
 	error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0);
+	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
@@ -282,12 +309,15 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
 	vattr_t *vap;
 	struct inode *ip;
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
+	spl_fstrans_unmark(cookie);
 	if (error == 0) {
 		VERIFY0(zpl_xattr_security_init(ip, dir, &dentry->d_name));
 		d_instantiate(dentry, ip);
@@ -309,6 +339,7 @@ zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
 	uio_t uio;
 	char *link;
 	int error;
+	fstrans_cookie_t cookie;
 
 	crhold(cr);
 
@@ -320,7 +351,9 @@ zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
 	uio.uio_resid = (MAXPATHLEN - 1);
 	uio.uio_segflg = UIO_SYSSPACE;
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_readlink(ip, &uio, cr);
+	spl_fstrans_unmark(cookie);
 	if (error) {
 		kmem_free(link, MAXPATHLEN);
 		nd_set_link(nd, ERR_PTR(error));
@@ -347,6 +380,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 	cred_t *cr = CRED();
 	struct inode *ip = old_dentry->d_inode;
 	int error;
+	fstrans_cookie_t cookie;
 
 	if (ip->i_nlink >= ZFS_LINK_MAX)
 		return (-EMLINK);
@@ -355,7 +389,9 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 	ip->i_ctime = CURRENT_TIME_SEC;
 	igrab(ip); /* Use ihold() if available */
 
+	cookie = spl_fstrans_mark();
 	error = -zfs_link(dir, ip, dname(dentry), cr);
+	spl_fstrans_unmark(cookie);
 	if (error) {
 		iput(ip);
 		goto out;
@@ -375,6 +411,7 @@ zpl_truncate_range(struct inode *ip, loff_t start, loff_t end)
 {
 	cred_t *cr = CRED();
 	flock64_t bf;
+	fstrans_cookie_t cookie;
 
 	ASSERT3S(start, <=, end);
 
@@ -392,7 +429,9 @@ zpl_truncate_range(struct inode *ip, loff_t start, loff_t end)
 	bf.l_start = start;
 	bf.l_len = end - start;
 	bf.l_pid = 0;
+	cookie = spl_fstrans_mark();
 	zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr);
+	spl_fstrans_unmark(cookie);
 
 	crfree(cr);
 }