Skip to content

Commit

Permalink
Mark all ZPL and ioctl functions as PF_FSTRANS
Browse files Browse the repository at this point in the history
Prevent deadlocks by disabling direct reclaim during all ZPL and ioctl
calls, as well as in the l2arc feed and ARC adapt threads.

This obviates the need for MUTEX_FSTRANS so its previous uses and
definition have been eliminated.

Signed-off-by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes openzfs#3225
  • Loading branch information
dweeezil authored and behlendorf committed Apr 3, 2015
1 parent 74aa2ba commit 40d06e3
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 19 deletions.
1 change: 0 additions & 1 deletion include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ typedef struct kmutex {
} kmutex_t;

#define MUTEX_DEFAULT 0
#define MUTEX_FSTRANS MUTEX_DEFAULT
#define MUTEX_HELD(m) ((m)->m_owner == curthread)
#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))

Expand Down
10 changes: 8 additions & 2 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,7 @@ buf_init(void)

for (i = 0; i < BUF_LOCKS; i++) {
mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
NULL, MUTEX_FSTRANS, NULL);
NULL, MUTEX_DEFAULT, NULL);
}
}

Expand Down Expand Up @@ -2412,9 +2412,11 @@ static void
arc_adapt_thread(void)
{
callb_cpr_t cpr;
fstrans_cookie_t cookie;

CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);

cookie = spl_fstrans_mark();
mutex_enter(&arc_reclaim_thr_lock);
while (arc_thread_exit == 0) {
#ifndef _KERNEL
Expand Down Expand Up @@ -2485,6 +2487,7 @@ arc_adapt_thread(void)
arc_thread_exit = 0;
cv_broadcast(&arc_reclaim_thr_cv);
CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_thr_lock */
spl_fstrans_unmark(cookie);
thread_exit();
}

Expand Down Expand Up @@ -5376,11 +5379,13 @@ l2arc_feed_thread(void)
uint64_t size, wrote;
clock_t begin, next = ddi_get_lbolt();
boolean_t headroom_boost = B_FALSE;
fstrans_cookie_t cookie;

CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);

mutex_enter(&l2arc_feed_thr_lock);

cookie = spl_fstrans_mark();
while (l2arc_thread_exit == 0) {
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait_interruptible(&l2arc_feed_thr_cv,
Expand Down Expand Up @@ -5454,6 +5459,7 @@ l2arc_feed_thread(void)
next = l2arc_write_interval(begin, size, wrote);
spa_config_exit(spa, SCL_L2ARC, dev);
}
spl_fstrans_unmark(cookie);

l2arc_thread_exit = 0;
cv_broadcast(&l2arc_feed_thr_cv);
Expand Down Expand Up @@ -5570,7 +5576,7 @@ l2arc_init(void)
mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_FSTRANS, NULL);
mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);

l2arc_dev_list = &L2ARC_dev_list;
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ dbuf_init(void)
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);

for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_FSTRANS, NULL);
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);

dbuf_stats_init(h);
}
Expand Down
5 changes: 5 additions & 0 deletions module/zfs/zfs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -5733,6 +5733,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
const zfs_ioc_vec_t *vec;
char *saved_poolname = NULL;
nvlist_t *innvl = NULL;
fstrans_cookie_t cookie;

vecnum = cmd - ZFS_IOC_FIRST;
if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
Expand Down Expand Up @@ -5827,7 +5828,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
}

outnvl = fnvlist_alloc();
cookie = spl_fstrans_mark();
error = vec->zvec_func(zc->zc_name, innvl, outnvl);
spl_fstrans_unmark(cookie);

if (error == 0 && vec->zvec_allow_log &&
spa_open(zc->zc_name, &spa, FTAG) == 0) {
Expand Down Expand Up @@ -5855,7 +5858,9 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)

nvlist_free(outnvl);
} else {
cookie = spl_fstrans_mark();
error = vec->zvec_legacy_func(zc);
spl_fstrans_unmark(cookie);
}

out:
Expand Down
18 changes: 3 additions & 15 deletions module/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1097,23 +1097,13 @@ zfs_zinactive(znode_t *zp)
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
boolean_t drop_mutex = 0;

ASSERT(zp->z_sa_hdl);

/*
* Don't allow a zfs_zget() while we're trying to release this znode.
*
* Linux allows direct memory reclaim which means that any KM_SLEEP
* allocation may trigger inode eviction. This can lead to a deadlock
* through the ->shrink_icache_memory()->evict()->zfs_inactive()->
* zfs_zinactive() call path. To avoid this deadlock the process
* must not reacquire the mutex when it is already holding it.
*/
if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
ZFS_OBJ_HOLD_ENTER(zsb, z_id);
drop_mutex = 1;
}
ZFS_OBJ_HOLD_ENTER(zsb, z_id);

mutex_enter(&zp->z_lock);

Expand All @@ -1124,8 +1114,7 @@ zfs_zinactive(znode_t *zp)
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);

if (drop_mutex)
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
ZFS_OBJ_HOLD_EXIT(zsb, z_id);

zfs_rmnode(zp);
return;
Expand All @@ -1134,8 +1123,7 @@ zfs_zinactive(znode_t *zp)
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);

if (drop_mutex)
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
}

static inline int
Expand Down
40 changes: 40 additions & 0 deletions module/zfs/zpl_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ zpl_open(struct inode *ip, struct file *filp)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;

error = generic_file_open(ip, filp);
if (error)
return (error);

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);

Expand All @@ -53,12 +56,15 @@ zpl_release(struct inode *ip, struct file *filp)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;

cookie = spl_fstrans_mark();
if (ITOZ(ip)->z_atime_dirty)
zfs_mark_inode_dirty(ip);

crhold(cr);
error = -zfs_close(ip, filp->f_flags, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);

Expand All @@ -71,9 +77,12 @@ zpl_iterate(struct file *filp, struct dir_context *ctx)
struct dentry *dentry = filp->f_path.dentry;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_readdir(dentry->d_inode, ctx, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);

Expand Down Expand Up @@ -106,9 +115,12 @@ zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_fsync(dentry->d_inode, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);

Expand All @@ -134,9 +146,12 @@ zpl_fsync(struct file *filp, int datasync)
struct inode *inode = filp->f_mapping->host;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_fsync(inode, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);

Expand All @@ -162,13 +177,16 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
struct inode *inode = filp->f_mapping->host;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;

error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
return (error);

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_fsync(inode, datasync, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);

Expand All @@ -193,6 +211,7 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
ssize_t read;
uio_t uio;
int error;
fstrans_cookie_t cookie;

uio.uio_iov = (struct iovec *)iovp;
uio.uio_resid = count;
Expand All @@ -201,7 +220,9 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
uio.uio_limit = MAXOFFSET_T;
uio.uio_segflg = segment;

cookie = spl_fstrans_mark();
error = -zfs_read(ip, &uio, flags, cr);
spl_fstrans_unmark(cookie);
if (error < 0)
return (error);

Expand Down Expand Up @@ -271,6 +292,7 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
ssize_t wrote;
uio_t uio;
int error;
fstrans_cookie_t cookie;

if (flags & O_APPEND)
*ppos = i_size_read(ip);
Expand All @@ -282,7 +304,9 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
uio.uio_limit = MAXOFFSET_T;
uio.uio_segflg = segment;

cookie = spl_fstrans_mark();
error = -zfs_write(ip, &uio, flags, cr);
spl_fstrans_unmark(cookie);
if (error < 0)
return (error);

Expand Down Expand Up @@ -347,13 +371,17 @@ static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
fstrans_cookie_t cookie;

if (whence == SEEK_DATA || whence == SEEK_HOLE) {
struct inode *ip = filp->f_mapping->host;
loff_t maxbytes = ip->i_sb->s_maxbytes;
loff_t error;

spl_inode_lock(ip);
cookie = spl_fstrans_mark();
error = -zfs_holey(ip, whence, &offset);
spl_fstrans_unmark(cookie);
if (error == 0)
error = lseek_execute(filp, ip, offset, maxbytes);
spl_inode_unlock(ip);
Expand Down Expand Up @@ -414,9 +442,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
struct inode *ip = filp->f_mapping->host;
znode_t *zp = ITOZ(ip);
int error;
fstrans_cookie_t cookie;

cookie = spl_fstrans_mark();
error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
(size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
spl_fstrans_unmark(cookie);
if (error)
return (error);

Expand Down Expand Up @@ -446,12 +477,15 @@ zpl_readpage(struct file *filp, struct page *pp)
struct inode *ip;
struct page *pl[1];
int error = 0;
fstrans_cookie_t cookie;

ASSERT(PageLocked(pp));
ip = pp->mapping->host;
pl[0] = pp;

cookie = spl_fstrans_mark();
error = -zfs_getpage(ip, pl, 1);
spl_fstrans_unmark(cookie);

if (error) {
SetPageError(pp);
Expand Down Expand Up @@ -569,6 +603,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
cred_t *cr = CRED();
flock64_t bf;
loff_t olen;
fstrans_cookie_t cookie;

if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return (error);
Expand All @@ -593,7 +628,9 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
bf.l_len = len;
bf.l_pid = 0;

cookie = spl_fstrans_mark();
error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
spl_fstrans_unmark(cookie);
spl_inode_unlock(ip);

crfree(cr);
Expand Down Expand Up @@ -663,6 +700,7 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
xvattr_t xva;
xoptattr_t *xoap;
int error;
fstrans_cookie_t cookie;

if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
return (-EFAULT);
Expand Down Expand Up @@ -697,7 +735,9 @@ zpl_ioctl_setflags(struct file *filp, void __user *arg)
xoap->xoa_nodump = B_TRUE;

crhold(cr);
cookie = spl_fstrans_mark();
error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
spl_fstrans_unmark(cookie);
crfree(cr);

return (error);
Expand Down
Loading

0 comments on commit 40d06e3

Please sign in to comment.