Mark IO pipeline with PF_FSTRANS
In order to avoid deadlocking in the IO pipeline, it is critical that
pageout be avoided during direct memory reclaim.  This ensures that
the pipeline threads can always make forward progress and never end
up blocking on a DMU transaction.  For exactly this reason, Linux
provides the PF_FSTRANS flag, which may be set in the process context.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
behlendorf committed Jan 16, 2015
1 parent d958324 commit 92119cc
Showing 7 changed files with 69 additions and 45 deletions.
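
Throughout the diff the old PF_NOFS annotation is replaced by the SPL fstrans API. As a quick orientation, the following is a minimal sketch, not part of the commit, of the mark/unmark bracketing pattern the pipeline now uses; the helper name example_pipeline_work is hypothetical:

/*
 * Minimal sketch, not part of the commit: the bracketing pattern the
 * IO pipeline adopts.  Mark the context before doing work that may
 * allocate memory, and restore the previous state when finished.
 */
#include <sys/zfs_context.h>

static void
example_pipeline_work(void)		/* hypothetical helper */
{
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();	/* forbid fs re-entry via reclaim */

	/* ... pipeline work that may allocate memory or sleep ... */

	spl_fstrans_unmark(cookie);	/* restore the saved state */
}
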
7 changes: 6 additions & 1 deletion include/sys/zfs_context.h
@@ -733,6 +733,11 @@ void ksiddomain_rele(ksiddomain_t *);
(void) nanosleep(&ts, NULL); \
} while (0)

#endif /* _KERNEL */
typedef int fstrans_cookie_t;

extern fstrans_cookie_t spl_fstrans_mark(void);
extern void spl_fstrans_unmark(fstrans_cookie_t);
extern int spl_fstrans_check(void);

#endif /* _KERNEL */
#endif /* _SYS_ZFS_CONTEXT_H */
17 changes: 17 additions & 0 deletions lib/libzpool/kernel.c
@@ -1275,3 +1275,20 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
return (0);
}

fstrans_cookie_t
spl_fstrans_mark(void)
{
return ((fstrans_cookie_t) 0);
}

void
spl_fstrans_unmark(fstrans_cookie_t cookie)
{
}

int
spl_fstrans_check(void)
{
return (0);
}
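
In user space these functions are deliberately no-ops, since libzpool never has to defend against page reclaim. The kernel-side counterparts are supplied by the SPL rather than by this commit; conceptually (a rough sketch under that assumption, not the SPL's actual code) they just save, set, and restore the PF_FSTRANS bit in current->flags:

/*
 * Rough sketch only, assuming the in-kernel SPL helpers simply toggle
 * PF_FSTRANS in the task flags.  The real implementation lives in the
 * SPL and is not part of this commit.
 */
static inline fstrans_cookie_t
spl_fstrans_mark(void)
{
	fstrans_cookie_t cookie;

	cookie = current->flags & PF_FSTRANS;	/* remember prior state */
	current->flags |= PF_FSTRANS;		/* mark this context */

	return (cookie);
}

static inline void
spl_fstrans_unmark(fstrans_cookie_t cookie)
{
	current->flags = (current->flags & ~PF_FSTRANS) | cookie;
}

static inline int
spl_fstrans_check(void)
{
	return (current->flags & PF_FSTRANS);
}
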
10 changes: 1 addition & 9 deletions module/zfs/txg.c
@@ -483,15 +483,7 @@ txg_sync_thread(dsl_pool_t *dp)
vdev_stat_t *vs1, *vs2;
clock_t start, delta;

#ifdef _KERNEL
/*
* Annotate this process with a flag that indicates that it is
* unsafe to use KM_SLEEP during memory allocations due to the
* potential for a deadlock. KM_PUSHPAGE should be used instead.
*/
current->flags |= PF_NOFS;
#endif /* _KERNEL */

(void) spl_fstrans_mark();
txg_thread_enter(tx, &cpr);

vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);
24 changes: 24 additions & 0 deletions module/zfs/vdev_file.c
@@ -161,6 +161,17 @@ vdev_file_io_strategy(void *arg)
zio_interrupt(zio);
}

static void
vdev_file_io_fsync(void *arg)
{
zio_t *zio = (zio_t *)arg;
vdev_file_t *vf = zio->io_vd->vdev_tsd;

zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL);

zio_interrupt(zio);
}

static int
vdev_file_io_start(zio_t *zio)
{
@@ -180,6 +191,19 @@ vdev_file_io_start(zio_t *zio)
if (zfs_nocacheflush)
break;

/*
* We cannot safely call vfs_fsync() when PF_FSTRANS
* is set in the current context. Filesystems like
* XFS include sanity checks to verify it is not
* already set, see xfs_vm_writepage(). Therefore
* the sync must be dispatched to a different context.
*/
if (spl_fstrans_check()) {
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
return (ZIO_PIPELINE_STOP);
}

zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
kcred, NULL);
break;
4 changes: 4 additions & 0 deletions module/zfs/zio.c
@@ -1361,7 +1361,11 @@ static zio_pipe_stage_t *zio_pipeline[];
void
zio_execute(zio_t *zio)
{
fstrans_cookie_t cookie;

cookie = spl_fstrans_mark();
__zio_execute(zio);
spl_fstrans_unmark(cookie);
}

__attribute__((always_inline))
11 changes: 3 additions & 8 deletions module/zfs/zpl_file.c
@@ -481,19 +481,14 @@ int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
struct address_space *mapping = data;
fstrans_cookie_t cookie;

ASSERT(PageLocked(pp));
ASSERT(!PageWriteback(pp));
ASSERT(!(current->flags & PF_NOFS));

/*
* Annotate this call path with a flag that indicates that it is
* unsafe to use KM_SLEEP during memory allocations due to the
* potential for a deadlock. KM_PUSHPAGE should be used instead.
*/
current->flags |= PF_NOFS;
cookie = spl_fstrans_mark();
(void) zfs_putpage(mapping->host, pp, wbc);
current->flags &= ~PF_NOFS;
spl_fstrans_unmark(cookie);

return (0);
}
41 changes: 14 additions & 27 deletions module/zfs/zvol.c
@@ -577,28 +577,21 @@ zvol_write(void *arg)
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error = 0;
dmu_tx_t *tx;
rl_t *rl;

/*
* Annotate this call path with a flag that indicates that it is
* unsafe to use KM_SLEEP during memory allocations due to the
* potential for a deadlock. KM_PUSHPAGE should be used instead.
*/
ASSERT(!(current->flags & PF_NOFS));
current->flags |= PF_NOFS;

if (req->cmd_flags & VDEV_REQ_FLUSH)
zil_commit(zv->zv_zilog, ZVOL_OBJ);

/*
* Some requests are just for flush and nothing else.
*/
if (size == 0) {
blk_end_request(req, 0, size);
error = 0;
goto out;
}

@@ -612,7 +605,6 @@ if (error) {
if (error) {
dmu_tx_abort(tx);
zfs_range_unlock(rl);
blk_end_request(req, -error, size);
goto out;
}

@@ -628,9 +620,9 @@ zvol_write(void *arg)
zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zv->zv_zilog, ZVOL_OBJ);

blk_end_request(req, -error, size);
out:
current->flags &= ~PF_NOFS;
blk_end_request(req, -error, size);
spl_fstrans_unmark(cookie);
}

#ifdef HAVE_BLK_QUEUE_DISCARD
@@ -640,21 +632,14 @@ zvol_discard(void *arg)
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t start = blk_rq_pos(req) << 9;
uint64_t end = start + blk_rq_bytes(req);
int error;
rl_t *rl;

/*
* Annotate this call path with a flag that indicates that it is
* unsafe to use KM_SLEEP during memory allocations due to the
* potential for a deadlock. KM_PUSHPAGE should be used instead.
*/
ASSERT(!(current->flags & PF_NOFS));
current->flags |= PF_NOFS;

if (end > zv->zv_volsize) {
blk_end_request(req, -EIO, blk_rq_bytes(req));
error = EIO;
goto out;
}

@@ -668,7 +653,7 @@ end = P2ALIGN(end, zv->zv_volblocksize);
end = P2ALIGN(end, zv->zv_volblocksize);

if (start >= end) {
blk_end_request(req, 0, blk_rq_bytes(req));
error = 0;
goto out;
}

@@ -681,10 +666,9 @@ zvol_discard(void *arg)
*/

zfs_range_unlock(rl);

blk_end_request(req, -error, blk_rq_bytes(req));
out:
current->flags &= ~PF_NOFS;
blk_end_request(req, -error, blk_rq_bytes(req));
spl_fstrans_unmark(cookie);
}
#endif /* HAVE_BLK_QUEUE_DISCARD */

@@ -700,14 +684,15 @@ zvol_read(void *arg)
struct request *req = (struct request *)arg;
struct request_queue *q = req->q;
zvol_state_t *zv = q->queuedata;
fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t offset = blk_rq_pos(req) << 9;
uint64_t size = blk_rq_bytes(req);
int error;
rl_t *rl;

if (size == 0) {
blk_end_request(req, 0, size);
return;
error = 0;
goto out;
}

rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
@@ -720,7 +705,9 @@ if (error == ECKSUM)
if (error == ECKSUM)
error = SET_ERROR(EIO);

out:
blk_end_request(req, -error, size);
spl_fstrans_unmark(cookie);
}

/*
