Skip to content

Commit

Permalink
Linux 4.1 compat: loop device on ZFS
Browse files Browse the repository at this point in the history
Starting from Linux 4.1, an iov_iter backed by bio_vec may be passed into
iter_read/iter_write. Notably, the loop device will pass bio_vec to the backend
filesystem. However, the current ZFS code assumes iovec without any check, so
it will always crash when a loop device is used.

With the restructured uio_t, we can safely pass bio_vec in uio_t with UIO_BVEC
set. The uio* functions are modified to handle the bio_vec case separately.

The const uio_iov causes some warnings in the xuio-related code, so explicitly
cast it to non-const there.

Signed-off-by: Chunwei Chen <tuxoko@gmail.com>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes openzfs#3511
Closes openzfs#3640
  • Loading branch information
tuxoko authored and tomgarcia committed Aug 25, 2015
1 parent 9f4a0d2 commit bca60ec
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 137 deletions.
205 changes: 108 additions & 97 deletions module/zcommon/zfs_uio.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/*
* The uio support from OpenSolaris has been added as a short term
Expand All @@ -46,27 +49,25 @@

#include <sys/types.h>
#include <sys/uio_impl.h>
#include <linux/kmap_compat.h>

/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
* of the move, and the I/O parameters are provided in "uio", which is
* updated to reflect the data which was moved. Returns 0 on success or
* a non-zero errno on failure.
*/
int
uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
static int
uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
struct iovec *iov;
const struct iovec *iov = uio->uio_iov;
size_t skip = uio->uio_skip;
ulong_t cnt;

ASSERT3U(skip, <, iov->iov_len);

while (n && uio->uio_resid) {
iov = uio->uio_iov;
cnt = MIN(iov->iov_len, n);
if (cnt == 0l) {
uio->uio_iov++;
uio->uio_iovcnt--;
continue;
}
cnt = MIN(iov->iov_len - skip, n);
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
Expand All @@ -75,29 +76,80 @@ uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
* iov->iov_base = user data pointer
*/
if (rw == UIO_READ) {
if (copy_to_user(iov->iov_base, p, cnt))
if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
if (copy_from_user(p, iov->iov_base, cnt))
if (copy_from_user(p, iov->iov_base+skip, cnt))
return (EFAULT);
}
break;
case UIO_SYSSPACE:
if (rw == UIO_READ)
bcopy(p, iov->iov_base, cnt);
bcopy(p, iov->iov_base + skip, cnt);
else
bcopy(iov->iov_base, p, cnt);
bcopy(iov->iov_base + skip, p, cnt);
break;
default:
ASSERT(0);
}
skip += cnt;
if (skip == iov->iov_len) {
skip = 0;
uio->uio_iov = (++iov);
uio->uio_iovcnt--;
}
iov->iov_base += cnt;
iov->iov_len -= cnt;
uio->uio_skip = skip;
uio->uio_resid -= cnt;
uio->uio_loffset += cnt;
p = (caddr_t)p + cnt;
n -= cnt;
}
return (0);
}

static int
uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
const struct bio_vec *bv = uio->uio_bvec;
size_t skip = uio->uio_skip;
ulong_t cnt;

ASSERT3U(skip, <, bv->bv_len);

while (n && uio->uio_resid) {
void *paddr;
cnt = MIN(bv->bv_len - skip, n);

paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
if (rw == UIO_READ)
bcopy(p, paddr + bv->bv_offset + skip, cnt);
else
bcopy(paddr + bv->bv_offset + skip, p, cnt);
zfs_kunmap_atomic(paddr, KM_USER1);

skip += cnt;
if (skip == bv->bv_len) {
skip = 0;
uio->uio_bvec = (++bv);
uio->uio_iovcnt--;
}
uio->uio_skip = skip;
uio->uio_resid -= cnt;
uio->uio_loffset += cnt;
p = (caddr_t)p + cnt;
n -= cnt;
}
return (0);
}

/*
 * Public entry point: move "n" bytes between "p" and the memory described
 * by "uio".  A uio whose uio_segflg is UIO_BVEC is handled by
 * uiomove_bvec(); every other segment type is handled by uiomove_iov().
 * The helper's return value is passed straight back to the caller.
 */
int
uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
{
	if (uio->uio_segflg == UIO_BVEC)
		return (uiomove_bvec(p, n, rw, uio));

	return (uiomove_iov(p, n, rw, uio));
}
EXPORT_SYMBOL(uiomove);

#define fuword8(uptr, vptr) get_user((*vptr), (uptr))
Expand All @@ -111,39 +163,39 @@ EXPORT_SYMBOL(uiomove);
void
uio_prefaultpages(ssize_t n, struct uio *uio)
{
struct iovec *iov;
const struct iovec *iov;
ulong_t cnt, incr;
caddr_t p;
uint8_t tmp;
int iovcnt;
size_t skip = uio->uio_skip;

/* no need to fault in kernel pages */
switch (uio->uio_segflg) {
case UIO_SYSSPACE:
case UIO_BVEC:
return;
case UIO_USERSPACE:
case UIO_USERISPACE:
break;
default:
ASSERT(0);
}

iov = uio->uio_iov;
iovcnt = uio->uio_iovcnt;
ASSERT3U(skip, <, iov->iov_len);

while ((n > 0) && (iovcnt > 0)) {
cnt = MIN(iov->iov_len, n);
if (cnt == 0) {
/* empty iov entry */
iov++;
iovcnt--;
continue;
}
cnt = MIN(iov->iov_len - skip, n);
n -= cnt;
/*
* touch each page in this segment.
*/
p = iov->iov_base;
p = iov->iov_base + skip;
while (cnt) {
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
if (fuword8((uint8_t *) p, &tmp))
return;
break;
case UIO_SYSSPACE:
bcopy(p, &tmp, 1);
break;
}
if (fuword8((uint8_t *) p, &tmp))
return;
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
Expand All @@ -152,18 +204,11 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
* touch the last byte in case it straddles a page.
*/
p--;
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
if (fuword8((uint8_t *) p, &tmp))
return;
break;
case UIO_SYSSPACE:
bcopy(p, &tmp, 1);
break;
}
if (fuword8((uint8_t *) p, &tmp))
return;
iov++;
iovcnt--;
skip = 0;
}
}
EXPORT_SYMBOL(uio_prefaultpages);
Expand All @@ -175,49 +220,13 @@ EXPORT_SYMBOL(uio_prefaultpages);
int
uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
{
struct iovec *iov;
ulong_t cnt;
int iovcnt;

iovcnt = uio->uio_iovcnt;
*cbytes = 0;

for (iov = uio->uio_iov; n && iovcnt; iov++, iovcnt--) {
cnt = MIN(iov->iov_len, n);
if (cnt == 0)
continue;

switch (uio->uio_segflg) {

case UIO_USERSPACE:
case UIO_USERISPACE:
/*
* p = kernel data pointer
* iov->iov_base = user data pointer
*/
if (rw == UIO_READ) {
/* UIO_READ = copy data from kernel to user */
if (copy_to_user(iov->iov_base, p, cnt))
return (EFAULT);
} else {
/* UIO_WRITE = copy data from user to kernel */
if (copy_from_user(p, iov->iov_base, cnt))
return (EFAULT);
}
break;
struct uio uio_copy;
int ret;

case UIO_SYSSPACE:
if (rw == UIO_READ)
bcopy(p, iov->iov_base, cnt);
else
bcopy(iov->iov_base, p, cnt);
break;
}
p = (caddr_t)p + cnt;
n -= cnt;
*cbytes += cnt;
}
return (0);
bcopy(uio, &uio_copy, sizeof (struct uio));
ret = uiomove(p, n, rw, &uio_copy);
*cbytes = uio->uio_resid - uio_copy.uio_resid;
return (ret);
}
EXPORT_SYMBOL(uiocopy);

Expand All @@ -229,21 +238,23 @@ uioskip(uio_t *uiop, size_t n)
{
if (n > uiop->uio_resid)
return;
while (n != 0) {
iovec_t *iovp = uiop->uio_iov;
size_t niovb = MIN(iovp->iov_len, n);

if (niovb == 0) {
uiop->uio_skip += n;
if (uiop->uio_segflg != UIO_BVEC) {
while (uiop->uio_skip >= uiop->uio_iov->iov_len) {
uiop->uio_skip -= uiop->uio_iov->iov_len;
uiop->uio_iov++;
uiop->uio_iovcnt--;
continue;
}
iovp->iov_base += niovb;
uiop->uio_loffset += niovb;
iovp->iov_len -= niovb;
uiop->uio_resid -= niovb;
n -= niovb;
} else {
while (uiop->uio_skip >= uiop->uio_bvec->bv_len) {
uiop->uio_skip -= uiop->uio_bvec->bv_len;
uiop->uio_bvec++;
uiop->uio_iovcnt--;
}
}
uiop->uio_loffset += n;
uiop->uio_resid -= n;
}
EXPORT_SYMBOL(uioskip);
#endif /* _KERNEL */
5 changes: 3 additions & 2 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

#include <sys/dmu.h>
Expand Down Expand Up @@ -933,7 +934,7 @@ dmu_xuio_init(xuio_t *xuio, int nblk)
priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP);
priv->cnt = nblk;
priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP);
priv->iovp = uio->uio_iov;
priv->iovp = (iovec_t *)uio->uio_iov;
XUIO_XUZC_PRIV(xuio) = priv;

if (XUIO_XUZC_RW(xuio) == UIO_READ)
Expand Down Expand Up @@ -974,7 +975,7 @@ dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)

ASSERT(i < priv->cnt);
ASSERT(off + n <= arc_buf_size(abuf));
iov = uio->uio_iov + i;
iov = (iovec_t *)uio->uio_iov + i;
iov->iov_base = (char *)abuf->b_data + off;
iov->iov_len = n;
priv->bufs[i] = abuf;
Expand Down
6 changes: 4 additions & 2 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/* Portions Copyright 2007 Jeremy Teo */
Expand Down Expand Up @@ -591,10 +592,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
int max_blksz = zsb->z_max_blksz;
int error = 0;
arc_buf_t *abuf;
iovec_t *aiov = NULL;
const iovec_t *aiov = NULL;
xuio_t *xuio = NULL;
int i_iov = 0;
iovec_t *iovp = uio->uio_iov;
const iovec_t *iovp = uio->uio_iov;
int write_eof;
int count = 0;
sa_bulk_attr_t bulk[4];
Expand Down Expand Up @@ -714,6 +715,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)

if (xuio && abuf == NULL) {
ASSERT(i_iov < iovcnt);
ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
aiov = &iovp[i_iov];
abuf = dmu_xuio_arcbuf(xuio, i_iov);
dmu_xuio_clear(xuio, i_iov);
Expand Down
Loading

0 comments on commit bca60ec

Please sign in to comment.