Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ABD: linear/scatter dual typed buffer for ARC (ver 2) #3441

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2a84724
Introduce ABD: linear/scatter dual typed buffer for ARC
tuxoko May 11, 2015
a244e02
Modify/Add incremental checksum function for abd_iterate_rfunc
tuxoko May 11, 2015
8d4d5ab
Use abd_t in arc_*.h, ddt.h, dmu.h and zio.h
tuxoko May 11, 2015
8acd63e
Convert zio_checksum to ABD version
tuxoko May 11, 2015
06d7e71
Handle abd_t in arc.c, bpobj.c and bptree.c
tuxoko May 11, 2015
70bc112
Handle abd_t in dbuf.c, ddt.c, dmu*.c
tuxoko May 11, 2015
1103cbe
Handle abd_t in dnode*.c, dsl_*.c, dsl_*.h
tuxoko May 12, 2015
f980ae8
Handle abd_t in sa_impl.h, sa.c, space_map.c, spa.c, spa_history.c an…
tuxoko May 12, 2015
ca8b191
Handle abd_t in zap*.c, zap_*.h, zfs_fuid.c, zfs_sa.c and zfs_vnops.c
tuxoko May 12, 2015
63ee9b8
Handle abd_t in zio.c
tuxoko May 12, 2015
4aee51a
Handle abd_t in vdev*.c sans vdev_raidz.c
tuxoko May 12, 2015
6fe2d25
Handle abd_t in vdev_raidz.c
tuxoko May 12, 2015
7a5f1a3
Disable simd raidz until we support it
Sep 22, 2016
de9379f
Handle ABD in ztest and zdb
tuxoko May 12, 2015
21c4a42
Enable ABD
tuxoko May 12, 2015
3096833
Split out miter part of copy funcs for abd_uiomove
tuxoko Aug 26, 2015
c24c876
Add access_ok check before __copy_{to,from}_user_inatomic
Nov 20, 2015
66b36ab
Only use abd_magic when we enable debug
Oct 16, 2015
b49069c
Optimize abd with contiguous pages and sg merging
Oct 30, 2015
a336f7b
Add abd version byteswap functions
tuxoko Jun 24, 2015
fd6fad8
Enable scatter ABD support for SHA256
Nov 25, 2015
da774ea
Enable scatter ABD for raidz parity
Jan 22, 2016
a6b71bf
Enable scatter ABD in zfs_fm
Jan 23, 2016
f087493
Set rrd->payload to NULL after return buffer
Jan 25, 2016
aae3379
Add ABD version of zio_{,de}compress_data
Jan 29, 2016
2d4a7d2
Add dmu_write_abd
Feb 2, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/raidz_test/raidz_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)

for (i = 0; i < cnt; i++) {
col = &rm->rm_col[tgts[i]];
dst = col->rc_data;
dst = (void *)col->rc_data;
for (i = 0; i < col->rc_size / sizeof (int); i++)
dst[i] = rand();
}
Expand Down
23 changes: 14 additions & 9 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2015, Intel Corporation.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

#include <stdio.h>
Expand All @@ -31,6 +32,7 @@
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/abd.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
Expand Down Expand Up @@ -1283,7 +1285,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
ASSERT(buf->b_data);

/* recursively visit blocks below this */
cbp = buf->b_data;
cbp = ABD_TO_BUF(buf->b_data);
for (i = 0; i < epb; i++, cbp++) {
zbookmark_phys_t czb;

Expand Down Expand Up @@ -1455,7 +1457,7 @@ dump_bptree(objset_t *os, uint64_t obj, char *name)
return;

VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
bt = db->db_data;
bt = ABD_TO_BUF(db->db_data);
zdb_nicenum(bt->bt_bytes, bytes);
(void) printf("\n %s: %llu datasets, %s\n",
name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
Expand Down Expand Up @@ -1907,7 +1909,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (error)
fatal("dmu_bonus_hold(%llu) failed, errno %u",
object, error);
bonus = db->db_data;
bonus = ABD_TO_BUF(db->db_data);
bsize = db->db_size;
dn = DB_DNODE((dmu_buf_impl_t *)db);
}
Expand Down Expand Up @@ -2131,7 +2133,7 @@ dump_config(spa_t *spa)
spa->spa_config_object, FTAG, &db);

if (error == 0) {
nvsize = *(uint64_t *)db->db_data;
nvsize = *(uint64_t *)ABD_TO_BUF(db->db_data);
dmu_buf_rele(db, FTAG);

(void) printf("\nMOS Configuration:\n");
Expand Down Expand Up @@ -2462,7 +2464,7 @@ zdb_blkptr_done(zio_t *zio)
zdb_cb_t *zcb = zio->io_private;
zbookmark_phys_t *zb = &zio->io_bookmark;

zio_data_buf_free(zio->io_data, zio->io_size);
abd_free(zio->io_data, zio->io_size);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
Expand Down Expand Up @@ -2528,7 +2530,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
abd_t *data = abd_alloc_linear(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;

/* If it's an intent log block, failure is expected. */
Expand Down Expand Up @@ -3351,6 +3353,7 @@ zdb_read_block(char *thing, spa_t *spa)
zio_t *zio;
vdev_t *vd;
void *pbuf, *lbuf, *buf;
abd_t *pbuf_abd;
char *s, *p, *dup, *vdev, *flagstr;
int i, error;

Expand Down Expand Up @@ -3425,6 +3428,7 @@ zdb_read_block(char *thing, spa_t *spa)

/* Some 4K native devices require 4K buffer alignment */
pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, PAGESIZE, UMEM_NOFAIL);
pbuf_abd = abd_get_from_buf(pbuf, SPA_MAXBLOCKSIZE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

BP_ZERO(bp);
Expand Down Expand Up @@ -3452,15 +3456,15 @@ zdb_read_block(char *thing, spa_t *spa)
/*
* Treat this as a normal block read.
*/
zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
zio_nowait(zio_read(zio, spa, bp, pbuf_abd, psize, NULL, NULL,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
} else {
/*
* Treat this as a vdev child I/O.
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf_abd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
Expand Down Expand Up @@ -3543,6 +3547,7 @@ zdb_read_block(char *thing, spa_t *spa)
zdb_dump_block(thing, buf, size, flags);

out:
abd_put(pbuf_abd);
umem_free(pbuf, SPA_MAXBLOCKSIZE);
umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
Expand Down
7 changes: 6 additions & 1 deletion cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/*
Expand All @@ -36,6 +37,7 @@
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/abd.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/stat.h>
Expand Down Expand Up @@ -127,6 +129,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_phys_t zb;
char *buf;
abd_t *abd;
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;

Expand Down Expand Up @@ -163,9 +166,11 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));

abd = abd_get_from_buf(buf, BP_GET_LSIZE(bp));
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
bp, abd, BP_GET_LSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
abd_put(abd);
if (error)
goto exit;
data = buf;
Expand Down
49 changes: 31 additions & 18 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/*
Expand Down Expand Up @@ -86,6 +87,7 @@
*/

#include <sys/zfs_context.h>
#include <sys/abd.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/txg.h>
Expand Down Expand Up @@ -1427,26 +1429,31 @@ ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
}

static void
ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
ztest_pattern_set(abd_t *abd, uint64_t size, uint64_t value)
{
void *buf = abd_borrow_buf(abd, size);
uint64_t *ip = buf;
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);

while (ip < ip_end)
*ip++ = value;

abd_return_buf_copy(abd, buf, size);
}

#ifndef NDEBUG
static boolean_t
ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
ztest_pattern_match(abd_t *abd, uint64_t size, uint64_t value)
{
void *buf = abd_borrow_buf_copy(abd, size);
uint64_t *ip = buf;
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
uint64_t diff = 0;

while (ip < ip_end)
diff |= (value - *ip++);

abd_return_buf(abd, buf, size);
return (diff == 0);
}
#endif
Expand Down Expand Up @@ -1490,7 +1497,8 @@ ztest_bt_bonus(dmu_buf_t *db)
dmu_object_info_from_db(db, &doi);
ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
bt = (void *)((char *)ABD_TO_BUF(db->db_data) + doi.doi_bonus_size -
sizeof (*bt));

return (bt);
}
Expand All @@ -1514,12 +1522,13 @@ ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
objset_t *os, uint64_t gen)
{
uint64_t *bonusp;
uint64_t *start = ABD_TO_BUF(db->db_data);

ASSERT(IS_P2ALIGNED((char *)end - (char *)db->db_data, 8));
ASSERT(IS_P2ALIGNED((char *)end - (char *)start, 8));

for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
for (bonusp = start; bonusp < (uint64_t *)end; bonusp++) {
uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
gen, bonusp - (uint64_t *)db->db_data);
gen, bonusp - start);
*bonusp = token;
}
}
Expand All @@ -1533,10 +1542,11 @@ ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
objset_t *os, uint64_t gen)
{
uint64_t *bonusp;
uint64_t *start = ABD_TO_BUF(db->db_data);

for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
for (bonusp = start; bonusp < (uint64_t *)end; bonusp++) {
uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
gen, bonusp - (uint64_t *)db->db_data);
gen, bonusp - start);
VERIFY3U(*bonusp, ==, token);
}
}
Expand Down Expand Up @@ -1903,7 +1913,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
if (abuf == NULL) {
dmu_write(os, lr->lr_foid, offset, length, data, tx);
} else {
bcopy(data, abuf->b_data, length);
abd_copy_from_buf(abuf->b_data, data, length);
dmu_assign_arcbuf(db, offset, abuf, tx);
}

Expand Down Expand Up @@ -4317,16 +4327,19 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
dmu_buf_t *dbt;
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
bcopy((caddr_t)bigbuf + (off - bigoff),
bigbuf_arcbufs[j]->b_data, chunksize);
abd_copy_from_buf(bigbuf_arcbufs[j]->b_data,
(caddr_t)bigbuf + (off - bigoff),
chunksize);
} else {
bcopy((caddr_t)bigbuf + (off - bigoff),
abd_copy_from_buf(
bigbuf_arcbufs[2 * j]->b_data,
(caddr_t)bigbuf + (off - bigoff),
chunksize / 2);
bcopy((caddr_t)bigbuf + (off - bigoff) +
chunksize / 2,

abd_copy_from_buf(
bigbuf_arcbufs[2 * j + 1]->b_data,
chunksize / 2);
(caddr_t)bigbuf + (off - bigoff) +
chunksize / 2, chunksize / 2);
}

if (i == 1) {
Expand Down Expand Up @@ -5442,7 +5455,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
enum zio_checksum checksum = spa_dedup_checksum(spa);
dmu_buf_t *db;
dmu_tx_t *tx;
void *buf;
abd_t *buf;
blkptr_t blk;
int copies = 2 * ZIO_DEDUPDITTO_MIN;
int i;
Expand Down Expand Up @@ -5523,14 +5536,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
* Damage the block. Dedup-ditto will save us when we read it later.
*/
psize = BP_GET_PSIZE(&blk);
buf = zio_buf_alloc(psize);
buf = abd_alloc_linear(psize);
ztest_pattern_set(buf, psize, ~pattern);

(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));

zio_buf_free(buf, psize);
abd_free(buf, psize);

(void) rw_unlock(&ztest_name_lock);
umem_free(od, sizeof (ztest_od_t));
Expand Down
22 changes: 22 additions & 0 deletions config/kernel-sg-from-pages.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
dnl #
dnl # 3.6 API change,
dnl # sg_alloc_table_from_pages, allows merging adjacent pages
dnl #
dnl # This check test-compiles a call to sg_alloc_table_from_pages using
dnl # linux/scatterlist.h, passing an sg_table, an array of NR page pointers,
dnl # offset 0, a size of NR*PAGE_SIZE and GFP_KERNEL.
dnl #
dnl # When the test program compiles, HAVE_SG_FROM_PAGES is defined to 1.
dnl # When it does not compile, nothing is defined and "no" is reported.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SG_FROM_PAGES], [
AC_MSG_CHECKING([whether sg_alloc_table_from_pages exists])
ZFS_LINUX_TRY_COMPILE([
#include <linux/scatterlist.h>
#define NR 4
],[
struct sg_table sgt;
struct page *pages[NR];
int ret;
ret = sg_alloc_table_from_pages(&sgt, pages, NR, 0, NR*PAGE_SIZE, GFP_KERNEL);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SG_FROM_PAGES, 1, [sg_alloc_table_from_pages])
],[
AC_MSG_RESULT(no)
])
])

1 change: 1 addition & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_FPU
ZFS_AC_KERNEL_KUID_HELPERS
ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST
ZFS_AC_KERNEL_SG_FROM_PAGES

AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
Expand Down
1 change: 1 addition & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
SUBDIRS = fm fs crypto sysevent

COMMON_H = \
$(top_srcdir)/include/sys/abd.h \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
Expand Down
Loading