From a8f1180c4070b210e5f64d3120dc4cc8514bb6d1 Mon Sep 17 00:00:00 2001 From: Chris Siden Date: Mon, 23 Jan 2012 18:43:32 -0800 Subject: [PATCH 1/4] illumos #1948: zpool list should show more detailed pool information Reviewed by: Adam Leventhal Reviewed by: Matt Ahrens Reviewed by: Eric Schrock Reviewed by: Richard Lowe Reviewed by: Albert Lee Reviewed by: Dan McDonald Reviewed by: Garrett D'Amore Approved by: Eric Schrock References: https://www.illumos.org/issues/1948 Ported by: Martin Matuska --- cmd/zpool/zpool_main.c | 242 ++++++++++++++++++++++++++++++------ include/libzfs.h | 3 +- include/sys/fs/zfs.h | 5 +- include/sys/vdev_impl.h | 5 +- lib/libzfs/libzfs_pool.c | 33 ++++- man/man8/zpool.8 | 61 +++++++-- module/zcommon/zpool_prop.c | 4 +- module/zfs/spa.c | 18 ++- module/zfs/vdev.c | 17 ++- module/zfs/vdev_disk.c | 9 +- module/zfs/vdev_file.c | 6 +- module/zfs/vdev_mirror.c | 8 +- module/zfs/vdev_missing.c | 8 +- module/zfs/vdev_raidz.c | 6 +- module/zfs/vdev_root.c | 8 +- module/zfs/zfs_ioctl.c | 20 ++- 16 files changed, 386 insertions(+), 67 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 21548c82dfda..2c4dac7b0c4c 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Frederik Wessels. All rights reserved. 
*/ @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,7 @@ static int zpool_do_status(int, char **); static int zpool_do_online(int, char **); static int zpool_do_offline(int, char **); static int zpool_do_clear(int, char **); +static int zpool_do_reopen(int, char **); static int zpool_do_reguid(int, char **); @@ -131,7 +133,8 @@ typedef enum { HELP_GET, HELP_SET, HELP_SPLIT, - HELP_REGUID + HELP_REGUID, + HELP_REOPEN } zpool_help_t; @@ -164,6 +167,7 @@ static zpool_command_t command_table[] = { { "online", zpool_do_online, HELP_ONLINE }, { "offline", zpool_do_offline, HELP_OFFLINE }, { "clear", zpool_do_clear, HELP_CLEAR }, + { "reopen", zpool_do_reopen, HELP_REOPEN }, { NULL }, { "attach", zpool_do_attach, HELP_ATTACH }, { "detach", zpool_do_detach, HELP_DETACH }, @@ -238,6 +242,8 @@ get_usage(zpool_help_t idx) { "[new-device]\n")); case HELP_REMOVE: return (gettext("\tremove ...\n")); + case HELP_REOPEN: + return (""); /* Undocumented command */ case HELP_SCRUB: return (gettext("\tscrub [-s] ...\n")); case HELP_STATUS: @@ -1989,10 +1995,10 @@ zpool_do_import(int argc, char **argv) } typedef struct iostat_cbdata { - zpool_list_t *cb_list; - int cb_verbose; - int cb_iteration; + boolean_t cb_verbose; int cb_namewidth; + int cb_iteration; + zpool_list_t *cb_list; } iostat_cbdata_t; static void @@ -2489,8 +2495,9 @@ zpool_do_iostat(int argc, char **argv) } typedef struct list_cbdata { + boolean_t cb_verbose; + int cb_namewidth; boolean_t cb_scripted; - boolean_t cb_first; zprop_list_t *cb_proplist; } list_cbdata_t; @@ -2498,16 +2505,27 @@ typedef struct list_cbdata { * Given a list of columns to display, output appropriate headers for each one. 
*/ static void -print_header(zprop_list_t *pl) +print_header(list_cbdata_t *cb) { + zprop_list_t *pl = cb->cb_proplist; const char *header; boolean_t first = B_TRUE; boolean_t right_justify; + size_t width = 0; for (; pl != NULL; pl = pl->pl_next) { if (pl->pl_prop == ZPROP_INVAL) continue; + width = pl->pl_width; + if (first && cb->cb_verbose) { + /* + * Reset the width to accommodate the verbose listing + * of devices. + */ + width = cb->cb_namewidth; + } + if (!first) (void) printf(" "); else @@ -2519,9 +2537,9 @@ print_header(zprop_list_t *pl) if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) - (void) printf("%*s", (int)pl->pl_width, header); + (void) printf("%*s", (int)width, header); else - (void) printf("%-*s", (int)pl->pl_width, header); + (void) printf("%-*s", (int)width, header); } (void) printf("\n"); @@ -2532,17 +2550,28 @@ print_header(zprop_list_t *pl) * to the described layout. */ static void -print_pool(zpool_handle_t *zhp, zprop_list_t *pl, int scripted) +print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) { + zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZPOOL_MAXPROPLEN]; char *propstr; boolean_t right_justify; - int width; + size_t width; for (; pl != NULL; pl = pl->pl_next) { + + width = pl->pl_width; + if (first && cb->cb_verbose) { + /* + * Reset the width to accommodate the verbose listing + * of devices. 
+ */ + width = cb->cb_namewidth; + } + if (!first) { - if (scripted) + if (cb->cb_scripted) (void) printf("\t"); else (void) printf(" "); @@ -2552,7 +2581,10 @@ print_pool(zpool_handle_t *zhp, zprop_list_t *pl, int scripted) right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { - if (zpool_get_prop(zhp, pl->pl_prop, property, + if (pl->pl_prop == ZPOOL_PROP_EXPANDSZ && + zpool_get_prop_int(zhp, pl->pl_prop, NULL) == 0) + propstr = "-"; + else if (zpool_get_prop(zhp, pl->pl_prop, property, sizeof (property), NULL) != 0) propstr = "-"; else @@ -2563,24 +2595,118 @@ print_pool(zpool_handle_t *zhp, zprop_list_t *pl, int scripted) propstr = "-"; } - width = pl->pl_width; /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ - if (scripted || (pl->pl_next == NULL && !right_justify)) + if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) - (void) printf("%*s", width, propstr); + (void) printf("%*s", (int)width, propstr); else - (void) printf("%-*s", width, propstr); + (void) printf("%-*s", (int)width, propstr); } (void) printf("\n"); } +static void +print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted) +{ + char propval[64]; + boolean_t fixed; + size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); + + zfs_nicenum(value, propval, sizeof (propval)); + + if (prop == ZPOOL_PROP_EXPANDSZ && value == 0) + (void) strlcpy(propval, "-", sizeof (propval)); + + if (scripted) + (void) printf("\t%s", propval); + else + (void) printf(" %*s", (int)width, propval); +} + +void +print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, + list_cbdata_t *cb, int depth) +{ + nvlist_t **child; + vdev_stat_t *vs; + uint_t c, children; + char *vname; + boolean_t scripted = cb->cb_scripted; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); + + if (name 
!= NULL) { + if (scripted) + (void) printf("\t%s", name); + else if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - depth), ""); + + /* only toplevel vdevs have capacity stats */ + if (vs->vs_space == 0) { + if (scripted) + (void) printf("\t-\t-\t-"); + else + (void) printf(" - - -"); + } else { + print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, + scripted); + print_one_column(ZPOOL_PROP_CAPACITY, vs->vs_alloc, + scripted); + print_one_column(ZPOOL_PROP_FREE, + vs->vs_space - vs->vs_alloc, scripted); + } + print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, + scripted); + (void) printf("\n"); + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t ishole = B_FALSE; + + if (nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole) + continue; + + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + + /* + * Include level 2 ARC devices in iostat output + */ + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) != 0) + return; + + if (children > 0) { + (void) printf("%-*s - - - - - " + "-\n", cb->cb_namewidth, "cache"); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, zhp, child[c], + B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } +} + + /* * Generic callback function to list a pool. 
*/ @@ -2588,14 +2714,18 @@ int list_callback(zpool_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; + nvlist_t *config; + nvlist_t *nvroot; - if (cbp->cb_first) { - if (!cbp->cb_scripted) - print_header(cbp->cb_proplist); - cbp->cb_first = B_FALSE; - } + config = zpool_get_config(zhp, NULL); + + print_pool(zhp, cbp); + if (!cbp->cb_verbose) + return (0); - print_pool(zhp, cbp->cb_proplist, cbp->cb_scripted); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + print_list_stats(zhp, NULL, nvroot, cbp, 0); return (0); } @@ -2619,12 +2749,15 @@ zpool_do_list(int argc, char **argv) int ret; list_cbdata_t cb = { 0 }; static char default_props[] = - "name,size,allocated,free,capacity,dedupratio,health,altroot"; + "name,size,allocated,free,expandsize,capacity,dedupratio," + "health,altroot"; char *props = default_props; unsigned long interval = 0, count = 0; + zpool_list_t *list; + boolean_t first = B_TRUE; /* check options */ - while ((c = getopt(argc, argv, ":Ho:T:")) != -1) { + while ((c = getopt(argc, argv, ":Ho:T:v")) != -1) { switch (c) { case 'H': cb.cb_scripted = B_TRUE; @@ -2635,6 +2768,9 @@ zpool_do_list(int argc, char **argv) case 'T': get_timestamp_arg(*optarg); break; + case 'v': + cb.cb_verbose = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2655,23 +2791,29 @@ zpool_do_list(int argc, char **argv) if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0) usage(B_FALSE); - cb.cb_first = B_TRUE; + if ((list = pool_list_get(argc, argv, &cb.cb_proplist, &ret)) == NULL) + return (1); + + if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) { + (void) printf(gettext("no pools available\n")); + zprop_free_list(cb.cb_proplist); + return (0); + } for (;;) { + pool_list_update(list); + + if (pool_list_count(list) == 0) + break; if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); - ret = for_each_pool(argc, argv, B_TRUE, 
&cb.cb_proplist, - list_callback, &cb); - - if (argc == 0 && cb.cb_first) - (void) fprintf(stderr, gettext("no pools available\n")); - else if (argc && cb.cb_first) { - /* cannot open the given pool */ - zprop_free_list(cb.cb_proplist); - return (1); + if (!cb.cb_scripted && (first || cb.cb_verbose)) { + print_header(&cb); + first = B_FALSE; } + ret = pool_list_iter(list, B_TRUE, list_callback, &cb); if (interval == 0) break; @@ -3264,6 +3406,36 @@ zpool_do_reguid(int argc, char **argv) } +/* + * zpool reopen + * + * Reopen the pool so that the kernel can update the sizes of all vdevs. + * + * NOTE: This command is currently undocumented. If the command is ever + * exposed then the appropriate usage() messages will need to be made. + */ +int +zpool_do_reopen(int argc, char **argv) +{ + int ret = 0; + zpool_handle_t *zhp; + char *pool; + + argc--; + argv++; + + if (argc != 1) + return (2); + + pool = argv[0]; + if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) + return (1); + + ret = zpool_reopen(zhp); + zpool_close(zhp); + return (ret); +} + typedef struct scrub_cbdata { int cb_type; int cb_argc; diff --git a/include/libzfs.h b/include/libzfs.h index c7a7daf2f3d7..9d0e8ce43468 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _LIBZFS_H @@ -252,6 +252,7 @@ typedef struct splitflags { extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); +extern int zpool_reopen(zpool_handle_t *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 13f9cdcfde3f..5238379920ba 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ @@ -168,6 +168,7 @@ typedef enum { ZPOOL_PROP_READONLY, ZPOOL_PROP_ASHIFT, ZPOOL_PROP_COMMENT, + ZPOOL_PROP_EXPANDSZ, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -678,6 +679,7 @@ typedef struct vdev_stat { uint64_t vs_space; /* total capacity */ uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ + uint64_t vs_esize; /* expandable dev size */ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ uint64_t vs_read_errors; /* read errors */ @@ -800,6 +802,7 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_REGUID, ZFS_IOC_SPACE_WRITTEN, ZFS_IOC_SPACE_SNAPS, + ZFS_IOC_POOL_REOPEN, } zfs_ioc_t; /* diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 1df61a587d6c..42b8ade97e4f 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_VDEV_IMPL_H @@ -55,7 +56,8 @@ typedef struct vdev_cache_entry vdev_cache_entry_t; /* * Virtual device operations */ -typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift); +typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, + uint64_t *ashift); typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); typedef int vdev_io_start_func_t(zio_t *zio); @@ -118,6 +120,7 @@ struct vdev { uint64_t vdev_orig_guid; /* orig. guid prior to remove */ uint64_t vdev_asize; /* allocatable device capacity */ uint64_t vdev_min_asize; /* min acceptable asize */ + uint64_t vdev_max_asize; /* max acceptable asize */ uint64_t vdev_ashift; /* block alignment shift */ uint64_t vdev_state; /* see VDEV_STATE_* #defines */ uint64_t vdev_prevstate; /* used when reopening a vdev */ diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 68bfdee5b06d..efe5fee230b5 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include @@ -273,6 +273,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, case ZPOOL_PROP_SIZE: case ZPOOL_PROP_ALLOCATED: case ZPOOL_PROP_FREE: + case ZPOOL_PROP_EXPANDSZ: case ZPOOL_PROP_ASHIFT: (void) zfs_nicenum(intval, buf, len); break; @@ -361,8 +362,8 @@ pool_uses_efi(nvlist_t *config) return (B_FALSE); } -static boolean_t -pool_is_bootable(zpool_handle_t *zhp) +boolean_t +zpool_is_bootable(zpool_handle_t *zhp) { char bootfs[ZPOOL_MAXNAMELEN]; @@ -1127,7 +1128,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) return (zfs_error(hdl, EZFS_BADVERSION, msg)); } - if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot, + if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { uint64_t s; @@ -2374,7 +2375,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, uint_t children; nvlist_t *config_root; libzfs_handle_t *hdl = zhp->zpool_hdl; - boolean_t rootpool = pool_is_bootable(zhp); + boolean_t rootpool = zpool_is_bootable(zhp); if (replacing) (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, @@ -3005,6 +3006,26 @@ zpool_reguid(zpool_handle_t *zhp) return (zpool_standard_error(hdl, errno, msg)); } +/* + * Reopen the pool. + */ +int +zpool_reopen(zpool_handle_t *zhp) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + char msg[1024]; + libzfs_handle_t *hdl = zhp->zpool_hdl; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), + zhp->zpool_name); + + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0) + return (0); + return (zpool_standard_error(hdl, errno, msg)); +} + /* * Convert from a devid string to a path. 
*/ @@ -3798,7 +3819,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) if (zhp) { nvlist_t *nvroot; - if (pool_is_bootable(zhp)) { + if (zpool_is_bootable(zhp)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "EFI labeled devices are not supported on root " "pools.")); diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index ec931a22a17b..1ac30507b39e 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -1,6 +1,7 @@ '\" te .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2011 Nexenta Systems, Inc. All rights reserved. +.\" Copyright (c) 2012 by Delphix. All Rights Reserved. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] @@ -83,7 +84,7 @@ zpool \- configures ZFS storage pools .LP .nf -\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ... +\fBzpool list\fR [\fB-Hv\fR] [\fB-o\fR \fIproperty\fR[,...]] [\fIpool\fR] ... .fi .LP @@ -476,6 +477,18 @@ A text string consisting of printable ASCII characters that will be stored such .ne 2 .mk .na +\fB\fBexpandsize\fR\fR +.ad +.RS 20n +Amount of uninitialized space within the pool or device that can be used to +increase the total capacity of the pool. Uninitialized space consists of +any space on an EFI labeled vdev which has not been brought online +(i.e. zpool online -e). 
This space occurs when a LUN is dynamically expanded. +.RE + +.sp +.ne 2 +.na \fB\fBhealth\fR\fR .ad .RS 20n @@ -1312,7 +1325,7 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within t .ne 2 .mk .na -\fB\fBzpool list\fR [\fB-H\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR +\fB\fBzpool list\fR [\fB-Hv\fR] [\fB-o\fR \fIprops\fR[,...]] [\fIpool\fR] ...\fR .ad .sp .6 .RS 4n @@ -1336,7 +1349,18 @@ Scripted mode. Do not display headers, and separate fields by a single tab inste .ad .RS 12n .rt -Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, capacity, health, altroot" +Comma-separated list of properties to display. See the "Properties" section for a list of valid properties. The default list is "name, size, used, available, expandsize, capacity, dedupratio, health, altroot" +.RE + +.sp +.ne 2 +.mk +.na +\fB\fB-v\fR\fR +.ad +.RS 12n +.rt +Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within the pool, in addition to the pool-wise statistics. .RE .RE @@ -1649,10 +1673,10 @@ The results from this command are similar to the following: .in +2 .nf # \fBzpool list\fR - NAME SIZE USED AVAIL CAP HEALTH ALTROOT - pool 67.5G 2.92M 67.5G 0% ONLINE - - tank 67.5G 2.92M 67.5G 0% ONLINE - - zion - - - 0% FAULTED - + NAME SIZE ALLOC FREE EXPANDSZ CAP DEDUP HEALTH ALTROOT + rpool 19.9G 8.43G 11.4G - 42% 1.00x ONLINE - + tank 61.5G 20.0G 41.5G - 32% 1.00x ONLINE - + zion - - - - - - FAULTED - .fi .in -2 .sp @@ -1855,6 +1879,29 @@ The command to remove the mirrored log \fBmirror-2\fR is: .in -2 .sp +.LP +\fBExample 15 \fRDisplaying expanded space on a device +.sp +.LP +The following command displays the detailed information for the \fIdata\fR +pool. This pool is comprised of a single \fIraidz\fR vdev where one of its +devices increased its capacity by 1GB. 
In this example, the pool will not +be able to utilize this extra capacity until all the devices under the +\fIraidz\fR vdev have been expanded. + +.sp +.in +2 +.nf +# \fBzpool list -v data\fR + NAME SIZE ALLOC FREE EXPANDSZ CAP DEDUP HEALTH ALTROOT + data 17.9G 174K 17.9G - 0% 1.00x ONLINE - + raidz1 17.9G 174K 17.9G - + c4t2d0 - - - 1G + c4t3d0 - - - - + c4t4d0 - - - - +.fi +.in -2 + .SH EXIT STATUS .sp .LP diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 6c69fca7bf8f..303edcefcaaf 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -81,6 +81,8 @@ zpool_prop_init(void) ZFS_TYPE_POOL, "", "FREE"); zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0, PROP_READONLY, ZFS_TYPE_POOL, "", "ALLOC"); + zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0, + PROP_READONLY, ZFS_TYPE_POOL, "", "EXPANDSZ"); zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY, ZFS_TYPE_POOL, "", "CAP"); zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 692664bec835..cbf8a447be62 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* @@ -168,15 +168,18 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { + vdev_t *rvd = spa->spa_root_vdev; uint64_t size; uint64_t alloc; + uint64_t space; uint64_t cap, version; zprop_source_t src = ZPROP_SRC_NONE; spa_config_dirent_t *dp; + int c; ASSERT(MUTEX_HELD(&spa->spa_props_lock)); - if (spa->spa_root_vdev != NULL) { + if (rvd != NULL) { alloc = metaslab_class_get_alloc(spa_normal_class(spa)); size = metaslab_class_get_space(spa_normal_class(spa)); spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); @@ -184,6 +187,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, size - alloc, src); + + space = 0; + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + space += tvd->vdev_max_asize - tvd->vdev_asize; + } + spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, space, + src); + spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, (spa_mode(spa) == FREAD), src); @@ -194,7 +206,7 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) ddt_get_pool_dedup_ratio(spa), src); spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, - spa->spa_root_vdev->vdev_state, src); + rvd->vdev_state, src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 0c8ce1b1c064..8f3ee278702a 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include @@ -109,7 +109,7 @@ vdev_get_min_asize(vdev_t *vd) vdev_t *pvd = vd->vdev_parent; /* - * The our parent is NULL (inactive spare or cache) or is the root, + * If our parent is NULL (inactive spare or cache) or is the root, * just return our own asize. */ if (pvd == NULL) @@ -747,6 +747,7 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) mvd->vdev_asize = cvd->vdev_asize; mvd->vdev_min_asize = cvd->vdev_min_asize; + mvd->vdev_max_asize = cvd->vdev_max_asize; mvd->vdev_ashift = cvd->vdev_ashift; mvd->vdev_state = cvd->vdev_state; mvd->vdev_crtxg = cvd->vdev_crtxg; @@ -1132,7 +1133,8 @@ vdev_open(vdev_t *vd) spa_t *spa = vd->vdev_spa; int error; uint64_t osize = 0; - uint64_t asize, psize; + uint64_t max_osize = 0; + uint64_t asize, max_asize, psize; uint64_t ashift = 0; int c; @@ -1164,7 +1166,7 @@ vdev_open(vdev_t *vd) return (ENXIO); } - error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift); + error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); /* * Reset the vdev_reopening flag so that we actually close @@ -1222,6 +1224,7 @@ vdev_open(vdev_t *vd) } osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t)); + max_osize = P2ALIGN(max_osize, (uint64_t)sizeof (vdev_label_t)); if (vd->vdev_children == 0) { if (osize < SPA_MINDEVSIZE) { @@ -1231,6 +1234,8 @@ vdev_open(vdev_t *vd) } psize = osize; asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE); + max_asize = max_osize - (VDEV_LABEL_START_SIZE + + VDEV_LABEL_END_SIZE); } else { if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) { @@ -1240,6 +1245,7 @@ vdev_open(vdev_t *vd) } psize = 0; asize = osize; + max_asize = max_osize; } vd->vdev_psize = psize; @@ -1259,6 +1265,7 @@ vdev_open(vdev_t *vd) * For testing purposes, a higher ashift can be requested. 
*/ vd->vdev_asize = asize; + vd->vdev_max_asize = max_asize; vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); } else { /* @@ -1269,6 +1276,7 @@ vdev_open(vdev_t *vd) VDEV_AUX_BAD_LABEL); return (EINVAL); } + vd->vdev_max_asize = max_asize; } /* @@ -2499,6 +2507,7 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) vs->vs_rsize = vdev_get_min_asize(vd); if (vd->vdev_ops->vdev_op_leaf) vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; + vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; mutex_exit(&vd->vdev_stat_lock); /* diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index eee03d08055d..e6856cae293f 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -224,7 +224,8 @@ vdev_disk_rrpart(const char *path, int mode, vdev_disk_t *vd) } static int -vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) +vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, + uint64_t *ashift) { struct block_device *bdev = ERR_PTR(-ENXIO); vdev_disk_t *vd; @@ -240,6 +241,12 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) if (vd == NULL) return ENOMEM; + /* TODO */ + // if (vd->vdev_wholedisk == 1) { + // } else { + *max_psize = *psize; + //} + /* * Devices are always opened by the path provided at configuration * time. This means that if the provided path is a udev by-id path diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index ce49fe08fb71..75e6028ef498 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #include @@ -47,7 +48,8 @@ vdev_file_rele(vdev_t *vd) } static int -vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) +vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, + uint64_t *ashift) { vdev_file_t *vf; vnode_t *vp; @@ -112,7 +114,7 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) return (error); } - *psize = vattr.va_size; + *max_psize = *psize = vattr.va_size; *ashift = SPA_MINBLOCKSHIFT; return (0); diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 47181d439e28..5fdd3ebbcec1 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #include #include #include @@ -127,7 +131,8 @@ vdev_mirror_map_alloc(zio_t *zio) } static int -vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) +vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, + uint64_t *ashift) { int numerrors = 0; int lasterror = 0; @@ -150,6 +155,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) } *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; + *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *ashift = MAX(*ashift, cvd->vdev_ashift); } diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c index 6a5588d59213..3bd8c90e04c7 100644 --- a/module/zfs/vdev_missing.c +++ b/module/zfs/vdev_missing.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + /* * The 'missing' vdev is a special vdev type used only during import. It * signifies a placeholder in the root vdev for some vdev that we know is @@ -40,7 +44,8 @@ /* ARGSUSED */ static int -vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) +vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, + uint64_t *ashift) { /* * Really this should just fail. 
But then the root vdev will be in the @@ -49,6 +54,7 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) * will fail the GUID sum check before ever trying to open the pool. */ *psize = 0; + *max_psize = 0; *ashift = 0; return (0); } diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index b987ac40b136..e826b481f79b 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -1441,7 +1442,8 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt) } static int -vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) +vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, + uint64_t *ashift) { vdev_t *cvd; uint64_t nparity = vd->vdev_nparity; @@ -1469,10 +1471,12 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) } *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; + *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *ashift = MAX(*ashift, cvd->vdev_ashift); } *asize *= vd->vdev_children; + *max_asize *= vd->vdev_children; if (numerrors > nparity) { vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index d7ca99a3d3d5..5241b0215764 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + #include #include #include @@ -50,7 +54,8 @@ too_many_errors(vdev_t *vd, int numerrors) } static int -vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) +vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, + uint64_t *ashift) { int lasterror = 0; int numerrors = 0; @@ -78,6 +83,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift) } *asize = 0; + *max_asize = 0; *ashift = 0; return (0); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 94c91e876bd6..bab4d6188c7f 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -24,7 +24,7 @@ * Portions Copyright 2012 Pawel Jakub Dawidek * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -4036,6 +4036,22 @@ zfs_ioc_clear(zfs_cmd_t *zc) return (error); } +static int +zfs_ioc_pool_reopen(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + error = spa_open(zc->zc_name, &spa, FTAG); + if (error) + return (error); + + spa_vdev_state_enter(spa, SCL_NONE); + vdev_reopen(spa->spa_root_vdev); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (0); +} /* * inputs: * zc_name name of filesystem @@ -4830,6 +4846,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { POOL_CHECK_SUSPENDED }, { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED }, + { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, + POOL_CHECK_SUSPENDED }, }; int From 2e7d62a330a70906226adb7df3781341a6d1ed59 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 28 Aug 2012 11:54:21 +0200 Subject: [PATCH 2/4] Do not include expandsize in default properties. 
--- cmd/zpool/zpool_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 2c4dac7b0c4c..0c1d30208d90 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -2749,7 +2749,7 @@ zpool_do_list(int argc, char **argv) int ret; list_cbdata_t cb = { 0 }; static char default_props[] = - "name,size,allocated,free,expandsize,capacity,dedupratio," + "name,size,allocated,free,capacity,dedupratio," "health,altroot"; char *props = default_props; unsigned long interval = 0, count = 0; From 181f94edee2944d82531f0b3acec1aa252bd7aa5 Mon Sep 17 00:00:00 2001 From: Bill Pijewski Date: Wed, 9 May 2012 15:05:14 -0700 Subject: [PATCH 3/4] illumos #2703: add mechanism to report ZFS send progress Reviewed by: Matt Ahrens Reviewed by: Robert Mustacchi Reviewed by: Richard Lowe Approved by: Eric Schrock Ported by: Martin Matuska --- cmd/zfs/zfs_main.c | 2 + include/libzfs.h | 4 + include/sys/dmu.h | 5 +- include/sys/dmu_impl.h | 28 ++++ include/sys/dsl_dataset.h | 4 + include/sys/fs/zfs.h | 2 + include/sys/zfs_context.h | 2 + lib/libzfs/libzfs_sendrecv.c | 82 +++++++++++- man/man8/zfs.8 | 4 +- module/zfs/dmu_send.c | 247 ++++++++++++++++++----------------- module/zfs/dsl_dataset.c | 7 + module/zfs/zfs_ioctl.c | 52 +++++++- 12 files changed, 312 insertions(+), 127 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 1cb2ac9587e9..797c7a591a7b 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #include @@ -3551,6 +3552,7 @@ zfs_do_send(int argc, char **argv) if (flags.verbose) extraverbose = B_TRUE; flags.verbose = B_TRUE; + flags.progress = B_TRUE; break; case 'D': flags.dedup = B_TRUE; diff --git a/include/libzfs.h b/include/libzfs.h index 9d0e8ce43468..75e149334a65 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _LIBZFS_H @@ -580,6 +581,9 @@ typedef struct sendflags { /* parsable verbose output (ie. -P) */ boolean_t parsable; + + /* show progress (ie. -v) */ + boolean_t progress; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 5b2e25b78cf6..fe317c835b3e 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -705,8 +706,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - struct vnode *vp, offset_t *off); +int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + int outfd, struct vnode *vp, offset_t *off); int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, uint64_t *sizep); diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 0b8748df10e3..f13a2a37ce84 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -30,6 +31,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -239,6 +241,32 @@ typedef struct dmu_xuio { iovec_t *iovp; } dmu_xuio_t; +/* + * The list of data whose inclusion in a send stream can be pending from + * one call to backup_cb to another. Multiple calls to dump_free() and + * dump_freeobjects() can be aggregated into a single DRR_FREE or + * DRR_FREEOBJECTS replay record. 
+ */ +typedef enum { + PENDING_NONE, + PENDING_FREE, + PENDING_FREEOBJECTS +} dmu_pendop_t; + +typedef struct dmu_sendarg { + list_node_t dsa_link; + dmu_replay_record_t *dsa_drr; + vnode_t *dsa_vp; + int dsa_outfd; + proc_t *dsa_proc; + offset_t *dsa_off; + objset_t *dsa_os; + zio_cksum_t dsa_zc; + uint64_t dsa_toguid; + int dsa_err; + dmu_pendop_t dsa_pending_op; +} dmu_sendarg_t; + #ifdef __cplusplus } #endif diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index c4530a8f0ae7..014102299f74 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H @@ -150,6 +151,9 @@ typedef struct dsl_dataset { uint64_t ds_reserved; /* cached refreservation */ uint64_t ds_quota; /* cached refquota */ + kmutex_t ds_sendstream_lock; + list_t ds_sendstreams; + /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 5238379920ba..f72c74fc93e1 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -803,6 +804,7 @@ typedef enum zfs_ioc { ZFS_IOC_SPACE_WRITTEN, ZFS_IOC_SPACE_SNAPS, ZFS_IOC_POOL_REOPEN, + ZFS_IOC_SEND_PROGRESS, } zfs_ioc_t; /* diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 6201214ec94c..07b6d44e77c2 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -24,6 +24,7 @@ */ /* * Copyright 2011 Nexenta Systems, Inc. 
All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_ZFS_CONTEXT_H @@ -201,6 +202,7 @@ typedef struct proc { } proc_t; extern struct proc p0; +#define curproc (&p0) typedef void (*thread_func_t)(void *); typedef void (*thread_func_arg_t)(void *); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index c5d963a324d0..9dbfb1641ac6 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . + * Copyright (c) 2012, Joyent, Inc. All rights reserved. * All rights reserved */ @@ -44,6 +45,7 @@ #include #include #include +#include #include @@ -69,6 +71,12 @@ typedef struct dedup_arg { libzfs_handle_t *dedup_hdl; } dedup_arg_t; +typedef struct progress_arg { + zfs_handle_t *pa_zhp; + int pa_fd; + boolean_t pa_parsable; +} progress_arg_t; + typedef struct dataref { uint64_t ref_guid; uint64_t ref_object; @@ -787,7 +795,7 @@ typedef struct send_dump_data { char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable; + boolean_t verbose, dryrun, parsable, progress; int outfd; boolean_t err; nvlist_t *fss; @@ -979,10 +987,60 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) return (error); } +static void * +send_progress_thread(void *arg) +{ + progress_arg_t *pa = arg; + + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_handle_t *zhp = pa->pa_zhp; + libzfs_handle_t *hdl = zhp->zfs_hdl; + unsigned long long bytes; + char buf[16]; + + time_t t; + struct tm *tm; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (!pa->pa_parsable) + (void) fprintf(stderr, "TIME SENT SNAPSHOT\n"); + + /* + * Print the progress from ZFS_IOC_SEND_PROGRESS every 
second. + */ + for (;;) { + (void) sleep(1); + + zc.zc_cookie = pa->pa_fd; + if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) + return ((void *)-1); + + (void) time(&t); + tm = localtime(&t); + bytes = zc.zc_cookie; + + if (pa->pa_parsable) { + (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + bytes, zhp->zfs_name); + } else { + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + buf, zhp->zfs_name); + } + } +} + static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; + progress_arg_t pa = { 0 }; + pthread_t tid; + char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; @@ -1100,8 +1158,29 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) } if (!sdd->dryrun) { + /* + * If progress reporting is requested, spawn a new thread to + * poll ZFS_IOC_SEND_PROGRESS at a regular interval. + */ + if (sdd->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = sdd->outfd; + pa.pa_parsable = sdd->parsable; + + if ((err = pthread_create(&tid, NULL, + send_progress_thread, &pa))) { + zfs_close(zhp); + return (err); + } + } + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, fromorigin, sdd->outfd, sdd->debugnv); + + if (sdd->progress) { + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + } } (void) strcpy(sdd->prevsnap, thissnap); @@ -1445,6 +1524,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.fsavl = fsavl; sdd.verbose = flags->verbose; sdd.parsable = flags->parsable; + sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 508270634f63..7b355e0836a4 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -2,6 +2,7 @@ .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2012 by Delphix. All rights reserved. 
.\" Copyright (c) 2012 Nexenta Systems, Inc. All Rights Reserved. +.\" Copyright (c) 2012, Joyent, Inc. All rights reserved. .\" Copyright 2011 Joshua M. Clulow .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with @@ -2306,7 +2307,8 @@ Generate a stream package that sends all intermediary snapshots from the first s .ad .sp .6 .RS 4n -Print verbose information about the stream package generated. +Print verbose information about the stream package generated. This information +includes a per-second report of how much data has been sent. .RE .sp diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 9f9003744b44..949f4d7737ee 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -23,6 +23,7 @@ * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -52,48 +53,30 @@ int zfs_send_corrupt_data = B_FALSE; static char *dmu_recv_tag = "dmu_recv_tag"; -/* - * The list of data whose inclusion in a send stream can be pending from - * one call to backup_cb to another. Multiple calls to dump_free() and - * dump_freeobjects() can be aggregated into a single DRR_FREE or - * DRR_FREEOBJECTS replay record. 
- */ -typedef enum { - PENDING_NONE, - PENDING_FREE, - PENDING_FREEOBJECTS -} pendop_t; - -struct backuparg { - dmu_replay_record_t *drr; - vnode_t *vp; - offset_t *off; - objset_t *os; - zio_cksum_t zc; - uint64_t toguid; - int err; - pendop_t pending_op; -}; - static int -dump_bytes(struct backuparg *ba, void *buf, int len) +dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) { + dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; ssize_t resid; /* have to get resid to get detailed errno */ ASSERT3U(len % 8, ==, 0); - fletcher_4_incremental_native(buf, len, &ba->zc); - ba->err = vn_rdwr(UIO_WRITE, ba->vp, + fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); + dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, (caddr_t)buf, len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); - *ba->off += len; - return (ba->err); + + mutex_enter(&ds->ds_sendstream_lock); + *dsp->dsa_off += len; + mutex_exit(&ds->ds_sendstream_lock); + + return (dsp->dsa_err); } static int -dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, +dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) { - struct drr_free *drrf = &(ba->drr->drr_u.drr_free); + struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); if (length != -1ULL && offset + length < offset) length = -1ULL; @@ -105,13 +88,15 @@ dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, * other DRR_FREE records. DRR_FREEOBJECTS records can only be * aggregated with other DRR_FREEOBJECTS records. 
*/ - if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE && + dsp->dsa_pending_op != PENDING_FREE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } - if (ba->pending_op == PENDING_FREE) { + if (dsp->dsa_pending_op == PENDING_FREE) { /* * There should never be a PENDING_FREE if length is -1 * (because dump_dnode is the only place where this @@ -129,34 +114,35 @@ dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, return (0); } else { /* not a continuation. Push out pending record */ - if (dump_bytes(ba, ba->drr, + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } } /* create a FREE record and make it pending */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_FREE; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_FREE; drrf->drr_object = object; drrf->drr_offset = offset; drrf->drr_length = length; - drrf->drr_toguid = ba->toguid; + drrf->drr_toguid = dsp->dsa_toguid; if (length == -1ULL) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); } else { - ba->pending_op = PENDING_FREE; + dsp->dsa_pending_op = PENDING_FREE; } return (0); } static int -dump_data(struct backuparg *ba, dmu_object_type_t type, +dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) { - struct drr_write *drrw = &(ba->drr->drr_u.drr_write); + struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); /* @@ -165,19 +151,20 @@ dump_data(struct backuparg *ba, dmu_object_type_t type, * the stream, 
since aggregation can't be done across operations * of different types. */ - if (ba->pending_op != PENDING_NONE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } /* write a DATA record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_WRITE; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_WRITE; drrw->drr_object = object; drrw->drr_type = type; drrw->drr_offset = offset; drrw->drr_length = blksz; - drrw->drr_toguid = ba->toguid; + drrw->drr_toguid = dsp->dsa_toguid; drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; @@ -186,42 +173,43 @@ dump_data(struct backuparg *ba, dmu_object_type_t type, DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - if (dump_bytes(ba, data, blksz) != 0) + if (dump_bytes(dsp, data, blksz) != 0) return (EINTR); return (0); } static int -dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) +dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) { - struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); + struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); - if (ba->pending_op != PENDING_NONE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } /* write a SPILL record */ - bzero(ba->drr, sizeof 
(dmu_replay_record_t)); - ba->drr->drr_type = DRR_SPILL; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_SPILL; drrs->drr_object = object; drrs->drr_length = blksz; - drrs->drr_toguid = ba->toguid; + drrs->drr_toguid = dsp->dsa_toguid; - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) return (EINTR); - if (dump_bytes(ba, data, blksz)) + if (dump_bytes(dsp, data, blksz)) return (EINTR); return (0); } static int -dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) +dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { - struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); + struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, @@ -230,13 +218,14 @@ dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records * can only be aggregated with other DRR_FREEOBJECTS records. */ - if (ba->pending_op != PENDING_NONE && - ba->pending_op != PENDING_FREEOBJECTS) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE && + dsp->dsa_pending_op != PENDING_FREEOBJECTS) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } - if (ba->pending_op == PENDING_FREEOBJECTS) { + if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { /* * See whether this free object array can be aggregated * with pending one @@ -246,42 +235,43 @@ dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) return (0); } else { /* can't be aggregated. 
Push out pending record */ - if (dump_bytes(ba, ba->drr, + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } } /* write a FREEOBJECTS record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_FREEOBJECTS; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; drrfo->drr_firstobj = firstobj; drrfo->drr_numobjs = numobjs; - drrfo->drr_toguid = ba->toguid; + drrfo->drr_toguid = dsp->dsa_toguid; - ba->pending_op = PENDING_FREEOBJECTS; + dsp->dsa_pending_op = PENDING_FREEOBJECTS; return (0); } static int -dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) +dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) { - struct drr_object *drro = &(ba->drr->drr_u.drr_object); + struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) - return (dump_freeobjects(ba, object, 1)); + return (dump_freeobjects(dsp, object, 1)); - if (ba->pending_op != PENDING_NONE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } /* write an OBJECT record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_OBJECT; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_OBJECT; drro->drr_object = object; drro->drr_type = dnp->dn_type; drro->drr_bonustype = dnp->dn_bonustype; @@ -289,19 +279,19 @@ dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) drro->drr_bonuslen = dnp->dn_bonuslen; drro->drr_checksumtype = dnp->dn_checksum; drro->drr_compress = dnp->dn_compress; - drro->drr_toguid = ba->toguid; + drro->drr_toguid = dsp->dsa_toguid; - if (dump_bytes(ba, 
ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) + if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) return (EINTR); /* free anything past the end of the file */ - if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * + if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) return (EINTR); - if (ba->err) + if (dsp->dsa_err) return (EINTR); return (0); } @@ -315,7 +305,7 @@ static int backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { - struct backuparg *ba = arg; + dmu_sendarg_t *dsp = arg; dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; int err = 0; @@ -328,10 +318,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { uint64_t span = BP_SPAN(dnp, zb->zb_level); uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; - err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); + err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); } else if (bp == NULL) { uint64_t span = BP_SPAN(dnp, zb->zb_level); - err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); + err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { @@ -350,7 +340,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, for (i = 0; i < blksz >> DNODE_SHIFT; i++) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; - err = dump_dnode(ba, dnobj, blk+i); + err = dump_dnode(dsp, dnobj, blk+i); if (err) break; } @@ -365,7 +355,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, ZIO_FLAG_CANFAIL, &aflags, 
zb) != 0) return (EIO); - err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } else { /* it's a level-0 block of a regular object */ uint32_t aflags = ARC_WAIT; @@ -389,7 +379,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } } - err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, + err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } @@ -399,13 +389,13 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } int -dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - vnode_t *vp, offset_t *off) +dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + int outfd, vnode_t *vp, offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; dmu_replay_record_t *drr; - struct backuparg ba; + dmu_sendarg_t *dsp; int err; uint64_t fromtxg = 0; @@ -446,8 +436,10 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, #ifdef _KERNEL if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { uint64_t version; - if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) + if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) { + kmem_free(drr, sizeof (dmu_replay_record_t)); return (EINVAL); + } if (version == ZPL_VERSION_SA) { DMU_SET_FEATUREFLAGS( drr->drr_u.drr_begin.drr_versioninfo, @@ -474,46 +466,59 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (fromorigin) dsl_dataset_rele(fromds, FTAG); - ba.drr = drr; - ba.vp = vp; - ba.os = tosnap; - ba.off = off; - ba.toguid = ds->ds_phys->ds_guid; - ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); - ba.pending_op = PENDING_NONE; - - if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { - kmem_free(drr, sizeof 
(dmu_replay_record_t)); - return (ba.err); + dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); + + dsp->dsa_drr = drr; + dsp->dsa_vp = vp; + dsp->dsa_outfd = outfd; + dsp->dsa_proc = curproc; + dsp->dsa_os = tosnap; + dsp->dsa_off = off; + dsp->dsa_toguid = ds->ds_phys->ds_guid; + ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); + dsp->dsa_pending_op = PENDING_NONE; + + mutex_enter(&ds->ds_sendstream_lock); + list_insert_head(&ds->ds_sendstreams, dsp); + mutex_exit(&ds->ds_sendstream_lock); + + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + err = dsp->dsa_err; + goto out; } err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, - backup_cb, &ba); + backup_cb, dsp); - if (ba.pending_op != PENDING_NONE) - if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) err = EINTR; if (err) { - if (err == EINTR && ba.err) - err = ba.err; - kmem_free(drr, sizeof (dmu_replay_record_t)); - return (err); + if (err == EINTR && dsp->dsa_err) + err = dsp->dsa_err; + goto out; } bzero(drr, sizeof (dmu_replay_record_t)); drr->drr_type = DRR_END; - drr->drr_u.drr_end.drr_checksum = ba.zc; - drr->drr_u.drr_end.drr_toguid = ba.toguid; + drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; + drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; - if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { - kmem_free(drr, sizeof (dmu_replay_record_t)); - return (ba.err); + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + err = dsp->dsa_err; + goto out; } +out: + mutex_enter(&ds->ds_sendstream_lock); + list_remove(&ds->ds_sendstreams, dsp); + mutex_exit(&ds->ds_sendstream_lock); + kmem_free(drr, sizeof (dmu_replay_record_t)); + kmem_free(dsp, sizeof (dmu_sendarg_t)); - return (0); + return (err); } int diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index fce6d3c1a969..941dacc356ce 100644 --- a/module/zfs/dsl_dataset.c +++ 
b/module/zfs/dsl_dataset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +401,8 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL); cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); @@ -406,6 +410,9 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), + offsetof(dmu_sendarg_t, dsa_link)); + if (err == 0) { err = dsl_dir_open_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index bab4d6188c7f..c6fcc3b89605 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -25,6 +25,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #include @@ -53,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -3889,8 +3891,8 @@ zfs_ioc_send(zfs_cmd_t *zc) } off = fp->f_offset; - error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, - fp->f_vnode, &off); + error = dmu_send(tosnap, fromsnap, zc->zc_obj, + zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -3902,6 +3904,50 @@ zfs_ioc_send(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of snapshot on which to report progress + * zc_cookie file descriptor of send stream + * + * outputs: + * zc_cookie number of bytes written in send stream thus far + */ +static int +zfs_ioc_send_progress(zfs_cmd_t *zc) +{ + dsl_dataset_t *ds; + dmu_sendarg_t *dsp = NULL; + int error; + + if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) + return (error); + + mutex_enter(&ds->ds_sendstream_lock); + + /* + * Iterate over all the send streams currently active on this dataset. + * If there's one which matches the specified file descriptor _and_ the + * stream was started by the current process, return the progress of + * that stream. 
+ */ + + for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL; + dsp = list_next(&ds->ds_sendstreams, dsp)) { + if (dsp->dsa_outfd == zc->zc_cookie && + dsp->dsa_proc->group_leader == curproc->group_leader) + break; + } + + if (dsp != NULL) + zc->zc_cookie = *(dsp->dsa_off); + else + error = ENOENT; + + mutex_exit(&ds->ds_sendstream_lock); + dsl_dataset_rele(ds, FTAG); + return (error); +} + static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { @@ -4848,6 +4894,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { POOL_CHECK_SUSPENDED }, { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED }, + { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE } }; int From 0c0003b03c86988d0d41159f23eaa93993477b29 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Wed, 29 Aug 2012 01:17:29 +0200 Subject: [PATCH 4/4] Set max_psize at correct position in vdev_disk.c --- module/zfs/vdev_disk.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index e6856cae293f..9a5a73c86c3c 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -241,12 +241,6 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (vd == NULL) return ENOMEM; - /* TODO */ - // if (vd->vdev_wholedisk == 1) { - // } else { - *max_psize = *psize; - //} - /* * Devices are always opened by the path provided at configuration * time. This means that if the provided path is a udev by-id path @@ -295,6 +289,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, /* Physical volume size in bytes */ *psize = bdev_capacity(bdev); + /* TODO: report possible expansion size */ + *max_psize = *psize; + /* Based on the minimum sector size set the block size */ *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;