diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index d44589303313..95f62dd6a10a 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -6481,7 +6481,8 @@ status_callback(zpool_handle_t *zhp, void *data) nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE); verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); - health = zpool_state_to_name(vs->vs_state, vs->vs_aux); + + health = zpool_get_health_str_from_zhp(zhp); (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); (void) printf(gettext(" state: %s\n"), health); diff --git a/configure.ac b/configure.ac index a57724c13a6a..bbb96449b249 100644 --- a/configure.ac +++ b/configure.ac @@ -272,6 +272,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/hkdf/Makefile tests/zfs-tests/tests/functional/inheritance/Makefile tests/zfs-tests/tests/functional/inuse/Makefile + tests/zfs-tests/tests/functional/kstat/Makefile tests/zfs-tests/tests/functional/large_files/Makefile tests/zfs-tests/tests/functional/largest_pool/Makefile tests/zfs-tests/tests/functional/link_count/Makefile diff --git a/include/libzfs.h b/include/libzfs.h index 45eb5c9047b6..2de42dcdd08f 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -301,6 +301,8 @@ int zfs_dev_is_whole_disk(char *dev_name); char *zfs_get_underlying_path(char *dev_name); char *zfs_get_enclosure_sysfs_path(char *dev_name); +const char *zpool_get_health_str_from_zhp(zpool_handle_t *); + /* * Functions to manage pool properties */ diff --git a/include/sys/spa.h b/include/sys/spa.h index 8a3938e865ee..30dc11d7f26e 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -873,6 +873,7 @@ typedef struct spa_stats { spa_stats_history_t tx_assign_histogram; spa_stats_history_t io_history; spa_stats_history_t mmp_history; + spa_stats_history_t health; /* pool health */ } spa_stats_t; typedef enum txg_state { diff --git a/include/zfs_comutil.h b/include/zfs_comutil.h index f89054388a4d..d80619602fd1 
100644 --- a/include/zfs_comutil.h +++ b/include/zfs_comutil.h @@ -41,6 +41,8 @@ extern int zfs_spa_version_map(int zpl_version); #define ZFS_NUM_LEGACY_HISTORY_EVENTS 41 extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS]; +const char *zpool_state_to_name(vdev_state_t state, vdev_aux_t aux); + #ifdef __cplusplus } #endif diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h index 24c71e27c783..dda5408d216b 100644 --- a/lib/libspl/include/sys/kstat.h +++ b/lib/libspl/include/sys/kstat.h @@ -303,7 +303,8 @@ typedef struct kstat32 { #define KSTAT_FLAG_WRITABLE 0x04 #define KSTAT_FLAG_PERSISTENT 0x08 #define KSTAT_FLAG_DORMANT 0x10 -#define KSTAT_FLAG_INVALID 0x20 +#define KSTAT_FLAG_NO_HEADERS 0x20 +#define KSTAT_FLAG_INVALID 0x40 /* * Dynamic update support diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index d082a5f66b1b..e0f17e386a2c 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -178,39 +178,6 @@ zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) return (value); } -/* - * Map VDEV STATE to printed strings. - */ -const char * -zpool_state_to_name(vdev_state_t state, vdev_aux_t aux) -{ - switch (state) { - case VDEV_STATE_CLOSED: - case VDEV_STATE_OFFLINE: - return (gettext("OFFLINE")); - case VDEV_STATE_REMOVED: - return (gettext("REMOVED")); - case VDEV_STATE_CANT_OPEN: - if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) - return (gettext("FAULTED")); - else if (aux == VDEV_AUX_SPLIT_POOL) - return (gettext("SPLIT")); - else - return (gettext("UNAVAIL")); - case VDEV_STATE_FAULTED: - return (gettext("FAULTED")); - case VDEV_STATE_DEGRADED: - return (gettext("DEGRADED")); - case VDEV_STATE_HEALTHY: - return (gettext("ONLINE")); - - default: - break; - } - - return (gettext("UNKNOWN")); -} - /* * Map POOL STATE to printed strings. 
*/ @@ -241,6 +208,45 @@ zpool_pool_state_to_name(pool_state_t state) return (gettext("UNKNOWN")); } +/* Return true if we should print "SUSPENDED" for the pool health */ +static boolean_t +zpool_suspended_no_continue(zpool_status_t status) +{ + return (status == ZPOOL_STATUS_IO_FAILURE_WAIT || + status == ZPOOL_STATUS_IO_FAILURE_MMP); +} + +/* + * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED", + * "SUSPENDED", etc). + */ +const char * +zpool_get_health_str_from_zhp(zpool_handle_t *zhp) +{ + zpool_errata_t errata; + zpool_status_t status; + nvlist_t *nvroot; + vdev_stat_t *vs; + uint_t vsc; + const char *str; + + status = zpool_get_status(zhp, NULL, &errata); + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + str = gettext("FAULTED"); + } else if (zpool_suspended_no_continue(status)) { + str = gettext("SUSPENDED"); + } else { + verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + verify(nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) + == 0); + str = zpool_state_to_name(vs->vs_state, vs->vs_aux); + } + return (str); +} + /* * Get a zpool property value for 'prop' and return the value in * a pre-allocated buffer. 
@@ -252,9 +258,6 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, uint64_t intval; const char *strval; zprop_source_t src = ZPROP_SRC_NONE; - nvlist_t *nvroot; - vdev_stat_t *vs; - uint_t vsc; if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { switch (prop) { @@ -263,7 +266,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, break; case ZPOOL_PROP_HEALTH: - (void) strlcpy(buf, "FAULTED", len); + (void) strlcpy(buf, zpool_get_health_str_from_zhp(zhp), + len); break; case ZPOOL_PROP_GUID: @@ -364,14 +368,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, break; case ZPOOL_PROP_HEALTH: - verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), - ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - verify(nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) - == 0); - - (void) strlcpy(buf, zpool_state_to_name(intval, - vs->vs_aux), len); + (void) strlcpy(buf, zpool_get_health_str_from_zhp(zhp), + len); break; case ZPOOL_PROP_VERSION: if (intval >= SPA_VERSION_FEATURES) { diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index 57d2deabfc87..4089f0cc65e2 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -404,12 +404,12 @@ zpool_status_t zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata) { zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata); - - if (ret >= NMSGID) - *msgid = NULL; - else - *msgid = zfs_msgid_table[ret]; - + if (msgid != NULL) { + if (ret >= NMSGID) + *msgid = NULL; + else + *msgid = zfs_msgid_table[ret]; + } return (ret); } diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c index 685a20c448ea..6270b89e2135 100644 --- a/module/zcommon/zfs_comutil.c +++ b/module/zcommon/zfs_comutil.c @@ -33,6 +33,7 @@ #include #else #include +#include #endif #include @@ -207,10 +208,53 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = { "pool split", }; +#if 
defined(_KERNEL) +/* Dummy gettext() for kernel builds */ +static const char * +gettext(const char *str) +{ + return (str); +} +#endif + +/* + * Map VDEV STATE to printed strings. + */ +const char * +zpool_state_to_name(vdev_state_t state, vdev_aux_t aux) +{ + switch (state) { + case VDEV_STATE_CLOSED: + case VDEV_STATE_OFFLINE: + return (gettext("OFFLINE")); + case VDEV_STATE_REMOVED: + return (gettext("REMOVED")); + case VDEV_STATE_CANT_OPEN: + if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) + return (gettext("FAULTED")); + else if (aux == VDEV_AUX_SPLIT_POOL) + return (gettext("SPLIT")); + else + return (gettext("UNAVAIL")); + case VDEV_STATE_FAULTED: + return (gettext("FAULTED")); + case VDEV_STATE_DEGRADED: + return (gettext("DEGRADED")); + case VDEV_STATE_HEALTHY: + return (gettext("ONLINE")); + + default: + break; + } + + return (gettext("UNKNOWN")); +} + #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(zfs_allocatable_devs); EXPORT_SYMBOL(zpool_get_rewind_policy); EXPORT_SYMBOL(zfs_zpl_version_map); EXPORT_SYMBOL(zfs_spa_version_map); EXPORT_SYMBOL(zfs_history_event_names); +EXPORT_SYMBOL(zpool_state_to_name); #endif diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index f604836c823f..1e953b33558b 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Keeps stats on last N reads per spa_t, disabled by default. @@ -997,6 +998,82 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, return ((void *)smh); } +static void * +spa_health_addr(kstat_t *ksp, loff_t n) +{ + return (ksp->ks_private); /* return the spa_t */ +} + +/* + * Return true if we should print "SUSPENDED" for the pool health. This is the + * kernel-side version of zpool_suspended_no_continue(). 
+ */ +static boolean_t +spa_suspended_no_continue(spa_t *spa) +{ + return (spa_suspended(spa) && (spa_get_failmode(spa) + != ZIO_FAILURE_MODE_CONTINUE)); +} + +static int +spa_health_data(char *buf, size_t size, void *data) +{ + spa_t *spa = (spa_t *)data; + vdev_t *rvd = spa->spa_root_vdev; + const char *str = "TRANSITIONING"; /* reported while rvd is NULL */ + if (rvd != NULL && spa_suspended_no_continue(spa)) + str = "SUSPENDED"; + else if (rvd != NULL) + str = zpool_state_to_name(rvd->vdev_state, + rvd->vdev_stat.vs_aux); + (void) snprintf(buf, size, "%s\n", str); + return (0); +} + +/* + * Return the health of the pool in /proc/spl/kstat/zfs/<pool>/health. + * + * This is a lock-less read of the pool's health (unlike using 'zpool', which + * can potentially block for seconds). Because it doesn't block, it can be + * useful as a pool heartbeat value. + */ +static void +spa_health_init(spa_t *spa) +{ + spa_stats_history_t *ssh = &spa->spa_stats.health; + char *name; + kstat_t *ksp; + + mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); + + name = kmem_asprintf("zfs/%s", spa_name(spa)); + ksp = kstat_create(name, 0, "health", "misc", + KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); + + ssh->kstat = ksp; + if (ksp) { + ksp->ks_lock = &ssh->lock; + ksp->ks_data = NULL; + ksp->ks_private = spa; + ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; + kstat_set_raw_ops(ksp, NULL, spa_health_data, spa_health_addr); + kstat_install(ksp); + } + + strfree(name); +} + +static void +spa_health_destroy(spa_t *spa) +{ + spa_stats_history_t *ssh = &spa->spa_stats.health; + kstat_t *ksp = ssh->kstat; + if (ksp) + kstat_delete(ksp); + + mutex_destroy(&ssh->lock); +} + void spa_stats_init(spa_t *spa) { @@ -1005,11 +1082,13 @@ spa_stats_init(spa_t *spa) spa_tx_assign_init(spa); spa_io_history_init(spa); spa_mmp_history_init(spa); + spa_health_init(spa); } void spa_stats_destroy(spa_t *spa) { + spa_health_destroy(spa); spa_tx_assign_destroy(spa); spa_txg_history_destroy(spa); spa_read_history_destroy(spa); diff --git a/tests/runfiles/linux.run 
b/tests/runfiles/linux.run index 0260eb8848b4..4c975a1e1781 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -575,6 +575,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos', post = tags = ['functional', 'inuse'] +[tests/functional/kstat] +tests = ['health'] +tags = ['functional', 'kstat'] + [tests/functional/large_files] tests = ['large_files_001_pos', 'large_files_002_pos'] tags = ['functional', 'large_files'] diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 87ffa8560b7e..5163ea2ae294 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -421,7 +421,16 @@ function unload_scsi_debug # function get_debug_device { - lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3 + for i in {1..10} ; do + val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3) + + # lsscsi can take time to settle + if [ "$val" != "-" ] ; then + break + fi + sleep 1 + done + echo "$val" } # diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index 396124986439..2368b829bf7e 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -28,6 +28,7 @@ SUBDIRS = \ hkdf \ inheritance \ inuse \ + kstat \ large_files \ largest_pool \ libzfs \ diff --git a/tests/zfs-tests/tests/functional/kstat/Makefile.am b/tests/zfs-tests/tests/functional/kstat/Makefile.am new file mode 100644 index 000000000000..3b4b135bf93b --- /dev/null +++ b/tests/zfs-tests/tests/functional/kstat/Makefile.am @@ -0,0 +1,5 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + health.ksh diff --git a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh new file mode 100755 index 000000000000..8a212ce37e8a --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh @@ -0,0 +1,28 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/kstat/health.ksh b/tests/zfs-tests/tests/functional/kstat/health.ksh new file mode 100755 index 000000000000..6e5ab9eec611 --- /dev/null +++ b/tests/zfs-tests/tests/functional/kstat/health.ksh @@ -0,0 +1,138 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +# +# DESCRIPTION: +# Test /proc/spl/kstat/zfs/<pool>/health kstat +# +# STRATEGY: +# 1. Create a mirrored pool +# 2. Check that pool is ONLINE +# 3. Fault one disk +# 4. Check that pool is DEGRADED +# 5. Create a new pool with a single scsi_debug disk +# 6. Remove the disk +# 7. Check that pool is SUSPENDED +# 8. Add the disk back in +# 9. Clear errors and destroy the pools + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + # Destroy the scsi_debug pool + if [ -n "$TESTPOOL2" ] ; then + if [ -n "$host" ] ; then + # Re-enable the disk + scan_scsi_hosts $host + + # Device may have changed names after being inserted + SDISK=$(get_debug_device) + log_must ln $DEV_RDSKDIR/$SDISK $REALDISK + fi + + # Restore our working pool image + if [ -n "$BACKUP" ] ; then + gunzip -c $BACKUP > $REALDISK + log_must rm -f $BACKUP + fi + + # Our disk is back. Now we can clear errors and destroy the + # pool cleanly. + log_must zpool clear $TESTPOOL2 + + # Now that the disk is back and errors cleared, wait for our + # hung 'zpool scrub' to finish. 
+ wait + + destroy_pool $TESTPOOL2 + log_must rm $REALDISK + unload_scsi_debug + fi +} + +# Check that our pool health values match what's expected +# +# $1: pool name +# $2: expected health ("ONLINE", "DEGRADED", "SUSPENDED", etc) +function check_all +{ + pool=$1 + expected=$2 + + health1=$(zpool status $pool | awk '/state: /{print $2}'); + health2=$(zpool list -H -o health $pool) + health3=$(cat /proc/spl/kstat/zfs/$pool/health) + log_must [ "$health1 $health2 $health3" = "$expected $expected $expected" ] +} + +log_onexit cleanup + +log_assert "Testing /proc/spl/kstat/zfs/<pool>/health kstat" + +# Test that the initial pool is healthy +check_all $TESTPOOL "ONLINE" + +# Fault one of the disks, and check that pool is degraded +DISK1=$(echo "$DISKS" | awk '{print $2}') +zpool offline -tf $TESTPOOL $DISK1 +check_all $TESTPOOL "DEGRADED" + +# Create a new pool out of a scsi_debug disk +TESTPOOL2=testpool2 +MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576)) +load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b' + +SDISK=$(get_debug_device) +host=$(get_scsi_host $SDISK) + +# Use $REALDISK instead of $SDISK in our pool because $SDISK can change names +# as we remove/add the disk (i.e. /dev/sdf -> /dev/sdg). +REALDISK=/dev/kstat-health-realdisk +log_must [ ! -e $REALDISK ] +ln $DEV_RDSKDIR/$SDISK $REALDISK + +log_must zpool create $TESTPOOL2 $REALDISK + +# Backup the contents of the disk image +BACKUP=/tmp/kstat-health-realdisk.gz +log_must [ ! -e $BACKUP ] +gzip -c $REALDISK > $BACKUP + +# Yank out the disk from under the pool +log_must rm $REALDISK +remove_disk $SDISK + +# Run a 'zpool scrub' in the background to suspend the pool. We run it in the +# background since the command will hang when the pool gets suspended. The +# command will resume and exit after we restore the missing disk later on. 
+zpool scrub $TESTPOOL2 & +sleep 1 # Give the scrub some time to run before we check if it fails + +check_all $TESTPOOL2 "SUSPENDED" + +log_pass "/proc/spl/kstat/zfs/<pool>/health test successful" diff --git a/tests/zfs-tests/tests/functional/kstat/setup.ksh b/tests/zfs-tests/tests/functional/kstat/setup.ksh new file mode 100755 index 000000000000..57717a096146 --- /dev/null +++ b/tests/zfs-tests/tests/functional/kstat/setup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +if ! is_linux ; then + log_unsupported "/proc/spl/kstat/zfs/<pool>/health only supported on Linux" +fi + +default_mirror_setup $DISKS + +log_pass