Skip to content

Commit

Permalink
Add pool health /proc entry, "SUSPENDED" pools
Browse files Browse the repository at this point in the history
1. Add a proc entry to display the pool's health:

$ cat /proc/spl/kstat/zfs/tank/health
ONLINE

This is done without using the spa config locks, so it will
never hang.

2. Fix 'zpool status' and 'zpool list -o health' output to print
"SUSPENDED" instead of "ONLINE" for suspended pools.

Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes openzfs#7331
Requires-spl: refs/pull/705/head
  • Loading branch information
tonyhutter committed May 26, 2018
1 parent 43eb39d commit e72397a
Show file tree
Hide file tree
Showing 17 changed files with 402 additions and 54 deletions.
3 changes: 2 additions & 1 deletion cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6481,7 +6481,8 @@ status_callback(zpool_handle_t *zhp, void *data)
nvroot = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE);
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
health = zpool_state_to_name(vs->vs_state, vs->vs_aux);

health = zpool_get_health_str_from_zhp(zhp);

(void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp));
(void) printf(gettext(" state: %s\n"), health);
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/hkdf/Makefile
tests/zfs-tests/tests/functional/inheritance/Makefile
tests/zfs-tests/tests/functional/inuse/Makefile
tests/zfs-tests/tests/functional/kstat/Makefile
tests/zfs-tests/tests/functional/large_files/Makefile
tests/zfs-tests/tests/functional/largest_pool/Makefile
tests/zfs-tests/tests/functional/link_count/Makefile
Expand Down
2 changes: 2 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@ int zfs_dev_is_whole_disk(char *dev_name);
char *zfs_get_underlying_path(char *dev_name);
char *zfs_get_enclosure_sysfs_path(char *dev_name);

const char *zpool_get_health_str_from_zhp(zpool_handle_t *);

/*
* Functions to manage pool properties
*/
Expand Down
1 change: 1 addition & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,7 @@ typedef struct spa_stats {
spa_stats_history_t tx_assign_histogram;
spa_stats_history_t io_history;
spa_stats_history_t mmp_history;
spa_stats_history_t health; /* pool health */
} spa_stats_t;

typedef enum txg_state {
Expand Down
2 changes: 2 additions & 0 deletions include/zfs_comutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ extern int zfs_spa_version_map(int zpl_version);
#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41
extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS];

const char *zpool_state_to_name(vdev_state_t state, vdev_aux_t aux);

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion lib/libspl/include/sys/kstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ typedef struct kstat32 {
#define KSTAT_FLAG_WRITABLE 0x04
#define KSTAT_FLAG_PERSISTENT 0x08
#define KSTAT_FLAG_DORMANT 0x10
#define KSTAT_FLAG_INVALID 0x20
#define KSTAT_FLAG_NO_HEADERS 0x20
#define KSTAT_FLAG_INVALID 0x40

/*
* Dynamic update support
Expand Down
88 changes: 43 additions & 45 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,39 +178,6 @@ zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
return (value);
}

/*
* Map VDEV STATE to printed strings.
*
* 'state' selects the string; 'aux' is consulted only for
* VDEV_STATE_CANT_OPEN, where it distinguishes "FAULTED", "SPLIT" and
* "UNAVAIL". Any state not listed below yields "UNKNOWN". Every string is
* passed through gettext() before being returned.
*/
const char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
switch (state) {
case VDEV_STATE_CLOSED:
case VDEV_STATE_OFFLINE:
return (gettext("OFFLINE"));
case VDEV_STATE_REMOVED:
return (gettext("REMOVED"));
case VDEV_STATE_CANT_OPEN:
/* The aux reason decides how an unopenable vdev is reported. */
if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
return (gettext("FAULTED"));
else if (aux == VDEV_AUX_SPLIT_POOL)
return (gettext("SPLIT"));
else
return (gettext("UNAVAIL"));
case VDEV_STATE_FAULTED:
return (gettext("FAULTED"));
case VDEV_STATE_DEGRADED:
return (gettext("DEGRADED"));
case VDEV_STATE_HEALTHY:
return (gettext("ONLINE"));

default:
break;
}

/* Reached only for states without an explicit case above. */
return (gettext("UNKNOWN"));
}

/*
* Map POOL STATE to printed strings.
*/
Expand Down Expand Up @@ -241,6 +208,45 @@ zpool_pool_state_to_name(pool_state_t state)
return (gettext("UNKNOWN"));
}

/*
 * Decide whether the pool health should be reported as "SUSPENDED".
 *
 * Returns true when 'status' is ZPOOL_STATUS_IO_FAILURE_WAIT or
 * ZPOOL_STATUS_IO_FAILURE_MMP, false for every other status.
 */
static boolean_t
zpool_suspended_no_continue(zpool_status_t status)
{
	boolean_t io_wait = (status == ZPOOL_STATUS_IO_FAILURE_WAIT);
	boolean_t io_mmp = (status == ZPOOL_STATUS_IO_FAILURE_MMP);

	return (io_wait || io_mmp);
}

/*
 * Return the health string for the pool behind 'zhp' ("ONLINE", "DEGRADED",
 * "SUSPENDED", etc).
 *
 * The checks are applied in this order:
 *   1. a pool in POOL_STATE_UNAVAIL is reported as "FAULTED";
 *   2. a pool whose status satisfies zpool_suspended_no_continue() is
 *      reported as "SUSPENDED";
 *   3. otherwise the state and aux code of the root vdev's stats are mapped
 *      with zpool_state_to_name().
 */
const char *
zpool_get_health_str_from_zhp(zpool_handle_t *zhp)
{
	zpool_errata_t errata;
	zpool_status_t status;
	nvlist_t *config;
	nvlist_t *vdev_tree;
	vdev_stat_t *stats;
	uint_t nstats;

	/* The message id is not needed here, hence the NULL. */
	status = zpool_get_status(zhp, NULL, &errata);

	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL)
		return (gettext("FAULTED"));

	if (zpool_suspended_no_continue(status))
		return (gettext("SUSPENDED"));

	/* Fall back to the state recorded in the root vdev's stats. */
	config = zpool_get_config(zhp, NULL);
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &vdev_tree) == 0);
	verify(nvlist_lookup_uint64_array(vdev_tree, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&stats, &nstats) == 0);

	return (zpool_state_to_name(stats->vs_state, stats->vs_aux));
}

/*
* Get a zpool property value for 'prop' and return the value in
* a pre-allocated buffer.
Expand All @@ -252,9 +258,6 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
uint64_t intval;
const char *strval;
zprop_source_t src = ZPROP_SRC_NONE;
nvlist_t *nvroot;
vdev_stat_t *vs;
uint_t vsc;

if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
switch (prop) {
Expand All @@ -263,7 +266,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
break;

case ZPOOL_PROP_HEALTH:
(void) strlcpy(buf, "FAULTED", len);
(void) strlcpy(buf, zpool_get_health_str_from_zhp(zhp),
len);
break;

case ZPOOL_PROP_GUID:
Expand Down Expand Up @@ -364,14 +368,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
break;

case ZPOOL_PROP_HEALTH:
verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
verify(nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
== 0);

(void) strlcpy(buf, zpool_state_to_name(intval,
vs->vs_aux), len);
(void) strlcpy(buf, zpool_get_health_str_from_zhp(zhp),
len);
break;
case ZPOOL_PROP_VERSION:
if (intval >= SPA_VERSION_FEATURES) {
Expand Down
12 changes: 6 additions & 6 deletions lib/libzfs/libzfs_status.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,12 +404,12 @@ zpool_status_t
zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata)
{
zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata);

if (ret >= NMSGID)
*msgid = NULL;
else
*msgid = zfs_msgid_table[ret];

if (msgid != NULL) {
if (ret >= NMSGID)
*msgid = NULL;
else
*msgid = zfs_msgid_table[ret];
}
return (ret);
}

Expand Down
44 changes: 44 additions & 0 deletions module/zcommon/zfs_comutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <sys/systm.h>
#else
#include <string.h>
#include <libintl.h>
#endif

#include <sys/types.h>
Expand Down Expand Up @@ -207,10 +208,53 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
"pool split",
};

#if defined(_KERNEL)
/*
 * Stand-in for gettext() in kernel builds (see the enclosing _KERNEL guard):
 * the argument is handed back unchanged, so strings are not translated.
 */
static const char *
gettext(const char *msgid)
{
	return (msgid);
}
#endif

/*
 * Translate a vdev state into the string shown to the user.
 *
 * 'aux' only matters for VDEV_STATE_CANT_OPEN, where it selects between
 * "FAULTED", "SPLIT" and "UNAVAIL". States without an explicit case are
 * reported as "UNKNOWN". Every string goes through gettext().
 */
const char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
	const char *name;

	switch (state) {
	case VDEV_STATE_CLOSED:
	case VDEV_STATE_OFFLINE:
		name = gettext("OFFLINE");
		break;
	case VDEV_STATE_REMOVED:
		name = gettext("REMOVED");
		break;
	case VDEV_STATE_CANT_OPEN:
		/* The aux reason decides how an unopenable vdev is shown. */
		switch (aux) {
		case VDEV_AUX_CORRUPT_DATA:
		case VDEV_AUX_BAD_LOG:
			name = gettext("FAULTED");
			break;
		case VDEV_AUX_SPLIT_POOL:
			name = gettext("SPLIT");
			break;
		default:
			name = gettext("UNAVAIL");
			break;
		}
		break;
	case VDEV_STATE_FAULTED:
		name = gettext("FAULTED");
		break;
	case VDEV_STATE_DEGRADED:
		name = gettext("DEGRADED");
		break;
	case VDEV_STATE_HEALTHY:
		name = gettext("ONLINE");
		break;
	default:
		name = gettext("UNKNOWN");
		break;
	}

	return (name);
}

#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zfs_allocatable_devs);
EXPORT_SYMBOL(zpool_get_rewind_policy);
EXPORT_SYMBOL(zfs_zpl_version_map);
EXPORT_SYMBOL(zfs_spa_version_map);
EXPORT_SYMBOL(zfs_history_event_names);
EXPORT_SYMBOL(zpool_state_to_name);
#endif
79 changes: 79 additions & 0 deletions module/zfs/spa_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <zfs_comutil.h>

/*
* Keeps stats on last N reads per spa_t, disabled by default.
Expand Down Expand Up @@ -997,6 +998,82 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
return ((void *)smh);
}

/*
 * Raw-kstat "addr" callback for the health kstat. Hands back ks_private,
 * which spa_health_init() sets to the owning spa_t. The index 'n' is not
 * consulted: the same pointer is returned for every value.
 */
static void *
spa_health_addr(kstat_t *ksp, loff_t n)
{
	void *spa = ksp->ks_private;

	return (spa);
}

/*
 * Kernel-side counterpart of zpool_suspended_no_continue(): decide whether
 * the pool health should read "SUSPENDED".
 *
 * True only when the pool is suspended and its failure mode is something
 * other than ZIO_FAILURE_MODE_CONTINUE. The failure mode is looked up only
 * if the pool is actually suspended.
 */
static boolean_t
spa_suspended_no_continue(spa_t *spa)
{
	boolean_t suspended = spa_suspended(spa);

	return (suspended &&
	    spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE);
}

/*
 * Raw-kstat "data" callback for the health kstat: write the pool health,
 * followed by a newline, into 'buf' (at most 'size' bytes, truncated if
 * necessary). 'data' is the spa_t returned by spa_health_addr().
 *
 * Reported values:
 *   "UNKNOWN"    the pool has no root vdev to inspect;
 *   "SUSPENDED"  spa_suspended_no_continue() is true;
 *   otherwise    zpool_state_to_name() of the root vdev's state and aux.
 *
 * Always returns 0.
 *
 * NOTE(review): the kstat is readable from spa_stats_init() until
 * spa_stats_destroy(); spa_root_vdev is presumably NULL for part of that
 * window (pool being imported or exported), which is why it is checked
 * before being dereferenced -- confirm against the spa lifecycle.
 */
static int
spa_health_data(char *buf, size_t size, void *data)
{
	spa_t *spa = (spa_t *)data;
	vdev_t *rvd = spa->spa_root_vdev;
	const char *health;

	if (rvd == NULL) {
		/* Nothing to inspect; never dereference a missing root. */
		health = "UNKNOWN";
	} else if (spa_suspended_no_continue(spa)) {
		health = "SUSPENDED";
	} else {
		health = zpool_state_to_name(rvd->vdev_state,
		    rvd->vdev_stat.vs_aux);
	}

	(void) snprintf(buf, size, "%s\n", health);
	return (0);
}

/*
 * Publish the pool's health as /proc/spl/kstat/zfs/<pool>/health.
 *
 * Reading the entry is lock-less with respect to the pool (unlike using
 * 'zpool', which can potentially block for seconds). Because it doesn't
 * block, it can be useful as a pool heartbeat value.
 *
 * The kstat is a virtual raw kstat with headers suppressed; its content is
 * produced on demand by spa_health_data(). If kstat_create() fails, only the
 * lock is initialized and the stored kstat pointer is NULL.
 */
static void
spa_health_init(spa_t *spa)
{
	spa_stats_history_t *health = &spa->spa_stats.health;
	char *module;
	kstat_t *ksp;

	mutex_init(&health->lock, NULL, MUTEX_DEFAULT, NULL);

	module = kmem_asprintf("zfs/%s", spa_name(spa));
	ksp = kstat_create(module, 0, "health", "misc", KSTAT_TYPE_RAW, 0,
	    KSTAT_FLAG_VIRTUAL);

	health->kstat = ksp;
	if (ksp != NULL) {
		ksp->ks_lock = &health->lock;
		ksp->ks_data = NULL;
		/* spa_health_addr() returns this pointer to the reader. */
		ksp->ks_private = spa;
		ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
		kstat_set_raw_ops(ksp, NULL, spa_health_data, spa_health_addr);
		kstat_install(ksp);
	}

	strfree(module);
}

/*
 * Undo spa_health_init(): delete the health kstat if one was created, then
 * destroy the lock that protected it.
 */
static void
spa_health_destroy(spa_t *spa)
{
	spa_stats_history_t *health = &spa->spa_stats.health;

	if (health->kstat != NULL)
		kstat_delete(health->kstat);

	mutex_destroy(&health->lock);
}

void
spa_stats_init(spa_t *spa)
{
Expand All @@ -1005,11 +1082,13 @@ spa_stats_init(spa_t *spa)
spa_tx_assign_init(spa);
spa_io_history_init(spa);
spa_mmp_history_init(spa);
spa_health_init(spa);
}

void
spa_stats_destroy(spa_t *spa)
{
spa_health_destroy(spa);
spa_tx_assign_destroy(spa);
spa_txg_history_destroy(spa);
spa_read_history_destroy(spa);
Expand Down
4 changes: 4 additions & 0 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos',
post =
tags = ['functional', 'inuse']

[tests/functional/kstat]
tests = ['health']
tags = ['functional', 'kstat']

[tests/functional/large_files]
tests = ['large_files_001_pos', 'large_files_002_pos']
tags = ['functional', 'large_files']
Expand Down
11 changes: 10 additions & 1 deletion tests/zfs-tests/include/blkdev.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,16 @@ function unload_scsi_debug
#
function get_debug_device
{
lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3
for i in {1..10} ; do
val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)

# lsscsi can take time to settle
if [ "$val" != "-" ] ; then
break
fi
sleep 1
done
echo "$val"
}

#
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ SUBDIRS = \
hkdf \
inheritance \
inuse \
kstat \
large_files \
largest_pool \
libzfs \
Expand Down
Loading

0 comments on commit e72397a

Please sign in to comment.