Skip to content

Commit

Permalink
ZED should handle spares using configured ashift
Browse files Browse the repository at this point in the history
If the zpool 'ashift' property is set then ZED should use its value
when kicking in a hotspare; with this change 512e disks can be used
as spares for VDEVs that were created with ashift=9, even if ZFS
natively detects them as 4K block devices.

Also introduce an additional auto_spare test case which verifies that
in the face of multiple device failures an appropiate number of spares
are kicked in.

Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Requires-builders: test
  • Loading branch information
loli10K committed Dec 5, 2017
1 parent 94183a9 commit 6757314
Show file tree
Hide file tree
Showing 22 changed files with 418 additions and 98 deletions.
13 changes: 7 additions & 6 deletions TEST
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@
#TEST_PREPARE_SHARES="yes"

### SPLAT
#TEST_SPLAT_SKIP="yes"
TEST_SPLAT_SKIP="yes"
#TEST_SPLAT_OPTIONS="-acvx"

### ztest
#TEST_ZTEST_SKIP="yes"
TEST_ZTEST_SKIP="yes"
#TEST_ZTEST_TIMEOUT=1800
#TEST_ZTEST_DIR="/var/tmp/"
#TEST_ZTEST_OPTIONS="-V"
#TEST_ZTEST_CORE_DIR="/mnt/zloop"

### zimport
#TEST_ZIMPORT_SKIP="yes"
TEST_ZIMPORT_SKIP="yes"
#TEST_ZIMPORT_DIR="/var/tmp/zimport"
#TEST_ZIMPORT_VERSIONS="master installed"
#TEST_ZIMPORT_POOLS="zol-0.6.1 zol-0.6.2 master installed"
#TEST_ZIMPORT_OPTIONS="-c"

### xfstests
#TEST_XFSTESTS_SKIP="yes"
TEST_XFSTESTS_SKIP="yes"
#TEST_XFSTESTS_URL="https://github.com/behlendorf/xfstests/archive/"
#TEST_XFSTESTS_VER="zfs.tar.gz"
#TEST_XFSTESTS_POOL="tank"
Expand All @@ -36,13 +36,14 @@
#TEST_ZFSTESTS_DIR="/mnt/"
#TEST_ZFSTESTS_DISKS="vdb vdc vdd"
#TEST_ZFSTESTS_DISKSIZE="8G"
#TEST_ZFSTESTS_ITERS="1"
TEST_ZFSTESTS_ITERS="2"
#TEST_ZFSTESTS_OPTIONS="-vx"
#TEST_ZFSTESTS_RUNFILE="linux.run"
TEST_ZFSTESTS_RUNFILE="issue-2562.run"
#TEST_ZFSTESTS_TAGS="functional"

### zfsstress
#TEST_ZFSSTRESS_SKIP="yes"
TEST_ZFSSTRESS_SKIP="yes"
#TEST_ZFSSTRESS_URL="https://github.com/nedbass/zfsstress/archive/"
#TEST_ZFSSTRESS_VER="master.tar.gz"
#TEST_ZFSSTRESS_RUNTIME=300
Expand Down
6 changes: 5 additions & 1 deletion cmd/zed/agents/zfs_diagnosis.c
Original file line number Diff line number Diff line change
Expand Up @@ -716,8 +716,12 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
/*
* Don't do anything else if this case is already solved.
*/
if (fmd_case_solved(hdl, zcp->zc_case))
if (fmd_case_solved(hdl, zcp->zc_case)) {
uint64_t eid;
(void) nvlist_lookup_uint64(nvl, FM_EREPORT_EID, &eid);
fmd_hdl_debug(hdl, "skipping event eid=%llu, class%s", eid, class);
return;
}

fmd_hdl_debug(hdl, "error event '%s'", class);

Expand Down
57 changes: 16 additions & 41 deletions cmd/zed/agents/zfs_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
libzfs_handle_t *g_zfshdl;
list_t g_pool_list; /* list of unavailable pools at initialization */
list_t g_device_list; /* list of disks with asynchronous label request */
boolean_t g_enumeration_done;
pthread_t g_zfs_tid;

typedef struct unavailpool {
zpool_handle_t *uap_zhp;
Expand Down Expand Up @@ -555,21 +553,19 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data)
* if this pool was originally unavailable,
* then enable its datasets asynchronously
*/
if (g_enumeration_done) {
for (pool = list_head(&g_pool_list); pool != NULL;
pool = list_next(&g_pool_list, pool)) {

if (pool->uap_enable_tid != 0)
continue; /* entry already processed */
if (strcmp(zpool_get_name(zhp),
zpool_get_name(pool->uap_zhp)))
continue;
if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
/* send to a background thread; keep on list */
(void) pthread_create(&pool->uap_enable_tid,
NULL, zfs_enable_ds, pool);
break;
}
for (pool = list_head(&g_pool_list); pool != NULL;
pool = list_next(&g_pool_list, pool)) {

if (pool->uap_enable_tid != 0)
continue; /* entry already processed */
if (strcmp(zpool_get_name(zhp),
zpool_get_name(pool->uap_zhp)))
continue;
if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
/* send to a background thread; keep on list */
(void) pthread_create(&pool->uap_enable_tid,
NULL, zfs_enable_ds, pool);
break;
}
}

Expand Down Expand Up @@ -833,20 +829,6 @@ zfs_slm_deliver_event(const char *class, const char *subclass, nvlist_t *nvl)
return (ret);
}

/*ARGSUSED*/
static void *
zfs_enum_pools(void *arg)
{
(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
/*
* Linux - instead of using a thread pool, each list entry
* will spawn a thread when an unavailable pool transitions
* to available. zfs_slm_fini will wait for these threads.
*/
g_enumeration_done = B_TRUE;
return (NULL);
}

/*
* called from zed daemon at startup
*
Expand All @@ -861,17 +843,13 @@ zfs_slm_init()
return (-1);

/*
* collect a list of unavailable pools (asynchronously,
* since this can take a while)
* collect a list of unavailable pools: do this synchronously, since
* libzfs is not thread-safe.
*/
list_create(&g_pool_list, sizeof (struct unavailpool),
offsetof(struct unavailpool, uap_node));

if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) != 0) {
list_destroy(&g_pool_list);
__libzfs_fini(g_zfshdl);
return (-1);
}
(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);

list_create(&g_device_list, sizeof (struct pendingdev),
offsetof(struct pendingdev, pd_node));
Expand All @@ -885,9 +863,6 @@ zfs_slm_fini()
unavailpool_t *pool;
pendingdev_t *device;

/* wait for zfs_enum_pools thread to complete */
(void) pthread_join(g_zfs_tid, NULL);

while ((pool = (list_head(&g_pool_list))) != NULL) {
/*
* each pool entry has two possibilities
Expand Down
12 changes: 12 additions & 0 deletions cmd/zed/agents/zfs_retire.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
nvlist_t **spares;
uint_t s, nspares;
char *dev_name;
zprop_source_t source;
int ashift;

config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
Expand All @@ -189,6 +191,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
&spares, &nspares) != 0)
return;

/*
* lookup "ashift" pool property, we may need it for the replacement
*/
ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source);

replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);

(void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
Expand All @@ -207,6 +214,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
&spare_name) != 0)
continue;

/* if set, add the "ashift" pool property to the spare nvlist */
if (source != ZPROP_SRC_DEFAULT)
(void) nvlist_add_uint64(spares[s],
ZPOOL_CONFIG_ASHIFT, ashift);

(void) nvlist_add_nvlist_array(replacement,
ZPOOL_CONFIG_CHILDREN, &spares[s], 1);

Expand Down
8 changes: 4 additions & 4 deletions cmd/zed/zed_exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ _zed_exec_create_env(zed_strings_t *zsp)
* current cursor location within the zevent nvlist.
*/
static void
_zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
_zed_exec_fork_child(uint64_t eid, const char *class, const char *dir, const char *prog,
char *env[], int zfd)
{
char path[PATH_MAX];
Expand Down Expand Up @@ -120,8 +120,8 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,

/* parent process */

zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
prog, eid, pid);
zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu class=%s pid=%d",
prog, eid, class, pid);

/* FIXME: Timeout rogue child processes with sigalarm? */

Expand Down Expand Up @@ -223,7 +223,7 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass,
for (csp = class_strings; *csp; csp++) {
n = strlen(*csp);
if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
_zed_exec_fork_child(eid, dir, z, e, zfd);
_zed_exec_fork_child(eid, class, dir, z, e, zfd);
}
}
free(e);
Expand Down
13 changes: 12 additions & 1 deletion cmd/zed/zed_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <sys/types.h>
#include <syslog.h>
#include <unistd.h>
#include <time.h>
#include "zed_log.h"

#define ZED_LOG_MAX_LOG_LEN 1024
Expand Down Expand Up @@ -205,6 +206,10 @@ static void
_zed_log_aux(int priority, const char *fmt, va_list vargs)
{
char buf[ZED_LOG_MAX_LOG_LEN];
time_t timer;
char ts[32];
struct tm* tm_info;
char msg[ZED_LOG_MAX_LOG_LEN + 32];
int n;

if (!fmt)
Expand All @@ -216,11 +221,17 @@ _zed_log_aux(int priority, const char *fmt, va_list vargs)
buf[sizeof (buf) - 1] = '\0';
}

time(&timer);
tm_info = localtime(&timer);
strftime(ts, sizeof(ts), "%Y-%m-%d %H:%M:%S", tm_info);

(void) sprintf(msg, "%s %s", ts, buf);

if (_ctx.do_syslog)
syslog(priority, "%s", buf);

if (_ctx.do_stderr && (priority <= _ctx.priority))
fprintf(stderr, "%s\n", buf);
fprintf(stderr, "%s\n", msg);
}

/*
Expand Down
2 changes: 1 addition & 1 deletion lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -3614,7 +3614,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
name_flags & VDEV_NAME_GUID) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
(void) snprintf(buf, sizeof (buf), "0x%llx", (u_longlong_t)value);
path = buf;
} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
#if defined(__sun__) || defined(__sun)
Expand Down
27 changes: 27 additions & 0 deletions tests/runfiles/issue-2562.run
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

[DEFAULT]
pre = setup
quiet = False
pre_user = root
user = root
timeout = 600
post_user = root
post = cleanup
outputdir = /var/tmp/test_results

[tests/functional/fault]
#tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
# 'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
tests = ['auto_spare_001_pos', 'auto_spare_002_pos']
tags = ['functional', 'fault']

2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ tags = ['functional', 'exec']

[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos.ksh']
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
tags = ['functional', 'fault']

[tests/functional/features/async_destroy]
Expand Down
36 changes: 33 additions & 3 deletions tests/zfs-tests/include/blkdev.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -353,16 +353,35 @@ function insert_disk #disk scsi_host

#
# Load scsi_debug module with specified parameters
# $blksz can be either one of: < 512b | 512e | 4Kn >
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
typeset devsize=$1
typeset hosts=$2
typeset tgts=$3
typeset luns=$4
typeset blksz=$5

[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
[[ -z $luns ]] && log_fail "Arguments invalid or missing"
[[ -z $luns ]] || [[ -z $blksz ]] && \
log_fail "Arguments invalid or missing"

case "$5" in
'512b')
typeset sector=512
typeset blkexp=0
;;
'512e')
typeset sector=512
typeset blkexp=3
;;
'4Kn')
typeset sector=4096
typeset blkexp=0
;;
*) log_fail "Unsupported blksz value: $5" ;;
esac

if is_linux; then
modprobe -n scsi_debug
Expand All @@ -375,7 +394,8 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
log_fail "scsi_debug module already installed"
else
log_must modprobe scsi_debug dev_size_mb=$devsize \
add_host=$hosts num_tgts=$tgts max_luns=$luns
add_host=$hosts num_tgts=$tgts max_luns=$luns \
sector_size=$sector physblk_exp=$blkexp
block_device_wait
lsscsi | egrep scsi_debug > /dev/null
if (($? == 1)); then
Expand All @@ -385,6 +405,16 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
fi
}

#
# Unload scsi_debug module, if needed.
#
function unload_scsi_debug
{
if lsmod | grep scsi_debug >/dev/null; then
log_must modprobe -r scsi_debug
fi
}

#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
Expand Down
5 changes: 3 additions & 2 deletions tests/zfs-tests/include/libtest.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -3158,10 +3158,11 @@ function zed_stop
if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
zedpid=$(cat ${ZEDLET_DIR}/zed.pid)
kill $zedpid
wait $zedpid
while ps -p $zedpid > /dev/null; do
sleep 1
done
rm -f ${ZEDLET_DIR}/zed.pid
fi

return 0
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if is_linux; then
for SDDEVICE in $(get_debug_device); do
unplug $SDDEVICE
done
modprobe -r scsi_debug
unload_scsi_debug
fi

log_pass
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ verify_runnable "global"

# Create scsi_debug devices for the reopen tests
if is_linux; then
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
else
log_unsupported "scsi debug module unsupported"
fi
Expand Down
Loading

0 comments on commit 6757314

Please sign in to comment.