diff --git a/TEST b/TEST index 4c593a457078..81f76b973172 100644 --- a/TEST +++ b/TEST @@ -5,25 +5,25 @@ #TEST_PREPARE_SHARES="yes" ### SPLAT -#TEST_SPLAT_SKIP="yes" +TEST_SPLAT_SKIP="yes" #TEST_SPLAT_OPTIONS="-acvx" ### ztest -#TEST_ZTEST_SKIP="yes" +TEST_ZTEST_SKIP="yes" #TEST_ZTEST_TIMEOUT=1800 #TEST_ZTEST_DIR="/var/tmp/" #TEST_ZTEST_OPTIONS="-V" #TEST_ZTEST_CORE_DIR="/mnt/zloop" ### zimport -#TEST_ZIMPORT_SKIP="yes" +TEST_ZIMPORT_SKIP="yes" #TEST_ZIMPORT_DIR="/var/tmp/zimport" #TEST_ZIMPORT_VERSIONS="master installed" #TEST_ZIMPORT_POOLS="zol-0.6.1 zol-0.6.2 master installed" #TEST_ZIMPORT_OPTIONS="-c" ### xfstests -#TEST_XFSTESTS_SKIP="yes" +TEST_XFSTESTS_SKIP="yes" #TEST_XFSTESTS_URL="https://github.com/behlendorf/xfstests/archive/" #TEST_XFSTESTS_VER="zfs.tar.gz" #TEST_XFSTESTS_POOL="tank" @@ -32,17 +32,18 @@ #TEST_XFSTESTS_OPTIONS="" ### zfs-tests.sh -#TEST_ZFSTESTS_SKIP="yes" +TEST_ZFSTESTS_SKIP="yes" #TEST_ZFSTESTS_DIR="/mnt/" #TEST_ZFSTESTS_DISKS="vdb vdc vdd" #TEST_ZFSTESTS_DISKSIZE="8G" #TEST_ZFSTESTS_ITERS="1" #TEST_ZFSTESTS_OPTIONS="-vx" #TEST_ZFSTESTS_RUNFILE="linux.run" +TEST_ZFSTESTS_RUNFILE="issue-2562.run" #TEST_ZFSTESTS_TAGS="functional" ### zfsstress -#TEST_ZFSSTRESS_SKIP="yes" +TEST_ZFSSTRESS_SKIP="yes" #TEST_ZFSSTRESS_URL="https://github.com/nedbass/zfsstress/archive/" #TEST_ZFSSTRESS_VER="master.tar.gz" #TEST_ZFSSTRESS_RUNTIME=300 @@ -83,8 +84,10 @@ SUSE*) Ubuntu-16.04*) # ZFS enabled xfstests fails to build TEST_XFSTESTS_SKIP="yes" + TEST_ZFSTESTS_SKIP="no" ;; Ubuntu*) + TEST_ZFSTESTS_SKIP="no" ;; *) ;; diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index f69c583f0b1f..c91c55fd433c 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -176,6 +176,8 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) nvlist_t **spares; uint_t s, nspares; char *dev_name; + zprop_source_t source; + int ashift; config = zpool_get_config(zhp, NULL); if (nvlist_lookup_nvlist(config, 
ZPOOL_CONFIG_VDEV_TREE, @@ -189,6 +191,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) &spares, &nspares) != 0) return; + /* + * lookup "ashift" pool property, we may need it for the replacement + */ + ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source); + replacement = fmd_nvl_alloc(hdl, FMD_SLEEP); (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, @@ -207,6 +214,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) &spare_name) != 0) continue; + /* if set, add the "ashift" pool property to the spare nvlist */ + if (source != ZPROP_SRC_DEFAULT) + (void) nvlist_add_uint64(spares[s], + ZPOOL_CONFIG_ASHIFT, ashift); + (void) nvlist_add_nvlist_array(replacement, ZPOOL_CONFIG_CHILDREN, &spares[s], 1); diff --git a/tests/runfiles/issue-2562.run b/tests/runfiles/issue-2562.run new file mode 100644 index 000000000000..8ed6f49f20f6 --- /dev/null +++ b/tests/runfiles/issue-2562.run @@ -0,0 +1,36 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +[DEFAULT] +pre = setup +quiet = False +pre_user = root +user = root +timeout = 600 +post_user = root +post = cleanup +outputdir = /var/tmp/test_results + +[tests/functional/fault] +tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_002_pos', 'auto_spare_002_pos', + 'auto_spare_ashift', 'auto_spare_multiple'] +tags = ['functional', 'fault'] + diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 9424c80a02cf..0c8f298c6d49 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -464,7 +464,7 @@ tags = ['functional', 'exec'] [tests/functional/fault] tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos', - 'auto_spare_002_pos.ksh'] + 'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple'] tags = ['functional', 'fault'] [tests/functional/features/async_destroy] diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 876c843561b8..28ac1052c3d0 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -353,16 +353,35 @@ function insert_disk #disk scsi_host # # Load scsi_debug module with specified parameters +# $blksz can be either one of: < 512b | 512e | 4Kn > # -function load_scsi_debug # dev_size_mb add_host num_tgts max_luns +function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz { typeset devsize=$1 typeset hosts=$2 typeset tgts=$3 typeset luns=$4 + typeset blksz=$5 [[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \ - [[ -z $luns ]] && log_fail 
"Arguments invalid or missing" + [[ -z $luns ]] || [[ -z $blksz ]] && \ + log_fail "Arguments invalid or missing" + + case "$5" in + '512b') + typeset sector=512 + typeset blkexp=0 + ;; + '512e') + typeset sector=512 + typeset blkexp=3 + ;; + '4Kn') + typeset sector=4096 + typeset blkexp=0 + ;; + *) log_fail "Unsupported blksz value: $5" ;; + esac if is_linux; then modprobe -n scsi_debug @@ -375,7 +394,8 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns log_fail "scsi_debug module already installed" else log_must modprobe scsi_debug dev_size_mb=$devsize \ - add_host=$hosts num_tgts=$tgts max_luns=$luns + add_host=$hosts num_tgts=$tgts max_luns=$luns \ + sector_size=$sector physblk_exp=$blkexp block_device_wait lsscsi | egrep scsi_debug > /dev/null if (($? == 1)); then @@ -385,6 +405,16 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns fi } +# +# Unload scsi_debug module, if needed. +# +function unload_scsi_debug +{ + if lsmod | grep scsi_debug >/dev/null; then + log_must modprobe -r scsi_debug + fi +} + # # Get scsi_debug device name. # Returns basename of scsi_debug device (for example "sdb"). 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh index 4477e54027fb..99c51351c5c8 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh @@ -27,7 +27,7 @@ if is_linux; then for SDDEVICE in $(get_debug_device); do unplug $SDDEVICE done - modprobe -r scsi_debug + unload_scsi_debug fi log_pass diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh index 4dbf8965dc6b..59b8764ced84 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh @@ -22,7 +22,7 @@ verify_runnable "global" # Create scsi_debug devices for the reopen tests if is_linux; then - load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS + load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b' else log_unsupported "scsi debug module unsupported" fi diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am index 436f3e8be719..ef4380835c37 100644 --- a/tests/zfs-tests/tests/functional/fault/Makefile.am +++ b/tests/zfs-tests/tests/functional/fault/Makefile.am @@ -6,4 +6,6 @@ dist_pkgdata_SCRIPTS = \ auto_online_001_pos.ksh \ auto_replace_001_pos.ksh \ auto_spare_001_pos.ksh \ - auto_spare_002_pos.ksh + auto_spare_002_pos.ksh \ + auto_spare_ashift.ksh \ + auto_spare_multiple.ksh diff --git a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh index 0f6e38ac2271..beff5dc87ce1 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_online_001_pos.ksh @@ -54,9 +54,8 @@ fi function cleanup { - #online last disk before fail - insert_disk 
$offline_disk $host - poolexists $TESTPOOL && destroy_pool $TESTPOOL + destroy_pool $TESTPOOL + unload_scsi_debug } log_assert "Testing automated auto-online FMA test" @@ -65,8 +64,8 @@ log_onexit cleanup # If using the default loop devices, need a scsi_debug device for auto-online if is_loop_device $DISK1; then - SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}') - SDDEVICE=$(echo $SD | nawk -F / '{print $3}') + load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b' + SDDEVICE=$(get_debug_device) SDDEVICE_ID=$(get_persistent_disk_name $SDDEVICE) autoonline_disks="$SDDEVICE" else diff --git a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh index 40a680a9bb02..8e48b2ab4371 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh @@ -57,27 +57,23 @@ fi function setup { - lsmod | egrep scsi_debug > /dev/null - if (($? == 1)); then - load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS - fi + load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b' + SD=$(get_debug_device) + SDDEVICE_ID=$(get_persistent_disk_name $SD) # Register vdev_id alias rule for scsi_debug device to create a # persistent path - SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' \ - | nawk -F / '{print $3}') - SDDEVICE_ID=$(get_persistent_disk_name $SD) log_must eval "echo "alias scsidebug /dev/disk/by-id/$SDDEVICE_ID" \ >> $VDEVID_CONF" block_device_wait - - SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | egrep ID_VDEV \ - | nawk '{print $2; exit}' | nawk -F = '{print $2; exit}') + SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD \ + | awk -F'=' '/ID_VDEV=/{print $2; exit}') [[ -z $SDDEVICE ]] && log_fail "vdev rule was not registered properly" } function cleanup { - poolexists $TESTPOOL && destroy_pool $TESTPOOL + destroy_pool $TESTPOOL + unload_scsi_debug } log_assert "Testing automated auto-replace FMA test" @@ -112,7 
+108,7 @@ log_must zpool export -F $TESTPOOL # Offline disk remove_disk $SD block_device_wait -log_must modprobe -r scsi_debug +unload_scsi_debug # Reimport pool with drive missing log_must zpool import $TESTPOOL diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh index 82f7f4834ce6..278e1c926e77 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_001_pos.ksh @@ -42,7 +42,7 @@ verify_runnable "both" function cleanup { log_must zinject -c all - poolexists $TESTPOOL && destroy_pool $TESTPOOL + destroy_pool $TESTPOOL rm -f $VDEV_FILES $SPARE_FILE } diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh index f0ddac35cfed..9ae0fe6a3c63 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh @@ -41,8 +41,14 @@ verify_runnable "both" function cleanup { + # XXX: remove this after debugging + zpool status -v + #zpool events -v + #cat $ZED_DEBUG_LOG + # is ZED even running? + ps fax log_must zinject -c all - poolexists $TESTPOOL && destroy_pool $TESTPOOL + destroy_pool $TESTPOOL rm -f $VDEV_FILES $SPARE_FILE } @@ -70,7 +76,7 @@ for type in "mirror" "raidz" "raidz2"; do # 5. 
Verify the ZED kicks in a hot spare and expected pool/device status log_note "Wait for ZED to auto-spare" - log_must wait_vdev_state $TESTPOOL $FAULT_FILE "DEGRADED" 60 + log_must wait_vdev_state $TESTPOOL $FAULT_FILE "DEGRADED" 120 log_must wait_vdev_state $TESTPOOL $SPARE_FILE "ONLINE" 60 log_must wait_hotspare_state $TESTPOOL $SPARE_FILE "INUSE" log_must check_state $TESTPOOL "" "DEGRADED" diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh new file mode 100755 index 000000000000..fdb07d94cf6d --- /dev/null +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Intel Corporation. All rights reserved. +# Copyright 2017, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/fault/fault.cfg + +# +# DESCRIPTION: +# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when +# drive is faulted and a custom ashift value needs to be provided to replace it. +# +# STRATEGY: +# 1. Create a pool from 512b devices and set "ashift" pool property accordingly +# 2. Add one 512e spare device (4Kn would generate IO errors on replace) +# 3. Inject IO errors with a zinject error handler +# 4. Start a scrub +# 5. Verify the ZED kicks in the hot spare and expected pool/device status +# 6. Clear the fault +# 7. 
Verify the hot spare is available and expected pool/device status +# + +verify_runnable "both" + +function cleanup +{ + log_must zinject -c all + destroy_pool $TESTPOOL + unload_scsi_debug + rm -f $SAFE_DEVICE $FAIL_DEVICE +} + +log_assert "ZED should replace a device using the configured ashift property" +log_onexit cleanup + +SAFE_DEVICE="$TEST_BASE_DIR/safe-dev" +FAIL_DEVICE="$TEST_BASE_DIR/fail-dev" + +# 1. Create a pool from 512b devices and set "ashift" pool property accordingly +for vdev in $SAFE_DEVICE $FAIL_DEVICE; do + truncate -s $SPA_MINDEVSIZE $vdev +done +log_must zpool create -f $TESTPOOL mirror $SAFE_DEVICE $FAIL_DEVICE +# NOTE: file VDEVs should be added as 512b devices, verify this "just in case" +for vdev in $SAFE_DEVICE $FAIL_DEVICE; do + verify_eq "9" "$(zdb -e -l $vdev | awk '/ashift: /{print $2}')" "ashift" +done +log_must zpool set ashift=9 $TESTPOOL + +# 2. Add one 512e spare device (4Kn would generate IO errors on replace) +# NOTE: must be larger than the existing 512b devices, add 32m of fudge +load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) $SDHOSTS $SDTGTS $SDLUNS '512e' +SPARE_DEVICE=$(get_debug_device) +log_must_busy zpool add $TESTPOOL spare $SPARE_DEVICE + +# 3. Inject IO errors with a zinject error handler +log_must zinject -d $FAIL_DEVICE -e io -T all -f 100 $TESTPOOL + +# 4. Start a scrub +log_must zpool scrub $TESTPOOL + +# 5. Verify the ZED kicks in a hot spare and expected pool/device status +log_note "Wait for ZED to auto-spare" +log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "FAULTED" 60 +log_must wait_vdev_state $TESTPOOL $SPARE_DEVICE "ONLINE" 60 +log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "INUSE" +log_must check_state $TESTPOOL "" "DEGRADED" + +# 6. Clear the fault +log_must zinject -c all +log_must zpool clear $TESTPOOL $FAIL_DEVICE + +# 7. 
Verify the hot spare is available and expected pool/device status +log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "ONLINE" 60 +log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "AVAIL" +log_must is_pool_resilvered $TESTPOOL +log_must check_state $TESTPOOL "" "ONLINE" + +log_pass "ZED successfully replaces a device using the configured ashift property" diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh new file mode 100755 index 000000000000..04c9a63a84e9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh @@ -0,0 +1,149 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Intel Corporation. All rights reserved. +# Copyright 2017, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/fault/fault.cfg + +# +# DESCRIPTION: +# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when +# multiple drives are faulted. +# +# STRATEGY: +# 1. Create a pool with two hot spares +# 2. Inject IO ERRORS with a zinject error handler on the first device +# 3. Start a scrub +# 4. Verify the ZED kicks in a hot spare and expected pool/device status +# 5. Inject IO ERRORS on a second device +# 6. Start a scrub +# 7. Verify the ZED kicks in a second hot spare +# 8. Clear the fault on both devices +# 9. Verify the hot spares are available and expected pool/device status +# 10. 
Rinse and repeat, this time faulting both devices at the same time +# + +verify_runnable "both" + +function cleanup +{ + log_must zinject -c all + destroy_pool $TESTPOOL + rm -f $DATA_DEVS $SPARE_DEVS +} + +log_assert "ZED should be able to handle multiple faulted devices" +log_onexit cleanup + +FAULT_DEV1="$TEST_BASE_DIR/fault-dev1" +FAULT_DEV2="$TEST_BASE_DIR/fault-dev2" +SAFE_DEV1="$TEST_BASE_DIR/safe-dev1" +SAFE_DEV2="$TEST_BASE_DIR/safe-dev2" +DATA_DEVS="$FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 $SAFE_DEV2" +SPARE_DEV1="$TEST_BASE_DIR/spare-dev1" +SPARE_DEV2="$TEST_BASE_DIR/spare-dev2" +SPARE_DEVS="$SPARE_DEV1 $SPARE_DEV2" + +for type in "mirror" "raidz" "raidz2" "raidz3"; do + # 1. Create a pool with two hot spares + truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS + log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS + + # 2. Inject IO ERRORS with a zinject error handler on the first device + log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL + + # 3. Start a scrub + log_must zpool scrub $TESTPOOL + + # 4. Verify the ZED kicks in a hot spare and expected pool/device status + log_note "Wait for ZED to auto-spare" + log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60 + log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60 + log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE" + log_must check_state $TESTPOOL "" "DEGRADED" + + # 5. Inject IO ERRORS on a second device + log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL + + # 6. Start a scrub + while is_pool_scrubbing $TESTPOOL || is_pool_resilvering $TESTPOOL; do + sleep 1 + done + log_must zpool scrub $TESTPOOL + + # 7. Verify the ZED kicks in a second hot spare + log_note "Wait for ZED to auto-spare" + log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60 + log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60 + log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE" + log_must check_state $TESTPOOL "" "DEGRADED" + + # 8. 
Clear the fault on both devices + log_must zinject -c all + log_must zpool clear $TESTPOOL $FAULT_DEV1 + log_must zpool clear $TESTPOOL $FAULT_DEV2 + + # 9. Verify the hot spares are available and expected pool/device status + log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60 + log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60 + log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL" + log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL" + log_must check_state $TESTPOOL "" "ONLINE" + + # Cleanup + cleanup +done + +# Rinse and repeat, this time faulting both devices at the same time +# NOTE: "raidz" is excluded since it cannot survive 2 faulted devices +# NOTE: "mirror" is a 4-way mirror here and should survive this test +for type in "mirror" "raidz2" "raidz3"; do + # 1. Create a pool with two hot spares + truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS + log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS + + # 2. Inject IO ERRORS with a zinject error handler on two devices + log_must eval "zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL &" + log_must eval "zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL &" + + # 3. Start a scrub + log_must zpool scrub $TESTPOOL + + # 4. Verify the ZED kicks in two hot spares and expected pool/device status + log_note "Wait for ZED to auto-spare" + log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60 + log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60 + log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60 + log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60 + log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE" + log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE" + log_must check_state $TESTPOOL "" "DEGRADED" + + # 5. 
Clear the fault on both devices + log_must zinject -c all + log_must zpool clear $TESTPOOL $FAULT_DEV1 + log_must zpool clear $TESTPOOL $FAULT_DEV2 + + # Cleanup + cleanup +done + +log_pass "ZED successfully handles multiple faulted devices" diff --git a/tests/zfs-tests/tests/functional/fault/cleanup.ksh b/tests/zfs-tests/tests/functional/fault/cleanup.ksh index 82e379b0d085..9d354f30e709 100755 --- a/tests/zfs-tests/tests/functional/fault/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/fault/cleanup.ksh @@ -33,14 +33,4 @@ cleanup_devices $DISKS zed_stop zed_cleanup -SDDEVICE=$(get_debug_device) - -# Offline disk and remove scsi_debug module -if is_linux; then - if [ -n "$SDDEVICE" ]; then - remove_disk $SDDEVICE - fi - modprobe -r scsi_debug -fi - log_pass diff --git a/tests/zfs-tests/tests/functional/fault/setup.ksh b/tests/zfs-tests/tests/functional/fault/setup.ksh index 3d54d4f21754..3d3cbc9e568c 100755 --- a/tests/zfs-tests/tests/functional/fault/setup.ksh +++ b/tests/zfs-tests/tests/functional/fault/setup.ksh @@ -31,8 +31,4 @@ verify_runnable "global" zed_setup zed_start -# Create a scsi_debug device to be used with auto-online (if using loop devices) -# and auto-replace regardless of other devices -load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS - log_pass