diff --git a/TEST b/TEST
index 4c593a457078..40d0a3783066 100644
--- a/TEST
+++ b/TEST
@@ -5,25 +5,25 @@
 #TEST_PREPARE_SHARES="yes"
 
 ### SPLAT
-#TEST_SPLAT_SKIP="yes"
+TEST_SPLAT_SKIP="yes"
 #TEST_SPLAT_OPTIONS="-acvx"
 
 ### ztest
-#TEST_ZTEST_SKIP="yes"
+TEST_ZTEST_SKIP="yes"
 #TEST_ZTEST_TIMEOUT=1800
 #TEST_ZTEST_DIR="/var/tmp/"
 #TEST_ZTEST_OPTIONS="-V"
 #TEST_ZTEST_CORE_DIR="/mnt/zloop"
 
 ### zimport
-#TEST_ZIMPORT_SKIP="yes"
+TEST_ZIMPORT_SKIP="yes"
 #TEST_ZIMPORT_DIR="/var/tmp/zimport"
 #TEST_ZIMPORT_VERSIONS="master installed"
 #TEST_ZIMPORT_POOLS="zol-0.6.1 zol-0.6.2 master installed"
 #TEST_ZIMPORT_OPTIONS="-c"
 
 ### xfstests
-#TEST_XFSTESTS_SKIP="yes"
+TEST_XFSTESTS_SKIP="yes"
 #TEST_XFSTESTS_URL="https://github.com/behlendorf/xfstests/archive/"
 #TEST_XFSTESTS_VER="zfs.tar.gz"
 #TEST_XFSTESTS_POOL="tank"
@@ -39,10 +39,11 @@
 #TEST_ZFSTESTS_ITERS="1"
 #TEST_ZFSTESTS_OPTIONS="-vx"
 #TEST_ZFSTESTS_RUNFILE="linux.run"
+TEST_ZFSTESTS_RUNFILE="issue-2562.run"
 #TEST_ZFSTESTS_TAGS="functional"
 
 ### zfsstress
-#TEST_ZFSSTRESS_SKIP="yes"
+TEST_ZFSSTRESS_SKIP="yes"
 #TEST_ZFSSTRESS_URL="https://github.com/nedbass/zfsstress/archive/"
 #TEST_ZFSSTRESS_VER="master.tar.gz"
 #TEST_ZFSSTRESS_RUNTIME=300
diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c
index f69c583f0b1f..c91c55fd433c 100644
--- a/cmd/zed/agents/zfs_retire.c
+++ b/cmd/zed/agents/zfs_retire.c
@@ -176,6 +176,8 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	nvlist_t **spares;
 	uint_t s, nspares;
 	char *dev_name;
+	zprop_source_t source;
+	uint64_t ashift;
 
 	config = zpool_get_config(zhp, NULL);
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
@@ -189,6 +191,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	    &spares, &nspares) != 0)
 		return;
 
+	/*
+	 * lookup "ashift" pool property, we may need it for the replacement
+	 */
+	ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source);
+
 	replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);
 
 	(void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
@@ -207,6 +214,11 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 		    &spare_name) != 0)
 			continue;
 
+		/* if set, add the "ashift" pool property to the spare nvlist */
+		if (source != ZPROP_SRC_DEFAULT)
+			(void) nvlist_add_uint64(spares[s],
+			    ZPOOL_CONFIG_ASHIFT, ashift);
+
 		(void) nvlist_add_nvlist_array(replacement,
 		    ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
 
diff --git a/tests/runfiles/issue-2562.run b/tests/runfiles/issue-2562.run
new file mode 100644
index 000000000000..444d8b762978
--- /dev/null
+++ b/tests/runfiles/issue-2562.run
@@ -0,0 +1,26 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+[DEFAULT]
+pre = setup
+quiet = False
+pre_user = root
+user = root
+timeout = 600
+post_user = root
+post = cleanup
+outputdir = /var/tmp/test_results
+
+[tests/functional/fault]
+tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
+    'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
+tags = ['functional', 'fault']
+
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 9424c80a02cf..0c8f298c6d49 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -464,7 +464,7 @@ tags = ['functional', 'exec']
 
 [tests/functional/fault]
 tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
-    'auto_spare_002_pos.ksh']
+    'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
 tags = ['functional', 'fault']
 
 [tests/functional/features/async_destroy]
diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib
index 876c843561b8..44f0aeb446df 100644
--- a/tests/zfs-tests/include/blkdev.shlib
+++ b/tests/zfs-tests/include/blkdev.shlib
@@ -353,16 +353,38 @@ function insert_disk #disk scsi_host
 
 #
 # Load scsi_debug module with specified parameters
+# $blksz selects the emulated sector layout and must be one of:
+#   512b - 512-byte logical and physical sectors
+#   512e - 512-byte logical sectors on 4096-byte physical sectors
+#   4Kn  - 4096-byte logical and physical sectors
 #
-function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
+function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
 {
 	typeset devsize=$1
 	typeset hosts=$2
 	typeset tgts=$3
 	typeset luns=$4
+	typeset blksz=$5
 
 	[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
-	    [[ -z $luns ]] && log_fail "Arguments invalid or missing"
+	    [[ -z $luns ]] || [[ -z $blksz ]] && \
+	    log_fail "Arguments invalid or missing"
+
+	case "$blksz" in
+		'512b')
+			typeset sector=512
+			typeset blkexp=0
+		;;
+		'512e')
+			typeset sector=512
+			typeset blkexp=3
+		;;
+		'4Kn')
+			typeset sector=4096
+			typeset blkexp=0
+		;;
+		*) log_fail "Unsupported blksz value: $blksz" ;;
+	esac
 
 	if is_linux; then
 		modprobe -n scsi_debug
@@ -375,7 +397,8 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
 			log_fail "scsi_debug module already installed"
 		else
 			log_must modprobe scsi_debug dev_size_mb=$devsize \
-			    add_host=$hosts num_tgts=$tgts max_luns=$luns
+			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
+			    sector_size=$sector physblk_exp=$blkexp
 			block_device_wait
 			lsscsi | egrep scsi_debug > /dev/null
 			if (($? == 1)); then
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh
index 4dbf8965dc6b..59b8764ced84 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/setup.ksh
@@ -22,7 +22,7 @@ verify_runnable "global"
 
 # Create scsi_debug devices for the reopen tests
 if is_linux; then
-	load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
+	load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
 else
 	log_unsupported "scsi debug module unsupported"
 fi
diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am
index 436f3e8be719..ef4380835c37 100644
--- a/tests/zfs-tests/tests/functional/fault/Makefile.am
+++ b/tests/zfs-tests/tests/functional/fault/Makefile.am
@@ -6,4 +6,6 @@ dist_pkgdata_SCRIPTS = \
 	auto_online_001_pos.ksh \
 	auto_replace_001_pos.ksh \
 	auto_spare_001_pos.ksh \
-	auto_spare_002_pos.ksh
+	auto_spare_002_pos.ksh \
+	auto_spare_ashift.ksh \
+	auto_spare_multiple.ksh
diff --git a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
index 40a680a9bb02..5953e13308b1 100755
--- a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh
@@ -59,7 +59,7 @@ function setup
 {
 	lsmod | egrep scsi_debug > /dev/null
 	if (($? == 1)); then
-		load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
+		load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
 	fi
 	# Register vdev_id alias rule for scsi_debug device to create a
 	# persistent path
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh
new file mode 100755
index 000000000000..8cb7e8f82eb3
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_ashift.ksh
@@ -0,0 +1,100 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright 2017, loli10K . All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/math.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# drive is faulted and a custom ashift value needs to be provided to replace it.
+#
+# STRATEGY:
+# 1. Create a pool from 512b devices and set "ashift" pool property accordingly
+# 2. Add one 512e spare device (4Kn would generate IO errors on replace)
+# 3. Inject IO errors with a zinject error handler
+# 4. Start a scrub
+# 5. Verify the ZED kicks in the hot spare and expected pool/device status
+# 6. Clear the fault
+# 7. Verify the hot spare is available and expected pool/device status
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	log_must zinject -c all
+	destroy_pool $TESTPOOL
+	rm -f $SAFE_DEVICE $FAIL_DEVICE
+}
+
+log_assert "ZED should replace a device using the configured ashift property"
+log_onexit cleanup
+
+SAFE_DEVICE="$TEST_BASE_DIR/safe-dev"
+FAIL_DEVICE="$TEST_BASE_DIR/fail-dev"
+
+# 1. Create a pool from 512b devices and set "ashift" pool property accordingly
+for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
+	truncate -s $SPA_MINDEVSIZE $vdev
+done
+log_must zpool create -f $TESTPOOL mirror $SAFE_DEVICE $FAIL_DEVICE
+# NOTE: file VDEVs should be added as 512b devices, verify this "just in case"
+for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
+	verify_eq "9" "$(zdb -e -l $vdev | awk '/ashift: /{print $2}')" "ashift"
+done
+log_must zpool set ashift=9 $TESTPOOL
+
+# 2. Add one 512e spare device (4Kn would generate IO errors on replace)
+# NOTE: must be larger than the existing 512b devices, add 32m of fudge
+if lsmod | grep scsi_debug >/dev/null; then
+	log_must modprobe -r scsi_debug
+fi
+load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) $SDHOSTS $SDTGTS $SDLUNS '512e'
+SPARE_DEVICE=$(get_debug_device)
+log_must zpool add $TESTPOOL spare $SPARE_DEVICE
+
+# 3. Inject IO errors with a zinject error handler
+log_must zinject -d $FAIL_DEVICE -e io -T all -f 100 $TESTPOOL
+
+# 4. Start a scrub
+log_must zpool scrub $TESTPOOL
+
+# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+log_note "Wait for ZED to auto-spare"
+log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "FAULTED" 60
+log_must wait_vdev_state $TESTPOOL $SPARE_DEVICE "ONLINE" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "INUSE"
+log_must check_state $TESTPOOL "" "DEGRADED"
+
+# 6. Clear the fault
+log_must zinject -c all
+log_must zpool clear $TESTPOOL $FAIL_DEVICE
+
+# 7. Verify the hot spare is available and expected pool/device status
+log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "ONLINE" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "AVAIL"
+log_must is_pool_resilvered $TESTPOOL
+log_must check_state $TESTPOOL "" "ONLINE"
+
+log_pass "ZED successfully replaces a device using the configured ashift property"
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
new file mode 100755
index 000000000000..2e60fd3bc1af
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
@@ -0,0 +1,114 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright 2017, loli10K . All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# multiple drives are faulted.
+#
+# STRATEGY:
+# 1. Create a pool with two hot spares
+# 2. Inject IO ERRORS with a zinject error handler on the first device
+# 3. Start a scrub
+# 4. Verify the ZED kicks in a hot spare and expected pool/device status
+# 5. Inject IO ERRORS on a second device
+# 6. Start a scrub
+# 7. Verify the ZED kicks in a second hot spare
+# 8. Clear the fault on both devices
+# 9. Verify the hot spares are available and expected pool/device status
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	log_must zinject -c all
+	destroy_pool $TESTPOOL
+	rm -f $DATA_DEVS $SPARE_DEVS
+}
+
+log_assert "ZED should be able to handle multiple faulted devices"
+log_onexit cleanup
+
+FAULT_DEV1="$TEST_BASE_DIR/fault-dev1"
+FAULT_DEV2="$TEST_BASE_DIR/fault-dev2"
+SAFE_DEV1="$TEST_BASE_DIR/safe-dev1"
+SAFE_DEV2="$TEST_BASE_DIR/safe-dev2"
+DATA_DEVS="$FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 $SAFE_DEV2"
+SPARE_DEV1="$TEST_BASE_DIR/spare-dev1"
+SPARE_DEV2="$TEST_BASE_DIR/spare-dev2"
+SPARE_DEVS="$SPARE_DEV1 $SPARE_DEV2"
+
+for type in "mirror" "raidz" "raidz2"; do
+	# 1. Create a pool with two hot spares
+	truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
+	log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+
+	# 2. Inject IO ERRORS with a zinject error handler on the first device
+	log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL
+
+	# 3. Start a scrub
+	log_must zpool scrub $TESTPOOL
+
+	# 4. Verify the ZED kicks in a hot spare and expected pool/device status
+	log_note "Wait for ZED to auto-spare"
+	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
+	log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+	log_must check_state $TESTPOOL "" "DEGRADED"
+
+	# 5. Inject IO ERRORS on a second device
+	log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL
+
+	# 6. Start a scrub
+	while is_pool_scrubbing $TESTPOOL; do
+		sleep 1
+	done
+	log_must zpool scrub $TESTPOOL
+
+	# 7. Verify the ZED kicks in a second hot spare
+	log_note "Wait for ZED to auto-spare"
+	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
+	log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+	log_must check_state $TESTPOOL "" "DEGRADED"
+
+	# 8. Clear the fault on both devices
+	log_must zinject -c all
+	log_must zpool clear $TESTPOOL $FAULT_DEV1
+	log_must zpool clear $TESTPOOL $FAULT_DEV2
+
+	# 9. Verify the hot spares are available and expected pool/device status
+	log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60
+	log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60
+	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL"
+	log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL"
+	log_must check_state $TESTPOOL "" "ONLINE"
+
+	# 10. Cleanup
+	cleanup
+done
+
+log_pass "ZED successfully handles multiple faulted devices"
diff --git a/tests/zfs-tests/tests/functional/fault/setup.ksh b/tests/zfs-tests/tests/functional/fault/setup.ksh
index 3d54d4f21754..3043cf715e7e 100755
--- a/tests/zfs-tests/tests/functional/fault/setup.ksh
+++ b/tests/zfs-tests/tests/functional/fault/setup.ksh
@@ -33,6 +33,6 @@ zed_start
 
 # Create a scsi_debug device to be used with auto-online (if using loop devices)
 # and auto-replace regardless of other devices
-load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
+load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
 
 log_pass