From 7e35fd52587f727e40281332aeb0b7b245f11ea6 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Tue, 4 Feb 2014 17:39:47 -0500 Subject: [PATCH] Workaround for pools affected by #2094 erratum ZoL commit 1421c89 unintentionally changed the disk format in a forward-compatible, but not backward-compatible, way. This was accomplished by adding an entry to zbookmark_t, which is included in a couple of on-disk structures. That led to the creation of pools with incorrect dsl_scan_phys_t objects that could only be imported by versions of ZoL containing that commit. Such pools cannot be imported by other versions of ZFS or past versions of ZoL. The additional field has been removed by the previous commit. However, affected pools must be imported and scrubbed using a version of ZoL with this commit applied. This will return the pools to a state in which they may be imported by other implementations. The 'zpool status' command can be used to determine if a pool is impacted. A message similar to the following means your pool must be scrubbed to restore compatibility by replacing the damaged dsl_scan_phys_t object. pool: zol-0.6.2-173 state: ONLINE scan: pool compatibility issue detected. see: https://github.com/zfsonlinux/zfs/issues/2094 action: To correct the issue run 'zpool scrub'. config: NAME STATE READ WRITE CKSUM zol-0.6.2-173 ONLINE 0 0 0 raidz1-0 ONLINE 0 0 0 /var/tmp/zol-0.6.2-173/vdev0 ONLINE 0 0 0 /var/tmp/zol-0.6.2-173/vdev1 ONLINE 0 0 0 /var/tmp/zol-0.6.2-173/vdev2 ONLINE 0 0 0 /var/tmp/zol-0.6.2-173/vdev3 ONLINE 0 0 0 errors: No known data errors Pools affected by the damaged dsl_scan_phys_t can be detected prior to an upgrade by running the following command as root: zdb -dddd poolname 1 | grep -P '^\t\tscan = ' | sed -e 's;scan = ;;' | wc -w Note that `poolname` must be replaced with the name of the pool you wish to check. A value of 25 indicates the dsl_scan_phys_t has been damaged. A value of 24 indicates that the dsl_scan_phys_t is normal. 
A value of 0 indicates that there has never been a scrub run on the pool. The regression caused by the change to zbookmark_t never made it into a tagged release or any Gentoo backports. Only those using HEAD were affected. However, this patch has a few limitations. There is no way to detect a damaged dsl_scan_phys_t object when it has occurred on 32-bit systems due to integer rounding that wrote incorrect information, but avoided the overflow on them. Correcting such issues requires triggering a scrub. In addition, bptree_entry_phys_t objects are also affected. These objects only exist during an asynchronous destroy and automating repair of damaged bptree_entry_phys_t objects is non-trivial. Any pools that have been imported by an affected version of ZoL must have all asynchronous destroy operations finish before export and subsequent import by a version containing this commit. Failure to do that will prevent pool import. The presence of any background destroys on any imported pools can be checked by running `zpool get freeing` as root. This will display a non-zero value for any pool with an active asynchronous destroy. Lastly, it is expected that no user data has been lost as a result of this erratum. Original-patch-by: Tim Chase Signed-off-by: Richard Yao Signed-off-by: Brian Behlendorf Issue #2094 --- cmd/zpool/zpool_main.c | 13 +++++++++++++ include/sys/dsl_scan.h | 2 ++ include/sys/fs/zfs.h | 3 +++ include/sys/spa_impl.h | 1 + module/zfs/dsl_scan.c | 36 ++++++++++++++++++++++++++++++++++++ module/zfs/spa_misc.c | 1 + 6 files changed, 56 insertions(+) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index a856fd49e85c..7b4a65bbdf89 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -3903,6 +3903,19 @@ print_scan_status(pool_scan_stat_t *ps) return; } + /* + * Scan required due to known errata. 
+ */ + if ((ps->pss_pass_errata & DSE_ZOL_2094) && + (ps->pss_state == DSS_FINISHED || ps->pss_state == DSS_CANCELED)) { + (void) printf(gettext("pool compatibility issue detected.\n")); + (void) printf(gettext( + " see: https://github.com/zfsonlinux/zfs/issues/2094\n")); + (void) printf(gettext( + "action: To correct the issue run 'zpool scrub'.\n")); + return; + } + start = ps->pss_start_time; end = ps->pss_end_time; zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf)); diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index bf8c5ac824a1..bcb85d67d38e 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -72,6 +72,8 @@ typedef enum dsl_scan_flags { DSF_VISIT_DS_AGAIN = 1<<0, } dsl_scan_flags_t; +#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN) + /* * Every pool will have one dsl_scan_t and this structure will contain * in-memory information about the scan and a pointer to the on-disk diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index c54721155a85..a2c61cefc680 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -694,6 +694,7 @@ typedef struct pool_scan_stat { /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ + uint64_t pss_pass_errata; /* additional informational errata */ } pool_scan_stat_t; typedef enum dsl_scan_state { @@ -704,6 +705,8 @@ typedef enum dsl_scan_state { DSS_NUM_STATES } dsl_scan_state_t; +#define DSE_ZOL_2094 0x01 /* Zol Issue #2094 */ +#define DSL_SCAN_ERRATA_MASK (DSE_ZOL_2094) /* * Vdev statistics. 
Note: all fields should be 64-bit because this diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 55515c1fc369..454df73cb770 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -171,6 +171,7 @@ struct spa { uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ uint64_t spa_scan_pass_start; /* start time per pass/reboot */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ + uint64_t spa_scan_pass_errata; /* errata issues detected */ kmutex_t spa_async_lock; /* protect async state */ kthread_t *spa_async_thread; /* thread doing async task */ int spa_async_suspended; /* async tasks suspended */ diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index ea04507813f7..c6e67d32be98 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -123,6 +123,41 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys); + /* + * Detect if the pool contains the signature of #2094. If it + * does, properly update the scn->scn_phys struct and notify the + * administrator via 'zpool status' to scrub the pool. In the + * unlikely event that an async destroy is in progress, return + * an error. The destroy must be allowed to complete under + * the previous code; only then may the pool be imported using the + * new code and corrected with a 'zpool scrub'. 
+ * + * See https://github.com/zfsonlinux/zfs/issues/2094 + */ + if (err == EOVERFLOW) { + uint64_t zaptmp[SCAN_PHYS_NUMINTS + 1]; + VERIFY3S(SCAN_PHYS_NUMINTS, ==, 24); + VERIFY3S(offsetof(dsl_scan_phys_t, scn_flags), ==, + (23 * sizeof (uint64_t))); + + err = zap_lookup(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, + sizeof (uint64_t), SCAN_PHYS_NUMINTS + 1, &zaptmp); + if (err == 0) { + uint64_t overflow = zaptmp[SCAN_PHYS_NUMINTS]; + + if (overflow & ~DSL_SCAN_FLAGS_MASK || + scn->scn_async_destroying) + return (EOVERFLOW); + + bcopy(zaptmp, &scn->scn_phys, + SCAN_PHYS_NUMINTS * sizeof (uint64_t)); + scn->scn_phys.scn_flags = overflow; + spa->spa_scan_pass_errata |= DSE_ZOL_2094; + } + /* Fall-thru to the expected error handling */ + } + if (err == ENOENT) return (0); else if (err) @@ -319,6 +354,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) } scn->scn_phys.scn_end_time = gethrestime_sec(); + spa->spa_scan_pass_errata &= ~DSE_ZOL_2094; } /* ARGSUSED */ diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 58c4e67bd1b1..4d1b4553eb98 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1795,6 +1795,7 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) /* data not stored on disk */ ps->pss_pass_start = spa->spa_scan_pass_start; ps->pss_pass_exam = spa->spa_scan_pass_exam; + ps->pss_pass_errata = spa->spa_scan_pass_errata; return (0); }