Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Special failsafe feature #16185

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,23 @@ zpool_do_add(int argc, char **argv)
}
}

/*
* Special case:
*
* We need to know the special_failsafe pool property value to determine
* if the new vdev configuration has the correct redundancy requirements
* for special and dedup vdevs.
*
* Pass in the current value for special_failsafe to the proplist.
*/
char strval[ZFS_MAXPROPLEN];
if (zpool_get_prop(zhp, ZPOOL_PROP_SPECIAL_FAILSAFE, strval,
ZFS_MAXPROPLEN, NULL, B_FALSE) == 0) {
verify(add_prop_list(
zpool_prop_to_name(ZPOOL_PROP_SPECIAL_FAILSAFE), strval,
&props, B_TRUE) == 0);
}

/* pass off to make_root_vdev for processing */
nvroot = make_root_vdev(zhp, props, !check_inuse,
check_replication, B_FALSE, dryrun, argc, argv);
Expand Down Expand Up @@ -7597,6 +7614,23 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
}
}

/*
* Special case:
*
* We need to know the special_failsafe pool property value to determine
* if the new vdev configuration has the correct redundancy requirements
* for special and dedup vdevs.
*
* Pass in the current value for special_failsafe to the proplist.
*/
char strval[ZFS_MAXPROPLEN];
if (zpool_get_prop(zhp, ZPOOL_PROP_SPECIAL_FAILSAFE, strval,
ZFS_MAXPROPLEN, NULL, B_FALSE) == 0) {
verify(add_prop_list(
zpool_prop_to_name(ZPOOL_PROP_SPECIAL_FAILSAFE), strval,
&props, B_TRUE) == 0);
}

nvroot = make_root_vdev(zhp, props, force, B_FALSE, replacing, B_FALSE,
argc, argv);
if (nvroot == NULL) {
Expand Down
97 changes: 85 additions & 12 deletions cmd/zpool/zpool_vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
*/
boolean_t error_seen;
boolean_t is_force;
boolean_t is_alloc_class;

void
vdev_error(const char *fmt, ...)
Expand All @@ -94,8 +95,15 @@ vdev_error(const char *fmt, ...)
if (!error_seen) {
(void) fprintf(stderr, gettext("invalid vdev specification\n"));
if (!is_force)
(void) fprintf(stderr, gettext("use '-f' to override "
"the following errors:\n"));
if (is_alloc_class) {
(void) fprintf(stderr, gettext("Turn on the "
"special_failsafe pool property or use '-f'"
" to override the following errors:\n"));
is_alloc_class = B_FALSE;
} else {
(void) fprintf(stderr, gettext("use '-f' to "
"override the following errors:\n"));
}
else
(void) fprintf(stderr, gettext("the following errors "
"must be manually repaired:\n"));
Expand Down Expand Up @@ -442,6 +450,7 @@ typedef struct replication_level {
const char *zprl_type;
uint64_t zprl_children;
uint64_t zprl_parity;
boolean_t zprl_is_alloc_class;
} replication_level_t;

#define ZPOOL_FUZZ (16 * 1024 * 1024)
Expand Down Expand Up @@ -480,13 +489,43 @@ is_raidz_draid(replication_level_t *a, replication_level_t *b)
return (B_FALSE);
}

/*
 * Report whether 'props' asks for special_failsafe.
 *
 * B_TRUE is returned only when 'props' holds special_failsafe=on and does
 * not hold feature@special_failsafe=disabled.  Every other combination,
 * including a missing special_failsafe entry, yields B_FALSE.
 */
static boolean_t
is_special_failsafe_enabled_in_props(nvlist_t *props)
{
	const char *feature_val = NULL;
	const char *prop_val = NULL;

	/* An explicitly disabled feature overrides the property value. */
	if (nvlist_lookup_string(props, "feature@special_failsafe",
	    &feature_val) == 0 && feature_val != NULL &&
	    strcmp(feature_val, "disabled") == 0)
		return (B_FALSE);

	/* Without a special_failsafe entry there is nothing to enable. */
	if (nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_SPECIAL_FAILSAFE), &prop_val) != 0)
		return (B_FALSE);

	if (prop_val == NULL)
		return (B_FALSE);

	/* Only the literal value "on" counts as enabled. */
	return (strcmp(prop_val, "on") == 0 ? B_TRUE : B_FALSE);
}

/*
* Given a list of toplevel vdevs, return the current replication level. If
* the config is inconsistent, then NULL is returned. If 'fatal' is set, then
* an error message will be displayed for each self-inconsistent vdev.
*/
static replication_level_t *
get_replication(nvlist_t *nvroot, boolean_t fatal)
get_replication(nvlist_t *props, nvlist_t *nvroot, boolean_t fatal)
{
nvlist_t **top;
uint_t t, toplevels;
Expand All @@ -495,7 +534,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
nvlist_t *nv;
const char *type;
replication_level_t lastrep = {0};
replication_level_t rep;
replication_level_t rep = {0};
replication_level_t *ret;
replication_level_t *raidz, *mirror;
boolean_t dontreport;
Expand All @@ -507,6 +546,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)

for (t = 0; t < toplevels; t++) {
uint64_t is_log = B_FALSE;
const char *str = NULL;

nv = top[t];

Expand All @@ -528,12 +568,32 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
strcmp(type, VDEV_TYPE_INDIRECT) == 0)
continue;

rep.zprl_type = type;

/*
* If special_failsafe=on then we know the special allocation
* class devices have at least one copy of their data on the
* pool so we can ignore their replication level.
*/
(void) nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS,
&str);
if (str &&
((strcmp(str, VDEV_ALLOC_BIAS_SPECIAL) == 0) ||
(strcmp(str, VDEV_ALLOC_BIAS_DEDUP) == 0))) {
rep.zprl_is_alloc_class = B_TRUE;
is_alloc_class = B_TRUE;
if (is_special_failsafe_enabled_in_props(props)) {
continue; /* We're backed up, skip redundancy */
}
} else {
is_alloc_class = B_FALSE;
}

if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0) {
/*
* This is a 'file' or 'disk' vdev.
*/
rep.zprl_type = type;
rep.zprl_children = 1;
rep.zprl_parity = 0;
} else {
Expand All @@ -548,7 +608,6 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
* We also check that the size of each vdev (if it can
* be determined) is the same.
*/
rep.zprl_type = type;
rep.zprl_children = 0;

if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
Expand Down Expand Up @@ -808,7 +867,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
* report any difference between the two.
*/
static int
check_replication(nvlist_t *config, nvlist_t *newroot)
check_replication(nvlist_t *props, nvlist_t *config, nvlist_t *newroot)
{
nvlist_t **child;
uint_t children;
Expand All @@ -825,7 +884,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)

verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
if ((current = get_replication(nvroot, B_FALSE)) == NULL)
if ((current = get_replication(props, nvroot, B_FALSE)) == NULL)
return (0);
}
/*
Expand All @@ -850,17 +909,31 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
* Get the replication level of the new vdev spec, reporting any
* inconsistencies found.
*/
if ((new = get_replication(newroot, B_TRUE)) == NULL) {
if ((new = get_replication(props, newroot, B_TRUE)) == NULL) {
free(current);
return (-1);
}

/*
* Check to see if the new vdev spec matches the replication level of
* the current pool.
*/
ret = 0;
if (current != NULL) {
if (current->zprl_is_alloc_class || new->zprl_is_alloc_class)
is_alloc_class = B_TRUE;
else
is_alloc_class = B_FALSE;

/*
* Special case:
* If there were any redundancy problems with alloc class vdevs
* BUT the pool had special_failsafe on, then we're fine since
* all the alloc class data has a copy in the main pool.
*/
if (is_special_failsafe_enabled_in_props(props) &&
is_alloc_class)
goto out;

if (is_raidz_mirror(current, new, &raidz, &mirror) ||
is_raidz_mirror(new, current, &raidz, &mirror)) {
if (raidz->zprl_parity != mirror->zprl_children - 1) {
Expand Down Expand Up @@ -899,7 +972,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
ret = -1;
}
}

out:
free(new);
if (current != NULL)
free(current);
Expand Down Expand Up @@ -1888,7 +1961,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
* found. We include the existing pool spec, if any, as we need to
* catch changes against the existing replication level.
*/
if (check_rep && check_replication(poolconfig, newroot) != 0) {
if (check_rep && check_replication(props, poolconfig, newroot) != 0) {
nvlist_free(newroot);
return (NULL);
}
Expand Down
2 changes: 2 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ typedef enum {
ZPOOL_PROP_DEDUP_TABLE_SIZE,
ZPOOL_PROP_DEDUP_TABLE_QUOTA,
ZPOOL_PROP_DEDUPCACHED,
ZPOOL_PROP_SPECIAL_FAILSAFE,
ZPOOL_NUM_PROPS
} zpool_prop_t;

Expand Down Expand Up @@ -1633,6 +1634,7 @@ typedef enum {
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
ZFS_ERR_ASHIFT_MISMATCH,
ZFS_ERR_STREAM_LARGE_MICROZAP,
ZFS_ERR_SPECIAL_FAILSAFE_NOT_POSSIBLE,
} zfs_errno_t;

/*
Expand Down
3 changes: 2 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1133,7 +1133,8 @@ extern boolean_t spa_remap_blkptr(spa_t *spa, blkptr_t *bp,
extern uint64_t spa_get_last_removal_txg(spa_t *spa);
extern boolean_t spa_trust_config(spa_t *spa);
extern uint64_t spa_missing_tvds_allowed(spa_t *spa);
extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing);
extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing,
uint64_t missing_special);
extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa);
extern uint64_t spa_total_metaslabs(spa_t *spa);
extern boolean_t spa_multihost(spa_t *spa);
Expand Down
10 changes: 10 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,13 @@ struct spa {
uint64_t spa_missing_tvds; /* unopenable tvds on load */
uint64_t spa_missing_tvds_allowed; /* allow loading spa? */

/*
* Number of the 'spa_missing_tvds' that are alloc class devices in a
* pool with special_failsafe on, and are thus recoverable from
* errors.
*/
uint64_t spa_missing_recovered_tvds;

uint64_t spa_nonallocating_dspace;
spa_removing_phys_t spa_removing_phys;
spa_vdev_removal_t *spa_vdev_removal;
Expand Down Expand Up @@ -479,6 +486,9 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */

/* Back up special/dedup device data to the pool */
boolean_t spa_special_failsafe;
};

extern char *spa_config_path;
Expand Down
5 changes: 5 additions & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,11 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise);
int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
void vdev_metaslab_group_create(vdev_t *vd);
uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b);
extern boolean_t vdev_is_leaf(vdev_t *vd);
extern boolean_t vdev_is_special(vdev_t *vd);
extern boolean_t vdev_is_dedup(vdev_t *vd);
extern boolean_t vdev_is_alloc_class(vdev_t *vd);
extern boolean_t vdev_is_special_failsafe(vdev_t *vd);

/*
* Vdev ashift optimization tunables
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ typedef enum spa_feature {
SPA_FEATURE_FAST_DEDUP,
SPA_FEATURE_LONGNAME,
SPA_FEATURE_LARGE_MICROZAP,
SPA_FEATURE_SPECIAL_FAILSAFE,
SPA_FEATURES
} spa_feature_t;

Expand Down
23 changes: 23 additions & 0 deletions lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,11 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<<<<<<< HEAD
<elf-symbol name='spa_feature_table' size='2464' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
=======
<elf-symbol name='spa_feature_table' size='2352' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
>>>>>>> 69e9faf47 (Special failsafe feature)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You've committed an unresolved conflict.

<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -3129,10 +3133,15 @@
<enumerator name='ZPOOL_PROP_BCLONEUSED' value='33'/>
<enumerator name='ZPOOL_PROP_BCLONESAVED' value='34'/>
<enumerator name='ZPOOL_PROP_BCLONERATIO' value='35'/>
<<<<<<< HEAD
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_SIZE' value='36'/>
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_QUOTA' value='37'/>
<enumerator name='ZPOOL_PROP_DEDUPCACHED' value='38'/>
<enumerator name='ZPOOL_NUM_PROPS' value='39'/>
=======
<enumerator name='ZPOOL_PROP_SPECIAL_FAILSAFE' value='36'/>
<enumerator name='ZPOOL_NUM_PROPS' value='37'/>
>>>>>>> 69e9faf47 (Special failsafe feature)
</enum-decl>
<typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
<typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
Expand Down Expand Up @@ -6192,10 +6201,15 @@
<enumerator name='SPA_FEATURE_AVZ_V2' value='38'/>
<enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/>
<enumerator name='SPA_FEATURE_RAIDZ_EXPANSION' value='40'/>
<<<<<<< HEAD
<enumerator name='SPA_FEATURE_FAST_DEDUP' value='41'/>
<enumerator name='SPA_FEATURE_LONGNAME' value='42'/>
<enumerator name='SPA_FEATURE_LARGE_MICROZAP' value='43'/>
<enumerator name='SPA_FEATURES' value='44'/>
=======
<enumerator name='SPA_FEATURE_SPECIAL_FAILSAFE' value='41'/>
<enumerator name='SPA_FEATURES' value='42'/>
>>>>>>> 69e9faf47 (Special failsafe feature)
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
Expand Down Expand Up @@ -9374,8 +9388,13 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
<<<<<<< HEAD
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='19712' id='fd4573e5'>
<subrange length='44' type-id='7359adad' id='cf8ba455'/>
=======
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='18816' id='b937914f'>
<subrange length='42' type-id='7359adad' id='cb7c937f'/>
>>>>>>> 69e9faf47 (Special failsafe feature)
</array-type-def>
<enum-decl name='zfeature_flags' id='6db816a4'>
<underlying-type type-id='9cac1fee'/>
Expand Down Expand Up @@ -9452,7 +9471,11 @@
<pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/>
<qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/>
<pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/>
<<<<<<< HEAD
<var-decl name='spa_feature_table' type-id='fd4573e5' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
=======
<var-decl name='spa_feature_table' type-id='b937914f' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
>>>>>>> 69e9faf47 (Special failsafe feature)
<var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
<function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/>
Expand Down
Loading
Loading