Skip to content

Commit

Permalink
btrfs: introduce mount option rescue=ignorebadroots
Browse files Browse the repository at this point in the history
In the face of extent root corruption, or any other core fs-wide root
corruption, we will fail to mount the file system.  This makes recovery
kind of a pain, because you need to fall back to userspace tools to
scrape off data.  Instead provide a mechanism to gracefully handle bad
roots, so we can at least mount read-only and possibly recover data from
the file system.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
josefbacik authored and kdave committed Dec 8, 2020
1 parent 68319c1 commit 42437a6
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 28 deletions.
48 changes: 48 additions & 0 deletions fs/btrfs/block-group.c
Original file line number Diff line number Diff line change
Expand Up @@ -1985,6 +1985,51 @@ static int read_one_block_group(struct btrfs_fs_info *info,
return ret;
}

/*
 * Create a placeholder block group for every extent map found in
 * fs_info->mapping_tree.
 *
 * btrfs_read_block_groups() calls this instead of reading block group items
 * when fs_info->extent_root is NULL.  Each placeholder is marked as completely
 * used (used == length == em->len) with caching already finished, then added
 * to the block group cache and accounted in its space_info.
 *
 * Returns 0 on success, -ENOMEM if a block group cannot be allocated, or the
 * error returned by btrfs_add_block_group_cache().  The global block reserve
 * is initialized only when every block group was set up successfully.
 */
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct btrfs_space_info *space_info;
	struct rb_node *node;
	int ret = 0;

	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
		struct extent_map *em;
		struct map_lookup *map;
		struct btrfs_block_group *bg;

		em = rb_entry(node, struct extent_map, rb_node);
		map = em->map_lookup;
		bg = btrfs_create_block_group_cache(fs_info, em->start);
		if (!bg) {
			ret = -ENOMEM;
			break;
		}

		/* Fill dummy cache as FULL */
		bg->length = em->len;
		bg->flags = map->type;
		bg->last_byte_to_unpin = (u64)-1;
		bg->cached = BTRFS_CACHE_FINISHED;
		bg->used = em->len;
		ret = btrfs_add_block_group_cache(fs_info, bg);
		if (ret) {
			/* Not inserted: undo the setup and drop our reference. */
			btrfs_remove_free_space_cache(bg);
			btrfs_put_block_group(bg);
			break;
		}

		/* Account the whole length as both total and used bytes. */
		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
					0, &space_info);
		bg->space_info = space_info;
		link_block_group(bg);

		set_avail_alloc_bits(fs_info, bg->flags);
	}
	if (!ret)
		btrfs_init_global_block_rsv(fs_info);
	return ret;
}

int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
Expand All @@ -1995,6 +2040,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
int need_clear = 0;
u64 cache_gen;

if (!info->extent_root)
return fill_dummy_bgs(info);

key.objectid = 0;
key.offset = 0;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
Expand Down
8 changes: 8 additions & 0 deletions fs/btrfs/block-rsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->delayed_block_rsv.space_info = space_info;
fs_info->delayed_refs_rsv.space_info = space_info;

/*
* Our various recovery options can leave us with NULL roots, so check
* here and just bail before we go dereferencing NULLs everywhere.
*/
if (!fs_info->extent_root || !fs_info->csum_root ||
!fs_info->dev_root || !fs_info->chunk_root || !fs_info->tree_root)
return;

fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
struct compressed_bio *cb = bio->bi_private;
u8 *cb_sum = cb->sums;

if (inode->flags & BTRFS_INODE_NODATASUM)
if (!fs_info->csum_root || (inode->flags & BTRFS_INODE_NODATASUM))
return 0;

shash->tfm = fs_info->csum_shash;
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
#define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29)
#define BTRFS_MOUNT_IGNOREBADROOTS (1 << 30)

#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
Expand Down
65 changes: 41 additions & 24 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -2307,30 +2307,39 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)

root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->extent_root = root;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->extent_root = root;

location.objectid = BTRFS_DEV_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->dev_root = root;
btrfs_init_devices_late(fs_info);
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->dev_root = root;
btrfs_init_devices_late(fs_info);

location.objectid = BTRFS_CSUM_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->csum_root = root;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->csum_root = root;

/*
* This tree can share blocks with some other fs tree during relocation
Expand All @@ -2339,11 +2348,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
root = btrfs_get_fs_root(tree_root->fs_info,
BTRFS_DATA_RELOC_TREE_OBJECTID, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->data_reloc_root = root;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->data_reloc_root = root;

location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
Expand All @@ -2356,9 +2368,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
location.objectid = BTRFS_UUID_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
if (ret != -ENOENT)
goto out;
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
if (ret != -ENOENT)
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->uuid_root = root;
Expand All @@ -2368,11 +2382,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->free_space_root = root;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->free_space_root = root;
}

return 0;
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/file-item.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
int count = 0;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
return BLK_STS_OK;

path = btrfs_alloc_path();
Expand Down
6 changes: 5 additions & 1 deletion fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2187,7 +2187,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);

skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
!fs_info->csum_root;

if (btrfs_is_free_space_inode(BTRFS_I(inode)))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
Expand Down Expand Up @@ -2902,6 +2903,9 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
return 0;

if (!root->fs_info->csum_root)
return 0;

if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
Expand Down
12 changes: 11 additions & 1 deletion fs/btrfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ enum {
Opt_rescue,
Opt_usebackuproot,
Opt_nologreplay,
Opt_ignorebadroots,

/* Deprecated options */
Opt_recovery,
Expand Down Expand Up @@ -455,6 +456,8 @@ static const match_table_t tokens = {
/*
 * Token table for the rescue mount sub-options: each entry pairs an Opt_*
 * token with the string that selects it.  "ignorebadroots" and the shorter
 * "ibadroots" both map to Opt_ignorebadroots.  The last entry pairs Opt_err
 * with a NULL pattern.
 */
static const match_table_t rescue_tokens = {
{Opt_usebackuproot, "usebackuproot"},
{Opt_nologreplay, "nologreplay"},
{Opt_ignorebadroots, "ignorebadroots"},
{Opt_ignorebadroots, "ibadroots"},
{Opt_err, NULL},
};

Expand Down Expand Up @@ -498,6 +501,10 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
case Opt_ignorebadroots:
btrfs_set_and_info(info, IGNOREBADROOTS,
"ignoring bad roots");
break;
case Opt_err:
btrfs_info(info, "unrecognized rescue option '%s'", p);
ret = -EINVAL;
Expand Down Expand Up @@ -983,7 +990,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
if (new_flags & SB_RDONLY)
goto out;

if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay"))
if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots"))
ret = -EINVAL;
out:
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
Expand Down Expand Up @@ -1439,6 +1447,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
print_rescue_option(seq, "nologreplay", &printed);
if (btrfs_test_opt(info, USEBACKUPROOT))
print_rescue_option(seq, "usebackuproot", &printed);
if (btrfs_test_opt(info, IGNOREBADROOTS))
print_rescue_option(seq, "ignorebadroots", &printed);
if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(info, DISCARD_SYNC))
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
/*
 * Names of the rescue options known to this file.
 * NOTE(review): presumably the list reported through sysfs by
 * supported_rescue_options_show() -- confirm against that function.
 */
static const char *rescue_opts[] = {
"usebackuproot",
"nologreplay",
"ignorebadroots",
};

static ssize_t supported_rescue_options_show(struct kobject *kobj,
Expand Down
13 changes: 13 additions & 0 deletions fs/btrfs/volumes.c
Original file line number Diff line number Diff line change
Expand Up @@ -7659,6 +7659,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
u64 prev_dev_ext_end = 0;
int ret = 0;

/*
* We don't have a dev_root because we mounted with ignorebadroots and
* failed to load the root, so we want to skip the verification in this
* case for sure.
*
* However if the dev root is fine, but the tree itself is corrupted
* we'd still fail to mount. This verification is only to make sure
* writes can happen safely, so instead just bypass this check
* completely in the case of IGNOREBADROOTS.
*/
if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
return 0;

key.objectid = 1;
key.type = BTRFS_DEV_EXTENT_KEY;
key.offset = 0;
Expand Down

0 comments on commit 42437a6

Please sign in to comment.