Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove zpl_revalidate: fix snapshot rollback #9600

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions config/kernel-dentry-alias.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
dnl #
dnl # 3.18 API change
dnl # Dentry aliases are in d_u struct dentry member
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [
ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/list.h>
], [
struct inode *inode __attribute__ ((unused)) = NULL;
struct dentry *dentry __attribute__ ((unused)) = NULL;
hlist_for_each_entry(dentry, &inode->i_dentry,
d_u.d_alias) {
d_drop(dentry);
}
])
])

AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [
AC_MSG_CHECKING([whether dentry aliases are in d_u member])
ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1,
[dentry aliases are in d_u member])
],[
AC_MSG_RESULT(no)
])
])

2 changes: 2 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_SRC_DENTRY
ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SRC_SECURITY_INODE
ZFS_AC_KERNEL_SRC_FST_MOUNT
Expand Down Expand Up @@ -180,6 +181,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_SETATTR_PREPARE
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_DENTRY
ZFS_AC_KERNEL_DENTRY_ALIAS_D_U
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SECURITY_INODE
ZFS_AC_KERNEL_FST_MOUNT
Expand Down
21 changes: 21 additions & 0 deletions include/os/linux/kernel/linux/dcache_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry)
DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
}

/*
* Walk and invalidate all dentry aliases of an inode
* unless it's a mountpoint
*/
static inline void
zpl_d_drop_aliases(struct inode *inode)
{
struct dentry *dentry;
spin_lock(&inode->i_lock);
#ifdef HAVE_DENTRY_D_U_ALIASES
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
#else
hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
#endif
if (!IS_ROOT(dentry) && !d_mountpoint(dentry) &&
(dentry->d_inode == inode)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this inode's dentry list only contain dentries for this inode?

In any case, I agree with @behlendorf that we should remove everything about this filesystem from the directory entry cache, since a rename could result in invalid dentry cache even though zfs_rezget() would succeed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this inode's dentry list only contain dentries for this inode?

Negative dentries used be identified by setting dentry->d_inode == NULL. Though, I'm not certain if they ever were include in the ->i_dentry, they may only have been hashed. Looking at more recent kernels they've add some DCACHE_ENTRY_TYPE bits to ->d_flags since there are now more dentry types. Regardless, unless there's some reason we can't we should drop everything to force a new lookup.

d_drop(dentry);
}
}
spin_unlock(&inode->i_lock);
}
#endif /* _ZFS_DCACHE_H */
1 change: 0 additions & 1 deletion include/os/linux/zfs/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ extern const struct inode_operations zpl_inode_operations;
extern const struct inode_operations zpl_dir_inode_operations;
extern const struct inode_operations zpl_symlink_inode_operations;
extern const struct inode_operations zpl_special_inode_operations;
extern dentry_operations_t zpl_dentry_operations;

/* zpl_file.c */
extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
Expand Down
2 changes: 1 addition & 1 deletion module/os/linux/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1926,7 +1926,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
sb->s_op = &zpl_super_operations;
sb->s_xattr = zpl_xattr_handlers;
sb->s_export_op = &zpl_export_operations;
sb->s_d_op = &zpl_dentry_operations;

/* Set features for file system. */
zfs_set_fuid_feature(zfsvfs);
Expand Down Expand Up @@ -2275,6 +2274,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
err2 = zfs_rezget(zp);
if (err2) {
zpl_d_drop_aliases(ZTOI(zp));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After giving this some more thought I think we need to always drop all of the aliases, even when the zfs_rezget() succeeds. Otherwise, we'll incorrectly handle the case when the file does still exist after the rollback, and a hard link to that files gets removed by the rollback, but that dentry doesn't get correctly dropped.

The z_is_stale field can be removed, and the comment above should be updated. How about:

        /* 
         * Attempt to re-establish all the active inodes with their dbufs.
         * If zfs_rezget() fails, then we unhash the inode to prevent a
         * collision if a new inode is allocated with the same inode/object
         * number.  All dentries for the inode are dropped from the cache
         * since they may no longer be valid and a fresh lookup is required.
         */

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem, would you have any hints & recommendations wrt/ testing environment?

I'd try some ACL changes between the snapshot and rollback too; is it OK to add and remove users in the test suite?

So I'll try:

  • changing file metadata and see if that works
  • hardlinks
  • what else, please? :)

Copy link
Contributor

@behlendorf behlendorf Dec 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd try some ACL changes between the snapshot and rollback too; is it OK to add and remove users in the test suite?

Great. Yes, you can add and remove users in the ZTS with the add_user and del_user helper functions. Take a look at tests/functional/userquota/ tests for an example of this.

I think we should verify all of the following for correctness after an online rollback.

  • File types: normal files, directories, hardlinks, symlinks, O_TMPFILE.
  • Metadata: permissions, group, owner, xattrs (sa and diretory)
  • Rollback invalidates negative dentries
  • Rollback with open file handles
  • Rollback with open file handles of unlinked files.

Additionally, I think we would get a lot of value stressing the code by including a test case which runs for a minute or two and randomly performs snapshots and rollbacks of an active filesystem. In particular, this would be a good way to test open file handles and unlinks are handled properly. Related to exactly this I opened #9739 with a proposed fix.

[edit] See also #9741 which is similarly related. Though in this case it's due to an inode hash collision. See comment in zfs_znode_alloc().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been trying to figure out, how to do the stress test part, but I can't come up with an elegant solution, which I could really call 'stress' test.

For one, writing a multithreaded beast in userspace C is something I'd rather avoid, but even if I bite the bullet, what should the threads do, so that it be considered stress testing stuff and not just repeating the same workload over?

So I'm thinking it's best to admit, that I'm stuck and ask for help, so that we can move on with this :)

Maybe let's skip the 'stress' test part, only do various permutations of ^ what you listed, perhaps repeat that process a few times and check the status of things in-between the steps - opening a file and leaving it open should be possible directly in the Korn shell. That's a fallback solution, so that we get at least somewhere, to move the whole OverlayFS thing forward in reasonable time frame. Would that work?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like a reasonable path forward to me. We can always add additional test case as we come up with specific scenarios.

Copy link
Contributor Author

@snajpa snajpa Jul 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@didrocks says I might be able to get some help from the Docker guys with writing the tests - for which I'm sorry, but I can't seem to find the time ... bugs are popping up in weird places and we desperately need to finish the kernel upgrade @ our infra. Sh*t.

I can understand the pain of not having the overlay2 Docker storage driver available...

remove_inode_hash(ZTOI(zp));
zp->z_is_stale = B_TRUE;
snajpa marked this conversation as resolved.
Show resolved Hide resolved
}
Expand Down
44 changes: 0 additions & 44 deletions module/os/linux/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,46 +602,6 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
return (error);
}

static int
#ifdef HAVE_D_REVALIDATE_NAMEIDATA
zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
{
unsigned int flags = (nd ? nd->flags : 0);
#else
zpl_revalidate(struct dentry *dentry, unsigned int flags)
{
#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
/* CSTYLED */
zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
int error;

if (flags & LOOKUP_RCU)
return (-ECHILD);

/*
* After a rollback negative dentries created before the rollback
* time must be invalidated. Otherwise they can obscure files which
* are only present in the rolled back dataset.
*/
if (dentry->d_inode == NULL) {
spin_lock(&dentry->d_lock);
error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
spin_unlock(&dentry->d_lock);

if (error)
return (0);
}

/*
* The dentry may reference a stale inode if a mounted file system
* was rolled back to a point in time where the object didn't exist.
*/
if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
return (0);

return (1);
}

const struct inode_operations zpl_inode_operations = {
.setattr = zpl_setattr,
.getattr = zpl_getattr,
Expand Down Expand Up @@ -730,7 +690,3 @@ const struct inode_operations zpl_special_inode_operations = {
.get_acl = zpl_get_acl,
#endif /* CONFIG_FS_POSIX_ACL */
};

dentry_operations_t zpl_dentry_operations = {
.d_revalidate = zpl_revalidate,
};