From 12fac856b172f56d2b08ce9d43d55c2fb0964e13 Mon Sep 17 00:00:00 2001 From: Jitendra Patidar Date: Thu, 25 Jul 2019 06:28:23 -0700 Subject: [PATCH] ZFS fix to make xattr=sa syncing to ZIL on create/remove/update. In current implementation of xattr=sa, it doesn't sync to ZIL before returning on xattr create/remove/update, so if node crash happens before xattr gets synced to storage, then corresponding xattr are lost. This diff makes xattr=sa syncing to ZIL on create/remove/update, so that xattr's are not lost on node crash. Signed-off-by: Jitendra Patidar Closes #8768 --- cmd/zdb/zdb_il.c | 27 ++++ cmd/ztest/ztest.c | 1 + include/sys/zfs_sa.h | 2 +- include/sys/zfs_znode.h | 2 + include/sys/zil.h | 13 +- include/zfeature_common.h | 1 + module/zcommon/zfeature_common.c | 11 ++ module/zfs/zfs_log.c | 34 +++++ module/zfs/zfs_replay.c | 87 +++++++++++ module/zfs/zfs_sa.c | 24 +++- module/zfs/zpl_xattr.c | 2 +- tests/runfiles/linux.run | 2 +- .../tests/functional/slog/Makefile.am | 3 +- .../tests/functional/slog/slog_016_pos.ksh | 136 ++++++++++++++++++ 14 files changed, 337 insertions(+), 8 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index c12178effae0..f2bca777edd9 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -266,6 +266,32 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, void *arg) } } +/* ARGSUSED */ +static void +zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, void *arg) +{ + lr_setsaxattr_t *lr = arg; + char *name; + char *val; + int i; + + name = (char *)(lr + 1); + (void) printf("%sfoid %llu\n", tab_prefix, + (u_longlong_t)lr->lr_foid); + + (void) printf("%sXAT_NAME %s\n", tab_prefix, name); + if (lr->lr_size == 0) { + (void) printf("%sXAT_VALUE NULL\n", tab_prefix); + } else { + printf("%sXAT_VALUE ", tab_prefix); + val = name + (strlen(name) + 1); + for (i = 0; i < lr->lr_size; i++) { + printf("%c", *val); + val++; + } + } +} + /* ARGSUSED */ static void zil_prt_rec_acl(zilog_t *zilog, int txtype, void *arg) @@ -305,6 +331,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "}, {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "}, {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "}, + {.zri_print = zil_prt_rec_setsaxattr, .zri_name = "TX_SETSAXATTR "}, }; /* ARGSUSED */ diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 01421e85bf0a..d652cf1a5f2e 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2132,6 +2132,7 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { NULL, /* TX_MKDIR_ATTR */ NULL, /* TX_MKDIR_ACL_ATTR */ NULL, /* TX_WRITE2 */ + NULL, /* TX_SETSAXATTR */ }; /* diff --git a/include/sys/zfs_sa.h b/include/sys/zfs_sa.h index 4e6d28638ef6..82b249b8aa21 100644 --- a/include/sys/zfs_sa.h +++ b/include/sys/zfs_sa.h @@ -139,7 +139,7 @@ void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *); void zfs_sa_get_scanstamp(struct znode *, xvattr_t *); void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *); int zfs_sa_get_xattr(struct znode *); -int zfs_sa_set_xattr(struct znode *); +int zfs_sa_set_xattr(struct znode *, const char *, const void *, size_t); void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *); void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *); void zfs_sa_init(void); diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d4a3ea769331..e9c6483371d1 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -390,6 +390,8 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, vsecattr_t *vsecp, zfs_fuid_info_t *fuidp); extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx); extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx); +extern void zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, + znode_t *zp, const char *name, const void *value, size_t size); #if defined(HAVE_UIO_RW) extern caddr_t zfs_map_page(page_t *, enum seg_rw); diff --git a/include/sys/zil.h b/include/sys/zil.h index cfa5e3995505..034fb3fa10e2 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -162,7 +162,8 @@ typedef enum zil_create { #define TX_MKDIR_ATTR 18 /* mkdir with attr */ #define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */ #define TX_WRITE2 20 /* dmu_sync EALREADY write */ -#define TX_MAX_TYPE 21 /* Max transaction type */ +#define TX_SETSAXATTR 21 /* Set sa xattrs on file */ +#define TX_MAX_TYPE 22 /* Max transaction type */ /* * The transactions for mkdir, symlink, remove, rmdir, link, and rename @@ -182,7 +183,8 @@ typedef enum zil_create { (txtype) == TX_SETATTR || \ (txtype) == TX_ACL_V0 || \ (txtype) == TX_ACL || \ - (txtype) == TX_WRITE2) + (txtype) == TX_WRITE2 || \ + (txtype) == TX_SETSAXATTR) /* * The number of dnode slots consumed by the object is stored in the 8 @@ -335,6 +337,13 @@ typedef struct { /* optional attribute lr_attr_t may be here */ } lr_setattr_t; +typedef struct { + lr_t lr_common; /* common portion of log record */ + uint64_t lr_foid; /* file object to change attributes */ + uint64_t lr_size; + /* xattr name and value follows */ +} lr_setsaxattr_t; + typedef struct { lr_t lr_common; /* common portion of log record */ uint64_t lr_foid; /* obj id of file */ diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 4f78229736e3..5b7b6745e645 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -72,6 +72,7 @@ typedef enum spa_feature { SPA_FEATURE_BOOKMARK_WRITTEN, SPA_FEATURE_LOG_SPACEMAP, SPA_FEATURE_LIVELIST, + SPA_FEATURE_ZILSAXATTR, SPA_FEATURES } spa_feature_t; diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 8e1aef5daa77..b46d6392a012 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -553,6 +553,17 @@ zpool_feature_init(void) "com.datto:resilver_defer", "resilver_defer", "Support for defering new resilvers when one is already running.", ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL); + { + static const spa_feature_t zilsaxattr_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_ZILSAXATTR, + "org.zfsonlinux:zilsaxattr", "zilsaxattr", + "Zil sa xattr.", + ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET, + ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps); + } } #if defined(_KERNEL) diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index ad5b5cf30b1e..b3c984a55d80 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -659,6 +659,40 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, zil_itx_assign(zilog, itx, tx); } +/* + * Handles TX_SETSAXATTR transactions. + */ +void +zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, + znode_t *zp, const char *name, const void *value, size_t size) +{ + itx_t *itx; + lr_setsaxattr_t *lr; + size_t recsize = sizeof (lr_setsaxattr_t); + void *xattrstart; + int namelen; + + if (zil_replaying(zilog, tx) || zp->z_unlinked) + return; + + namelen = strlen(name) + 1; + recsize += (namelen + size); + itx = zil_itx_create(txtype, recsize); + lr = (lr_setsaxattr_t *)&itx->itx_lr; + lr->lr_foid = zp->z_id; + xattrstart = (char *)(lr + 1); + bcopy(name, xattrstart, namelen); + if (value != NULL) { + bcopy(value, xattrstart + namelen, size); + lr->lr_size = size; + } else { + lr->lr_size = 0; + } + + itx->itx_sync = (zp->z_sync_cnt != 0); + zil_itx_assign(zilog, itx, tx); +} + /* * Handles TX_ACL transactions. */ diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 144381769059..f140d784b453 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -48,6 +48,9 @@ #include #include #include +#include +#include +#include /* * Functions to replay ZFS intent log (ZIL) records @@ -860,6 +863,89 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) return (error); } +static int +zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap) +{ + zfsvfs_t *zfsvfs = arg1; + lr_setsaxattr_t *lr = arg2; + znode_t *zp; + nvlist_t *nvl; + size_t sa_size; + const char *name; + const void *value; + size_t size; + int error = 0; + + ASSERT(spa_feature_is_enabled(zfsvfs->z_os->os_spa, + SPA_FEATURE_ZILSAXATTR)); + if (byteswap) { + byteswap_uint64_array(lr, sizeof (*lr)); + } + + if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + return (error); + + rw_enter(&zp->z_xattr_lock, RW_WRITER); + mutex_enter(&zp->z_lock); + if (zp->z_xattr_cached == NULL) + error = -zfs_sa_get_xattr(zp); + mutex_exit(&zp->z_lock); + + if (error) + goto out; + + ASSERT(zp->z_xattr_cached); + nvl = zp->z_xattr_cached; + + /* Get xattr name, value and size from log record */ + size = lr->lr_size; + name = (char *)(lr + 1); + if (size == 0) { + value = NULL; + error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + } else { + value = name + strlen(name) + 1; + /* Limited to 32k to keep nvpair memory allocations small */ + if (size > DXATTR_MAX_ENTRY_SIZE) { + error = (-EFBIG); + goto out; + } + + /* Prevent the DXATTR SA from consuming the entire SA region */ + error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); + if (error) + goto out; + + if (sa_size > DXATTR_MAX_SA_SIZE) { + error = (-EFBIG); + goto out; + } + + error = -nvlist_add_byte_array(nvl, name, (uchar_t *)value, + size); + } + + /* + * Update the SA for additions, modifications, and removals. On + * error drop the inconsistent cached version of the nvlist, it + * will be reconstructed from the ARC when next accessed. + */ + if (error == 0) + error = -zfs_sa_set_xattr(zp, name, value, size); + + if (error) { + nvlist_free(nvl); + zp->z_xattr_cached = NULL; + } + + ASSERT3S(error, <=, 0); + +out: + rw_exit(&zp->z_xattr_lock); + iput(ZTOI(zp)); + return (error); +} + static int zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap) { @@ -981,4 +1067,5 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_create, /* TX_MKDIR_ATTR */ zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ zfs_replay_write2, /* TX_WRITE2 */ + zfs_replay_setsaxattr, /* TX_SETSAXATTR */ }; diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index bd21ba896cc3..a3d2fe03e6c4 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -27,8 +27,10 @@ #include #include #include +#include #include #include +#include /* * ZPL attribute registration table. @@ -69,6 +71,8 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { {NULL, 0, 0, 0} }; +int zfs_zil_saxattr_enabled = 1; + #ifdef _KERNEL int zfs_sa_readlink(znode_t *zp, uio_t *uio) @@ -218,13 +222,14 @@ zfs_sa_get_xattr(znode_t *zp) } int -zfs_sa_set_xattr(znode_t *zp) +zfs_sa_set_xattr(znode_t *zp, const char *name, const void *value, size_t vsize) { zfsvfs_t *zfsvfs = ZTOZSB(zp); + zilog_t *zilog; dmu_tx_t *tx; char *obj; size_t size; - int error; + int error, logsaxattr = 0; ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); ASSERT(zp->z_xattr_cached); @@ -243,6 +248,11 @@ zfs_sa_set_xattr(znode_t *zp) if (error) goto out_free; + zilog = zfsvfs->z_log; + if (spa_feature_is_enabled(zfsvfs->z_os->os_spa, + SPA_FEATURE_ZILSAXATTR) && zfs_zil_saxattr_enabled) { + logsaxattr = 1; + } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa_create(tx, size); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); @@ -255,6 +265,10 @@ zfs_sa_set_xattr(znode_t *zp) sa_bulk_attr_t bulk[2]; uint64_t ctime[2]; + if (logsaxattr) { + zfs_log_setsaxattr(zilog, tx, TX_SETSAXATTR, zp, name, + value, vsize); + } zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DXATTR(zfsvfs), NULL, obj, size); @@ -263,6 +277,8 @@ zfs_sa_set_xattr(znode_t *zp) VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); dmu_tx_commit(tx); + if (logsaxattr && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); } out_free: vmem_free(obj, size); @@ -432,6 +448,10 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) } } +module_param(zfs_zil_saxattr_enabled, int, 0644); +MODULE_PARM_DESC(zfs_zil_saxattr_enabled, + "Toggle xattr=sa extended attribute need to be logged in zil. "); + EXPORT_SYMBOL(zfs_attr_table); EXPORT_SYMBOL(zfs_sa_readlink); EXPORT_SYMBOL(zfs_sa_symlink); diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index 95523f28e3b4..46247de026ba 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -567,7 +567,7 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value, * will be reconstructed from the ARC when next accessed. */ if (error == 0) - error = -zfs_sa_set_xattr(zp); + error = -zfs_sa_set_xattr(zp, name, value, size); if (error) { nvlist_free(nvl); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 23ea633352d6..6f904cfe1a79 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -834,7 +834,7 @@ tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg', 'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs', - 'slog_replay_volume'] + 'slog_replay_volume', 'slog_016_pos'] tags = ['functional', 'slog'] [tests/functional/snapshot] diff --git a/tests/zfs-tests/tests/functional/slog/Makefile.am b/tests/zfs-tests/tests/functional/slog/Makefile.am index 4548ce63b40c..849344d13aee 100644 --- a/tests/zfs-tests/tests/functional/slog/Makefile.am +++ b/tests/zfs-tests/tests/functional/slog/Makefile.am @@ -18,7 +18,8 @@ dist_pkgdata_SCRIPTS = \ slog_014_pos.ksh \ slog_015_neg.ksh \ slog_replay_fs.ksh \ - slog_replay_volume.ksh + slog_replay_volume.ksh \ + slog_016_pos.ksh dist_pkgdata_DATA = \ slog.cfg \ diff --git a/tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh b/tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh new file mode 100755 index 000000000000..5e336c784ff2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/slog/slog_016_pos.ksh @@ -0,0 +1,136 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 by Nutanix. All rights reserved. +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +# +# DESCRIPTION: +# Verify saxattr logging in to ZIL works +# +# STRATEGY: +# 1. Create an empty file system (TESTFS) +# 2. Freeze TESTFS +# 3. Create Xattrs. +# 4. Unmount filesystem +# +# 5. Remount TESTFS +# 6. Check xattrs. +# + +verify_runnable "global" + +log_assert "Verify saxattr logging in to ZIL works" +log_onexit cleanup + +NFILES=10 +function validate_zil_saxattr +{ + saxattrzil=$1 + + echo $saxattrzil > /sys/module/zfs/parameters/zfs_zil_saxattr_enabled + + # + # 1. Create an empty file system (TESTFS) + # + log_must zpool create $TESTPOOL $VDEV log mirror $LDEV + log_must zfs set compression=on $TESTPOOL + log_must zfs create -o xattr=sa $TESTPOOL/$TESTFS + log_must mkdir -p $TESTDIR + + # + # This dd command works around an issue where ZIL records aren't created + # after freezing the pool unless a ZIL header already exists. Create a + # file synchronously to force ZFS to write one out. + # + log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ + conv=fdatasync,fsync bs=1 count=1 + + # + # 2. Freeze TESTFS + # + log_must zpool freeze $TESTPOOL + + rm /$TESTPOOL/$TESTFS/sync + # + # 3. Create xattrs + # + for i in $(seq $NFILES); do + log_must mkdir /$TESTPOOL/$TESTFS/xattr.d.$i + log_must attr -qs test -V test /$TESTPOOL/$TESTFS/xattr.d.$i + + log_must touch /$TESTPOOL/$TESTFS/xattr.f.$i + log_must attr -qs test -V test /$TESTPOOL/$TESTFS/xattr.f.$i + done + + # + # 4. Unmount filesystem and export the pool + # + # At this stage TESTFS is empty again and unfrozen, and the + # intent log contains a complete set of deltas to replay it. + # + log_must zfs unmount /$TESTPOOL/$TESTFS + + log_note "Verify transactions to replay:" + log_must zdb -iv $TESTPOOL/$TESTFS + + log_must zpool export $TESTPOOL + + # + # 5. Remount TESTFS + # + # Import the pool to unfreeze it and claim log blocks. It has to be + # `zpool import -f` because we can't write a frozen pool's labels! + # + log_must zpool import -f -d $VDIR $TESTPOOL + + # + # 6. Verify Xattr + # If saxattrzil=0, then xattr=sa logging in zil is not enabled, + # So, xattrs would be lost. + # If saxattrzil=1, then xattr=sa logging in zil is enabled, + # So, xattrs shouldn't be lost. + # + for i in $(seq $NFILES); do + if [ $saxattrzil -eq 0 ]; then + log_mustnot attr -g test /$TESTPOOL/$TESTFS/xattr.d.$i + log_mustnot attr -g test /$TESTPOOL/$TESTFS/xattr.f.$i + else + log_must attr -g test /$TESTPOOL/$TESTFS/xattr.d.$i + log_must attr -g test /$TESTPOOL/$TESTFS/xattr.f.$i + fi + done + + cleanup +} + +#Validate xattr=sa logging in zil disabled. +validate_zil_saxattr 0 + +#Validate xattr=sa logging in zil enabled. +validate_zil_saxattr 1 + +log_pass "Verify saxattr logging in to ZIL works"