From 018e2147a54b8cc0c570bc5a2b0e3205a29e4e2c Mon Sep 17 00:00:00 2001 From: Dengyu Sun Date: Wed, 21 Dec 2022 07:21:52 -0500 Subject: [PATCH] libuzfs && uzfs: reimplement attr/xattr store using zfs sa and add tests --- cmd/uzfs/uzfs.c | 292 ++++++++++++++++-- include/libuzfs.h | 64 ++-- include/libuzfs_impl.h | 13 + include/sys/kernel.h | 2 +- lib/libuzfs/Makefile.am | 4 +- lib/libuzfs/libuzfs.c | 410 +++++++++---------------- lib/libuzfs/libuzfs_attr.c | 612 +++++++++++++++++++++++++++++++++++++ 7 files changed, 1099 insertions(+), 298 deletions(-) create mode 100644 lib/libuzfs/libuzfs_attr.c diff --git a/cmd/uzfs/uzfs.c b/cmd/uzfs/uzfs.c index 30a75cb3c860..3ef0bc5a3eee 100644 --- a/cmd/uzfs/uzfs.c +++ b/cmd/uzfs/uzfs.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include static int uzfs_zpool_create(int argc, char **argv); static int uzfs_zpool_destroy(int argc, char **argv); @@ -90,6 +92,7 @@ static int uzfs_inode_get_kvobj(int argc, char **argv); static int uzfs_inode_get_kvattr(int argc, char **argv); static int uzfs_inode_set_kvattr(int argc, char **argv); static int uzfs_inode_rm_kvattr(int argc, char **argv); +static int uzfs_attr_random_test(int argc, char **argv); static int uzfs_dentry_create(int argc, char **argv); static int uzfs_dentry_delete(int argc, char **argv); @@ -141,6 +144,7 @@ typedef enum { HELP_DENTRY_CREATE, HELP_DENTRY_DELETE, HELP_DENTRY_LOOKUP, + HELP_ATTR_TEST } uzfs_help_t; typedef struct uzfs_command { @@ -204,6 +208,7 @@ static uzfs_command_t command_table[] = { { "create-dentry", uzfs_dentry_create, HELP_DENTRY_CREATE }, { "delete-dentry", uzfs_dentry_delete, HELP_DENTRY_DELETE }, { "lookup-dentry", uzfs_dentry_lookup, HELP_DENTRY_DELETE }, + { "attr-test", uzfs_attr_random_test, HELP_ATTR_TEST }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -304,6 +309,8 @@ get_usage(uzfs_help_t idx) return (gettext("\tdelete-dentry ...\n")); case HELP_DENTRY_LOOKUP: return
(gettext("\tlookup-dentry ...\n")); + case HELP_ATTR_TEST: + return (gettext("\tattr-test ...\n")); default: __builtin_unreachable(); } @@ -370,6 +377,38 @@ static void print_stat(const char *name, struct stat *stat) stat->st_blocks); } +static void +print_stat_sa(const char *name, uzfs_attr_t *stat) +{ + const char *format = + "ino: %lu\n" + "pino: %lu\n" + "psid: %u\n" + "ftype: %d\n" + "gen: %lu\n" + "nlink: %u\n" + "perm: %u\n" + "uid: %u\n" + "gid: %u\n" + "size: %lu\n" + "blksize: %lu\n" + "blocks: %lu\n" + "nsid: %lu\n" + "atime: (%lu, %lu)\n" + "mtime: (%lu, %lu)\n" + "ctime: (%lu, %lu)\n" + "btime: (%lu, %lu)\n"; + + printf(format, stat->ino, stat->pino, stat->psid, + stat->ftype, stat->gen, stat->nlink, stat->perm, + stat->uid, stat->gid, stat->size, stat->blksize, + stat->blocks, stat->nsid, stat->atime.tv_sec, + stat->atime.tv_nsec, stat->mtime.tv_sec, + stat->mtime.tv_nsec, stat->ctime.tv_sec, + stat->ctime.tv_nsec, stat->btime.tv_sec, + stat->btime.tv_nsec); +} + static int find_command_idx(char *command, int *idx) { @@ -1084,15 +1123,15 @@ uzfs_inode_getattr(int argc, char **argv) return (-1); } - struct stat buf; + uzfs_attr_t buf; memset(&buf, 0, sizeof (buf)); - err = libuzfs_inode_getattr(dhp, obj, &buf, sizeof (buf)); + err = libuzfs_inode_getattr(dhp, obj, &buf); if (err) printf("failed to get attr inode %ld on dataset: %s\n", obj, dsname); else - print_stat(NULL, &buf); + print_stat_sa(NULL, &buf); libuzfs_dataset_close(dhp); @@ -1114,28 +1153,29 @@ uzfs_inode_setattr(int argc, char **argv) return (-1); } - struct stat buf; + uzfs_attr_t buf; memset(&buf, 0, sizeof (buf)); - buf.st_ino = obj; - buf.st_mode = 0x1; - buf.st_nlink = 1; - buf.st_uid = 0; - buf.st_gid = 0; - buf.st_size = 4096; -// buf.st_atime.tv_sec = 0; -// buf.st_mtime.tv_sec = 0; -// buf.st_ctime.tv_sec = 0; - buf.st_blocks = 8; - buf.st_blksize = 512; - - uint64_t txg = 0; - err = libuzfs_inode_setattr(dhp, obj, &buf, sizeof (buf), &txg); + buf.ino = obj; + buf.pino = 0; + 
buf.psid = 0; + buf.ftype = TYPE_FILE; + buf.gen = 1; + buf.nlink = 1; + buf.perm = 0; + buf.uid = 12358; + buf.gid = 85321; + buf.size = 0; + buf.blksize = 65536; + buf.blocks = 1; + buf.nsid = 1; + + err = libuzfs_inode_setattr(dhp, obj, &buf, NULL); if (err) printf("failed to get attr inode %ld on dataset: %s\n", obj, dsname); else - print_stat(NULL, &buf); + print_stat_sa(NULL, &buf); libuzfs_dataset_close(dhp); @@ -1259,6 +1299,220 @@ uzfs_inode_rm_kvattr(int argc, char **argv) return (err); } +static int +uzfs_attr_cmp(uzfs_attr_t *lhs, uzfs_attr_t *rhs) +{ + return lhs->psid == rhs->psid && lhs->ftype == rhs->ftype&& + lhs->gen == rhs->gen && lhs->nlink == rhs->nlink && + lhs->perm == rhs->perm && lhs->gid == rhs->gid && + lhs->size == rhs->size && lhs->blksize == rhs->blksize && + lhs->blocks == rhs->blocks && lhs->nsid == rhs->nsid && + lhs->atime.tv_nsec == rhs->atime.tv_nsec && + lhs->atime.tv_sec == rhs->atime.tv_sec && + lhs->mtime.tv_nsec == rhs->mtime.tv_nsec && + lhs->mtime.tv_sec == rhs->mtime.tv_sec && + lhs->ctime.tv_nsec == rhs->ctime.tv_nsec && + lhs->ctime.tv_sec == rhs->ctime.tv_sec && + lhs->btime.tv_nsec == rhs->btime.tv_nsec && + lhs->btime.tv_sec == rhs->btime.tv_sec; +} + +static boolean_t +uzfs_attr_ops(libuzfs_dataset_handle_t *dhp, uint64_t *ino, + libuzfs_inode_type_t *type, uzfs_attr_t *cur_attr, + nvlist_t *nvl, boolean_t *reset) +{ + int delete_proportion = 1; + int getkvattr_proportion = 2; + int setkvattr_proportion = 4; + int deletekvattr_proportion = 2; + int getattr_proportion = 6; + int setattr_proportion = 6; + int total_proportion = delete_proportion + getkvattr_proportion + + setkvattr_proportion + deletekvattr_proportion + + getattr_proportion + setattr_proportion; + + int op = rand() % total_proportion; + *reset = B_FALSE; + if (op < delete_proportion) { + // delete inode + if (*ino != 0) { + VERIFY0(libuzfs_inode_delete(dhp, *ino, *type, NULL)); + memset(cur_attr, 0, sizeof (*cur_attr)); + *ino = 0; + *reset = B_TRUE; 
+ } + return (B_TRUE); + } + op -= delete_proportion; + + if (*ino == 0) { + *type = rand() % 2; + VERIFY0(libuzfs_inode_create(dhp, ino, *type, NULL)); + } + if (op < getkvattr_proportion) { + // get all kvattr and check + nvpair_t *elem = NULL; + while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + char *name = nvpair_name(elem); + char *value = NULL; + uint_t size; + VERIFY0(nvpair_value_byte_array(elem, + (uchar_t **)(&value), &size)); + char *stored_value = + (char *)umem_alloc(size, UMEM_NOFAIL); + ssize_t rc = libuzfs_inode_get_kvattr(dhp, *ino, name, + stored_value, (uint64_t)(size), 0); + ASSERT3U(rc, ==, size); + if (memcmp(value, stored_value, size) != 0) { + return (B_FALSE); + } + umem_free(stored_value, size); + } + return (B_TRUE); + } + op -= getkvattr_proportion; + + if (op < setkvattr_proportion) { + char *name = NULL; + int name_size = 0; + if (rand() % 4 < 1) { + nvpair_t *elem = NULL; + if ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + char *stored_name = nvpair_name(elem); + name_size = strlen(stored_name); + name = umem_alloc(name_size + 1, UMEM_NOFAIL); + strcpy(name, stored_name); + name[name_size] = '\0'; + } + } + if (name == NULL) { + name_size = rand() % 50 + 1; + name = umem_alloc(name_size + 1, UMEM_NOFAIL); + for (int i = 0; i < name_size; ++i) { + name[i] = rand() % 26 + 'a'; + } + name[name_size] = '\0'; + } + + uint_t value_size = rand() % 8092 + 100; + uchar_t *value = umem_alloc(value_size + 1, UMEM_NOFAIL); + for (int i = 0; i < value_size; ++i) { + value[i] = rand() % 26 + 'a'; + } + value[value_size] = '\0'; + + VERIFY0(libuzfs_inode_set_kvattr(dhp, *ino, name, + (char *)value, value_size + 1, 0, NULL)); + VERIFY0(nvlist_add_byte_array(nvl, name, + value, value_size + 1)); + umem_free(value, value_size + 1); + if (name_size > 0) { + umem_free(name, name_size + 1); + } + return (B_TRUE); + } + op -= setkvattr_proportion; + + if (op < deletekvattr_proportion) { + // delete one kv_attr + nvpair_t *elem = NULL; + if 
((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + char *name = nvpair_name(elem); + // printf("remove kvattr, name: %s\n", name); + int err = libuzfs_inode_remove_kvattr(dhp, + *ino, name, NULL); + if (err != 0) { + printf("remove kvattr, name: %s, " + "failed, err: %d\n", name, err); + return (B_FALSE); + } + VERIFY0(nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY)); + } + return (B_TRUE); + } + op -= deletekvattr_proportion; + + if (op < getattr_proportion) { + // get attr + uzfs_attr_t stored_attr; + VERIFY0(libuzfs_inode_getattr(dhp, *ino, + &stored_attr)); + if (uzfs_attr_cmp(&stored_attr, cur_attr) == 0) { + printf("cur_attr: \n"); + print_stat_sa(NULL, cur_attr); + printf("stored_attr: \n"); + print_stat_sa(NULL, &stored_attr); + printf("\n"); + return (B_FALSE); + } + // printf("get ok\n"); + return (B_TRUE); + } + op -= getattr_proportion; + + if (op < setattr_proportion) { + // set attr + // change 4 byte of cur_attr and set + // printf("set attr\n"); + int attr_size = sizeof (uzfs_attr_t); + int start_index = rand() % (attr_size - 3); + for (int i = start_index; i < start_index + 4; ++i) { + ((uchar_t *)(cur_attr))[i] = rand() % 256; + } + VERIFY0(libuzfs_inode_setattr(dhp, *ino, cur_attr, 0)); + return (B_TRUE); + } + return (B_TRUE); +} + +static int +uzfs_attr_random_test(int argc, char **argv) +{ + assert(argc == 3); + char *dsname = argv[1]; + libuzfs_dataset_handle_t *dhp = libuzfs_dataset_open(dsname); + if (!dhp) { + printf("failed to open dataset: %s\n", dsname); + return (-1); + } + + int nloops = atoi(argv[2]); + uint64_t ino = 0; + uzfs_attr_t cur_attr; + memset(&cur_attr, 0, sizeof (cur_attr)); + nvlist_t *nvl = NULL; + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)); + libuzfs_inode_type_t type = 0; + int seed = time(NULL); + srand(seed); + + printf("testing attr functionalities, " + "loops: %d, seed: %d\n", nloops, seed); + + for (int i = 0; i < nloops; ++i) { + boolean_t reset; + if (!uzfs_attr_ops(dhp, &ino, &type, &cur_attr, nvl, 
&reset)) { + printf("test failed, total loops: %d\n", i); + break; + } + if (reset) { + nvlist_free(nvl); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)); + } + } + + if (ino != 0) { + VERIFY0(libuzfs_inode_delete(dhp, ino, type, 0)); + } + nvlist_free(nvl); + libuzfs_dataset_close(dhp); + + printf("test end\n"); + + return (0); +} + int uzfs_dentry_create(int argc, char **argv) { diff --git a/include/libuzfs.h b/include/libuzfs.h index 3812e249db2d..43477511eb88 100644 --- a/include/libuzfs.h +++ b/include/libuzfs.h @@ -38,8 +38,39 @@ typedef enum { INODE_DIR = 1, } libuzfs_inode_type_t; +typedef enum { + TYPE_FILE, + TYPE_DIR, + TYPE_SYMLINK, + TYPE_SOCK, + TYPE_FIFO, + TYPE_CHR, + TYPE_BLK +} FileType; + +struct uzfs_attr { + uint64_t ino; + uint64_t pino; + uint32_t psid; + FileType ftype; + uint64_t gen; + uint32_t nlink; + uint32_t perm; + uint64_t uid; + uint64_t gid; + uint64_t size; + uint64_t blksize; + uint64_t blocks; + uint32_t nsid; + struct timespec atime; + struct timespec mtime; + struct timespec ctime; + struct timespec btime; +}; + typedef struct libuzfs_zpool_handle libuzfs_zpool_handle_t; typedef struct libuzfs_dataset_handle libuzfs_dataset_handle_t; +typedef struct uzfs_attr uzfs_attr_t; typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); @@ -134,28 +165,9 @@ extern int libuzfs_inode_create(libuzfs_dataset_handle_t *dhp, uint64_t *ino, extern int libuzfs_inode_claim(libuzfs_dataset_handle_t *dhp, uint64_t ino, libuzfs_inode_type_t type); -extern int libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, - libuzfs_inode_type_t type, uint64_t *txg); - -extern int libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - void *attr, uint64_t size); - -extern int libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const void *attr, uint64_t size, uint64_t *txg); - extern int libuzfs_inode_get_kvobj(libuzfs_dataset_handle_t *dhp, uint64_t ino, uint64_t *kvobj); -extern int 
libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, const char *value, uint64_t size, int flags, - uint64_t *txg); - -extern int libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, char *value, uint64_t size, int flags); - -extern int libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, - uint64_t ino, const char *name, uint64_t *txg); - extern int libuzfs_dentry_create(libuzfs_dataset_handle_t *dhp, uint64_t dino, const char *name, uint64_t *value, uint64_t num, uint64_t *txg); @@ -196,6 +208,20 @@ extern int libuzfs_write(uint64_t fsid, uint64_t ino, zfs_uio_t *uio, extern int libuzfs_fsync(uint64_t fsid, uint64_t ino, int syncflag); +extern int libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, + libuzfs_inode_type_t type, uint64_t *txg); +extern int libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + uzfs_attr_t *attr); +extern int libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const uzfs_attr_t *attr, uint64_t *txg); +extern int libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, const char *value, + uint64_t size, int flags, uint64_t *txg); +extern ssize_t libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, + uint64_t ino, const char *name, char *value, uint64_t size, int flags); +extern int libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, + uint64_t ino, const char *name, uint64_t *txg); + #ifdef __cplusplus } #endif diff --git a/include/libuzfs_impl.h b/include/libuzfs_impl.h index 09e71054d475..651841e60c48 100644 --- a/include/libuzfs_impl.h +++ b/include/libuzfs_impl.h @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -46,8 +47,20 @@ struct libuzfs_dataset_handle { objset_t *os; zilog_t *zilog; uint64_t sb_ino; + sa_attr_type_t *uzfs_attr_table; }; +#define UZFS_SIZE_OFFSET 0 +#define UZFS_GEN_OFFSET 8 +#define UZFS_UID_OFFSET 16 
+#define UZFS_GID_OFFSET 24 +#define UZFS_PARENT_OFFSET 32 + +extern int libuzfs_object_attr_init(libuzfs_dataset_handle_t *dhp, + sa_handle_t *sa_hdl, dmu_tx_t *tx); +extern void libuzfs_setup_dataset_sa(libuzfs_dataset_handle_t *dhp); +extern int libuzfs_get_xattr_zap_obj(libuzfs_dataset_handle_t *dhp, + uint64_t ino, uint64_t *xattr_zap_obj); #ifdef __cplusplus } diff --git a/include/sys/kernel.h b/include/sys/kernel.h index 050a21725e5d..3ce5d939694d 100644 --- a/include/sys/kernel.h +++ b/include/sys/kernel.h @@ -294,7 +294,7 @@ extern int fls(int x); #define time_before(a, b) time_after(b, a) -#define HAVE_D_PRUNE_ALIASES +#define HAVE_D_PRUNE_ALIASES // zfs_ioctl.c #define FKIOCTL 0x80000000 diff --git a/lib/libuzfs/Makefile.am b/lib/libuzfs/Makefile.am index b725359e2978..77ecac25ae27 100644 --- a/lib/libuzfs/Makefile.am +++ b/lib/libuzfs/Makefile.am @@ -14,7 +14,9 @@ pkgconfig_DATA = libuzfs.pc lib_LTLIBRARIES = libuzfs.la -USER_C = libuzfs.c +USER_C = \ + libuzfs.c \ + libuzfs_attr.c libuzfs_la_SOURCES = $(USER_C) diff --git a/lib/libuzfs/libuzfs.c b/lib/libuzfs/libuzfs.c index d5b13a55fbfc..0ef2e0249ed2 100644 --- a/lib/libuzfs/libuzfs.c +++ b/lib/libuzfs/libuzfs.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "libuzfs_impl.h" @@ -429,6 +430,10 @@ libuzfs_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) err = zap_add(os, MASTER_NODE_OBJ, UZFS_SB_OBJ, 8, 1, &sb_obj, tx); ASSERT(err == 0); + uint64_t sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + err = zap_add(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT(err == 0); } int @@ -488,6 +493,62 @@ libuzfs_dhp_fini(libuzfs_dataset_handle_t *dhp) { } +static int +uzfs_get_file_info(dmu_object_type_t bonustype, const void *data, + zfs_file_info_t *zoi) +{ + if (bonustype != DMU_OT_SA) + return (SET_ERROR(ENOENT)); + + zoi->zfi_project = ZFS_DEFAULT_PROJID; + + /* + * If we have a NULL data pointer + * then assume the id's aren't 
changing and + * return EEXIST to the dmu to let it know to + * use the same ids + */ + if (data == NULL) + return (SET_ERROR(EEXIST)); + + const sa_hdr_phys_t *sap = data; + if (sap->sa_magic == 0) { + /* + * This should only happen for newly created files + * that haven't had the znode data filled in yet. + */ + zoi->zfi_user = 0; + zoi->zfi_group = 0; + zoi->zfi_generation = 0; + return (0); + } + + sa_hdr_phys_t sa = *sap; + boolean_t swap = B_FALSE; + if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { + sa.sa_magic = SA_MAGIC; + sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); + swap = B_TRUE; + } + VERIFY3U(sa.sa_magic, ==, SA_MAGIC); + + int hdrsize = sa_hdrsize(&sa); + VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); + + uintptr_t data_after_hdr = (uintptr_t)data + hdrsize; + zoi->zfi_user = *((uint64_t *)(data_after_hdr + UZFS_UID_OFFSET)); + zoi->zfi_group = *((uint64_t *)(data_after_hdr + UZFS_GID_OFFSET)); + zoi->zfi_generation = *((uint64_t *)(data_after_hdr + UZFS_GEN_OFFSET)); + + if (swap) { + zoi->zfi_user = BSWAP_64(zoi->zfi_user); + zoi->zfi_group = BSWAP_64(zoi->zfi_group); + zoi->zfi_project = BSWAP_64(zoi->zfi_project); + zoi->zfi_generation = BSWAP_64(zoi->zfi_generation); + } + return (0); +} + libuzfs_dataset_handle_t * libuzfs_dataset_open(const char *dsname) { @@ -501,6 +562,7 @@ libuzfs_dataset_open(const char *dsname) dhp, &os)); libuzfs_dhp_init(dhp, os); + dmu_objset_register_type(DMU_OST_ZFS, uzfs_get_file_info); zilog = dhp->zilog; @@ -508,6 +570,7 @@ libuzfs_dataset_open(const char *dsname) zilog = zil_open(os, libuzfs_get_data); + libuzfs_setup_dataset_sa(dhp); return (dhp); } @@ -515,6 +578,9 @@ void libuzfs_dataset_close(libuzfs_dataset_handle_t *dhp) { zil_close(dhp->zilog); + if (dhp->os->os_sa != NULL) { + sa_tear_down(dhp->os); + } dmu_objset_disown(dhp->os, B_TRUE, dhp); libuzfs_dhp_fini(dhp); free(dhp); @@ -570,39 +636,67 @@ libuzfs_wait_synced(libuzfs_dataset_handle_t *dhp) txg_wait_synced(spa_get_dsl(dhp->os->os_spa), 0); } -int 
-libuzfs_object_create(libuzfs_dataset_handle_t *dhp, uint64_t *obj, - uint64_t *txg) +static int +libuzfs_create_inode_with_type(libuzfs_dataset_handle_t *dhp, uint64_t *obj, + boolean_t claiming, libuzfs_inode_type_t type, uint64_t *txg) { - int err = 0; - dmu_tx_t *tx = NULL; objset_t *os = dhp->os; - - tx = dmu_tx_create(os); - - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_sa_create(tx, sizeof (uzfs_attr_t)); + if (type == INODE_DIR) { + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); + } + int err = dmu_tx_assign(tx, TXG_WAIT); + if (err != 0) { dmu_tx_abort(tx); - goto out; + return (err); } + // create/claim object int dnodesize = dmu_objset_dnodesize(os); int bonuslen = DN_BONUS_SIZE(dnodesize); - int blocksize = 0; - int ibshift = 0; - - *obj = dmu_object_alloc_dnsize(os, DMU_OT_PLAIN_FILE_CONTENTS, 0, - DMU_OT_PLAIN_OTHER, bonuslen, dnodesize, tx); + if (type == INODE_FILE) { + if (claiming) { + ASSERT(*obj != 0); + VERIFY0(dmu_object_claim_dnsize(os, *obj, + DMU_OT_PLAIN_FILE_CONTENTS, + 0, DMU_OT_SA, bonuslen, dnodesize, tx)); + } else { + *obj = dmu_object_alloc_dnsize(os, + DMU_OT_PLAIN_FILE_CONTENTS, 0, + DMU_OT_SA, bonuslen, dnodesize, tx); + } + } else { + if (claiming) { + ASSERT(*obj != 0); + VERIFY0(zap_create_claim_dnsize(os, *obj, + DMU_OT_DIRECTORY_CONTENTS, DMU_OT_SA, + bonuslen, dnodesize, tx)); + } else { + *obj = zap_create_dnsize(os, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_SA, bonuslen, dnodesize, tx); + } + } - VERIFY0(dmu_object_set_blocksize(os, *obj, blocksize, ibshift, tx)); + sa_handle_t *sa_hdl; + sa_handle_get(os, *obj, NULL, SA_HDL_PRIVATE, &sa_hdl); + VERIFY0(libuzfs_object_attr_init(dhp, sa_hdl, tx)); + sa_handle_destroy(sa_hdl); - *txg = tx->tx_txg; + if (txg != NULL) { + *txg = tx->tx_txg; + } dmu_tx_commit(tx); -out: - return (err); + return (0); +} + +int +libuzfs_object_create(libuzfs_dataset_handle_t *dhp, uint64_t *obj, + 
uint64_t *txg) +{ + return (libuzfs_create_inode_with_type(dhp, obj, + B_FALSE, INODE_FILE, txg)); } int @@ -635,78 +729,16 @@ libuzfs_object_delete(libuzfs_dataset_handle_t *dhp, uint64_t obj, int libuzfs_object_claim(libuzfs_dataset_handle_t *dhp, uint64_t obj) { - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = DN_BONUS_SIZE(dnodesize); - int blocksize = 0; - int ibs = 0; - - tx = dmu_tx_create(os); - - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) - goto out; - - err = dmu_object_claim_dnsize(os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, - DMU_OT_PLAIN_OTHER, bonuslen, dnodesize, tx); - if (err) - goto out; - - VERIFY0(dmu_object_set_blocksize(os, obj, blocksize, ibs, tx)); - - dmu_tx_commit(tx); - - return (0); - -out: - dmu_tx_abort(tx); - return (err); + return (libuzfs_create_inode_with_type(dhp, &obj, + B_TRUE, INODE_FILE, 0)); } int TEST_libuzfs_object_claim(libuzfs_dataset_handle_t *dhp, uint64_t obj, uint64_t *txg) { - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = DN_BONUS_SIZE(dnodesize); - int type = DMU_OT_UINT64_OTHER; - int bonus_type = DMU_OT_UINT64_OTHER; - int blocksize = 0; - int ibs = 0; - - tx = dmu_tx_create(os); - - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) - goto out; - - err = dmu_object_claim_dnsize(os, obj, type, 0, bonus_type, bonuslen, - dnodesize, tx); - if (err) - goto out; - - VERIFY0(dmu_object_set_blocksize(os, obj, blocksize, ibs, tx)); - - *txg = tx->tx_txg; - dmu_tx_commit(tx); - - return (0); - -out: - dmu_tx_abort(tx); - return (err); - + return (libuzfs_create_inode_with_type(dhp, &obj, + B_TRUE, INODE_FILE, txg)); } uint64_t @@ -1030,143 +1062,59 @@ libuzfs_zap_count(libuzfs_dataset_handle_t *dhp, uint64_t obj, uint64_t *count) } int -libuzfs_inode_create(libuzfs_dataset_handle_t *dhp, 
uint64_t *ino, +libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, libuzfs_inode_type_t type, uint64_t *txg) { - if (type == INODE_FILE) - return (libuzfs_object_create(dhp, ino, txg)); - - if (type == INODE_DIR) - return (libuzfs_zap_create(dhp, ino, txg)); - - return (EINVAL); -} - -int -libuzfs_inode_claim(libuzfs_dataset_handle_t *dhp, uint64_t ino, - libuzfs_inode_type_t type) -{ - if (type == INODE_FILE) - return (libuzfs_object_claim(dhp, ino)); - - if (type == INODE_DIR) - return (libuzfs_zap_claim(dhp, ino)); - - return (EINVAL); -} + uint64_t xattr_zap_obj = 0; + int err = libuzfs_get_xattr_zap_obj(dhp, ino, &xattr_zap_obj); + if (err != 0 && err != ENOENT) { + return (err); + } -static int -libuzfs_inode_kvobj_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, - libuzfs_inode_type_t type, uint64_t kvobj, uint64_t *txg) -{ - int err = 0; - dmu_tx_t *tx = NULL; objset_t *os = dhp->os; + dmu_tx_t *tx = dmu_tx_create(os); - tx = dmu_tx_create(os); - - dmu_tx_hold_free(tx, kvobj, 0, DMU_OBJECT_END); dmu_tx_hold_free(tx, ino, 0, DMU_OBJECT_END); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; + if (xattr_zap_obj != 0) { + dmu_tx_hold_free(tx, xattr_zap_obj, 0, DMU_OBJECT_END); } - VERIFY0(zap_destroy(os, kvobj, tx)); + if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) { + if (xattr_zap_obj != 0) { + VERIFY0(zap_destroy(os, xattr_zap_obj, tx)); + } - if (type == INODE_FILE) - VERIFY0(dmu_object_free(os, ino, tx)); - else if (type == INODE_DIR) - VERIFY0(zap_destroy(os, ino, tx)); + if (type == INODE_DIR) { + VERIFY0(zap_destroy(os, ino, tx)); + } else { + VERIFY0(dmu_object_free(os, ino, tx)); + } - *txg = tx->tx_txg; - dmu_tx_commit(tx); + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + } else { + dmu_tx_abort(tx); + } -out: return (err); } int -libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, +libuzfs_inode_create(libuzfs_dataset_handle_t *dhp, uint64_t *ino, 
libuzfs_inode_type_t type, uint64_t *txg) { - if (type != INODE_FILE && type != INODE_DIR) - return (EINVAL); - - uint64_t kvobj = 0; - VERIFY0(libuzfs_inode_get_kvobj(dhp, ino, &kvobj)); - - if (kvobj == 0) { - if (type == INODE_FILE) - return (libuzfs_object_delete(dhp, ino, txg)); - if (type == INODE_DIR) - return (libuzfs_zap_delete(dhp, ino, txg)); - } - - return (libuzfs_inode_kvobj_delete(dhp, ino, type, kvobj, txg)); + return (libuzfs_create_inode_with_type(dhp, ino, + B_FALSE, type, txg)); } int -libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, void *attr, - uint64_t size) +libuzfs_inode_claim(libuzfs_dataset_handle_t *dhp, uint64_t ino, + libuzfs_inode_type_t type) { - return (libuzfs_object_getattr(dhp, ino, attr, size)); -} - -int -libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const void *attr, uint64_t size, uint64_t *txg) -{ - return (libuzfs_object_setattr(dhp, ino, attr, size, txg)); -} - -static int -libuzfs_object_kvattr_create_add(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *key, const char *value, uint64_t size, int flags, uint64_t *txg) -{ - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - dmu_buf_t *db; - uint64_t kvobj = 0; - - tx = dmu_tx_create(os); - - dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); - dmu_tx_hold_bonus(tx, ino); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = DN_BONUS_SIZE(dnodesize); - - kvobj = zap_create_dnsize(os, DMU_OT_DIRECTORY_CONTENTS, - DMU_OT_PLAIN_OTHER, bonuslen, dnodesize, tx); - - err = zap_add(os, kvobj, key, 1, size, value, tx); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - VERIFY0(dmu_bonus_hold(os, ino, FTAG, &db)); - dmu_buf_will_dirty(db, tx); - bcopy(&kvobj, db->db_data, sizeof (kvobj)); - dmu_buf_rele(db, FTAG); - - *txg = tx->tx_txg; - dmu_tx_commit(tx); - - libuzfs_wait_synced(dhp); - -out: - return (err); + return 
(libuzfs_create_inode_with_type(dhp, &ino, + B_FALSE, type, NULL)); } int @@ -1182,60 +1130,6 @@ libuzfs_inode_get_kvobj(libuzfs_dataset_handle_t *dhp, uint64_t ino, return (0); } -int -libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, char *value, uint64_t size, int flags) -{ - int err = 0; - uint64_t kvobj; - - err = libuzfs_inode_get_kvobj(dhp, ino, &kvobj); - if (err) - return (err); - - if (kvobj == 0) - return (ENOENT); - - return (libuzfs_zap_lookup(dhp, kvobj, name, 1, size, value)); -} - -// TODO(hping): remove kvobj when no kv attr -int -libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, uint64_t *txg) -{ - int err = 0; - uint64_t kvobj; - - err = libuzfs_inode_get_kvobj(dhp, ino, &kvobj); - if (err) - return (err); - - if (kvobj == 0) - return (ENOENT); - - return (libuzfs_zap_remove(dhp, kvobj, name, txg)); -} - -int -libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, const char *value, uint64_t size, int flags, - uint64_t *txg) -{ - int err = 0; - uint64_t kvobj; - - err = libuzfs_inode_get_kvobj(dhp, ino, &kvobj); - if (err) - return (err); - - if (kvobj == 0) - return (libuzfs_object_kvattr_create_add(dhp, ino, name, - value, size, flags, txg)); - - return (libuzfs_zap_update(dhp, kvobj, name, 1, size, value, txg)); -} - int libuzfs_dentry_create(libuzfs_dataset_handle_t *dhp, uint64_t dino, const char *name, uint64_t *value, uint64_t num, uint64_t *txg) { diff --git a/lib/libuzfs/libuzfs_attr.c b/lib/libuzfs/libuzfs_attr.c new file mode 100644 index 000000000000..831d40640566 --- /dev/null +++ b/lib/libuzfs/libuzfs_attr.c @@ -0,0 +1,612 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum uzfs_attr_type { + UZFS_PINO, + UZFS_PSID, + UZFS_FTYPE, + UZFS_GEN, + UZFS_NLINK, + UZFS_PERM, + UZFS_UID, + UZFS_GID, + UZFS_SIZE, + UZFS_BLKSIZE, + UZFS_BLOCKS, + 
UZFS_NSID, + UZFS_ATIME, + UZFS_MTIME, + UZFS_CTIME, + UZFS_BTIME, + UZFS_ZXATTR, // sa index for dir xattr inode + UZFS_XATTR, // sa index for sa xattr (name, value) pairs + UZFS_END +} uzfs_attr_type_t; + +sa_attr_reg_t uzfs_attr_table[UZFS_END+1] = { + {"UZFS_PINO", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, + {"UZFS_PSID", sizeof (uint32_t), SA_UINT32_ARRAY, 1}, + {"UZFS_FTYPE", sizeof (FileType), SA_UINT32_ARRAY, 2}, + {"UZFS_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 3}, + {"UZFS_NLINK", sizeof (uint32_t), SA_UINT32_ARRAY, 4}, + {"UZFS_PERM", sizeof (uint32_t), SA_UINT32_ARRAY, 5}, + {"UZFS_UID", sizeof (uint64_t), SA_UINT32_ARRAY, 6}, + {"UZFS_GID", sizeof (uint64_t), SA_UINT32_ARRAY, 7}, + {"UZFS_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, + {"UZFS_BLKSIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, + {"UZFS_BLOCKS", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, + {"UZFS_NSID", sizeof (uint32_t), SA_UINT32_ARRAY, 11}, + {"UZFS_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 12}, + {"UZFS_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 13}, + {"UZFS_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 14}, + {"UZFS_BTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 15}, + {"UZFS_ZXATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 16}, + {"UZFS_XATTR", 0, SA_UINT8_ARRAY, 0}, + {NULL, 0, 0, 0} +}; + +void +libuzfs_setup_dataset_sa(libuzfs_dataset_handle_t *dhp) +{ + uint64_t sa_obj; + VERIFY0(zap_lookup(dhp->os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj)); + sa_setup(dhp->os, sa_obj, uzfs_attr_table, + UZFS_END, &dhp->uzfs_attr_table); +} + +// make sure sa_attrs has enough space +static void +libuzfs_add_bulk_attr(libuzfs_dataset_handle_t *dhp, sa_bulk_attr_t *sa_attrs, + int *cnt, uzfs_attr_t *attr) +{ + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_SIZE], + NULL, &attr->size, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_GEN], + NULL, &attr->gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_UID], + NULL, 
&attr->uid, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_GID], + NULL, &attr->gid, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_PINO], + NULL, &attr->pino, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_PSID], + NULL, &attr->psid, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_FTYPE], + NULL, &attr->ftype, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_NLINK], + NULL, &attr->nlink, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_PERM], + NULL, &attr->perm, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_BLKSIZE], + NULL, &attr->blksize, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_BLOCKS], + NULL, &attr->blocks, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_NSID], + NULL, &attr->nsid, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_ATIME], + NULL, &attr->atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_MTIME], + NULL, &attr->mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_CTIME], + NULL, &attr->ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_BTIME], + NULL, &attr->btime, 16); +} + +int +libuzfs_object_attr_init(libuzfs_dataset_handle_t *dhp, + sa_handle_t *sa_hdl, dmu_tx_t *tx) +{ + sa_bulk_attr_t sa_attrs[UZFS_END]; + int cnt = 0; + uzfs_attr_t attr; + memset(&attr, 0, sizeof (attr)); + libuzfs_add_bulk_attr(dhp, sa_attrs, &cnt, &attr); + return (sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx)); +} + +int +libuzfs_get_xattr_zap_obj(libuzfs_dataset_handle_t *dhp, uint64_t ino, + uint64_t *xattr_zap_obj) +{ + objset_t *os = dhp->os; + sa_handle_t *sa_hdl = NULL; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + err = sa_lookup(sa_hdl, sa_tbl[UZFS_ZXATTR], xattr_zap_obj, + sizeof (*xattr_zap_obj)); + sa_handle_destroy(sa_hdl); + return (err); +} + +int +libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + uzfs_attr_t *attr) +{ + 
sa_handle_t *sa_hdl; + int err = sa_handle_get(dhp->os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + sa_bulk_attr_t sa_attrs[UZFS_END]; + int cnt = 0; + libuzfs_add_bulk_attr(dhp, sa_attrs, &cnt, attr); + err = sa_bulk_lookup(sa_hdl, sa_attrs, cnt); + + sa_handle_destroy(sa_hdl); + return (err); +} + +int +libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const uzfs_attr_t *attr, uint64_t *txg) +{ + sa_handle_t *sa_hdl; + objset_t *os = dhp->os; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_sa(tx, sa_hdl, B_FALSE); + if ((err = dmu_tx_assign(tx, TXG_WAIT)) != 0) { + dmu_tx_abort(tx); + return (err); + } + + sa_bulk_attr_t sa_attrs[UZFS_END]; + int cnt = 0; + libuzfs_add_bulk_attr(dhp, sa_attrs, &cnt, (uzfs_attr_t *)attr); + VERIFY0(sa_bulk_update(sa_hdl, sa_attrs, cnt, tx)); + + sa_handle_destroy(sa_hdl); + + if (txg != NULL) { + *txg = tx->tx_txg; + } + + dmu_tx_commit(tx); + + return (0); +} + +static int +libuzfs_get_nvlist_from_handle(const sa_attr_type_t *sa_tbl, + nvlist_t **nvl, sa_handle_t *sa_hdl) +{ + int xattr_sa_size; + int err = sa_size(sa_hdl, sa_tbl[UZFS_XATTR], &xattr_sa_size); + if (err != 0) { + return (err); + } + + char *xattr_sa_data = vmem_alloc(xattr_sa_size, KM_SLEEP); + err = sa_lookup(sa_hdl, sa_tbl[UZFS_XATTR], + xattr_sa_data, xattr_sa_size); + if (err == 0) { + err = nvlist_unpack(xattr_sa_data, + xattr_sa_size, nvl, KM_SLEEP); + } + vmem_free(xattr_sa_data, xattr_sa_size); + + return (err); +} + +static int +libuzfs_kvattr_set_nvlist(nvlist_t *nvl, const char *name, + const char *value, size_t value_size, size_t *xattr_sa_size) +{ + int err = nvlist_add_byte_array(nvl, name, + (uchar_t *)value, (uint_t)value_size); + if (err != 0) { + return (err); + } + + err = nvlist_size(nvl, xattr_sa_size, NV_ENCODE_XDR); + if (err != 0) { + return (err); + } + + if (*xattr_sa_size > 
DXATTR_MAX_SA_SIZE || + *xattr_sa_size > SA_ATTR_MAX_LEN) { + err = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + ASSERT(err != ENOENT); + if (err == 0) { + err = nvlist_size(nvl, xattr_sa_size, NV_ENCODE_XDR); + } + if (err != 0) { + return (err); + } + return (EFBIG); + } + + return (0); +} + +static int +libufzs_inode_lookup_kvattr_zap(sa_handle_t *sa_hdl, sa_attr_type_t *sa_tbl, + const char *name, uint64_t *xattr_zap_obj) +{ + objset_t *os = sa_hdl->sa_os; + int err = sa_lookup(sa_hdl, sa_tbl[UZFS_ZXATTR], + xattr_zap_obj, sizeof (*xattr_zap_obj)); + if (err != ENOENT && err != 0) { + return (err); + } + if (err == 0) { + err = zap_contains(os, *xattr_zap_obj, name); + } + return (err); +} + +static void +libuzfs_inode_save_kvattr(sa_handle_t *sa_hdl, sa_attr_type_t *sa_tbl, + boolean_t existed_in_sa, boolean_t sa_space_enough, char *xattr_sa_data, + uint64_t xattr_sa_size, boolean_t existed_in_zap, uint64_t xattr_zap_obj, + uint64_t zap_entries_count, const char *name, const char *value, + size_t size, dmu_tx_t *tx) +{ + objset_t *os = sa_hdl->sa_os; + if (existed_in_sa || sa_space_enough) { + if (xattr_sa_size == 0) { + VERIFY0(sa_remove(sa_hdl, sa_tbl[UZFS_XATTR], tx)); + } else { + VERIFY0(sa_update(sa_hdl, sa_tbl[UZFS_XATTR], + xattr_sa_data, xattr_sa_size, tx)); + } + } + + if (existed_in_zap && sa_space_enough) { + ASSERT(zap_entries_count >= 1); + if (zap_entries_count == 1) { + VERIFY0(zap_destroy(os, xattr_zap_obj, tx)); + VERIFY0(sa_remove(sa_hdl, sa_tbl[UZFS_ZXATTR], tx)); + } else { + VERIFY0(zap_remove(os, xattr_zap_obj, name, tx)); + } + } + + if (!sa_space_enough) { + if (xattr_zap_obj == DMU_NEW_OBJECT) { + int dnodesize = dmu_objset_dnodesize(os); + xattr_zap_obj = zap_create_dnsize(os, + DMU_OT_DIRECTORY_CONTENTS, DMU_OT_NONE, 0, + dnodesize, tx); + VERIFY0(sa_update(sa_hdl, sa_tbl[UZFS_ZXATTR], + &xattr_zap_obj, sizeof (xattr_zap_obj), tx)); + } + VERIFY0(zap_update(os, xattr_zap_obj, name, + 1, size, value, tx)); + } +} + +int 
+libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, const char *value, uint64_t size, + int flags, uint64_t *txg) +{ + // only support value size <= 8K + ASSERT3U(size, <=, ZAP_MAXVALUELEN); + + sa_handle_t *sa_hdl = NULL; + objset_t *os = dhp->os; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + nvlist_t *nvl; + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + err = libuzfs_get_nvlist_from_handle(sa_tbl, &nvl, sa_hdl); + if (err == ENOENT) { + err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); + } + if (err != 0) { + goto out_handle; + } + + boolean_t existed_in_sa = nvlist_exists(nvl, name); + boolean_t sa_space_enough = B_TRUE; + size_t xattr_sa_size = 0; + err = libuzfs_kvattr_set_nvlist(nvl, name, value, size, &xattr_sa_size); + if (err == EFBIG) { + sa_space_enough = B_FALSE; + } else if (err != 0) { + goto out_nvl; + return (err); + } + + char *xattr_sa_data = NULL; + if (existed_in_sa || sa_space_enough) { + if (xattr_sa_size > 0) { + xattr_sa_data = vmem_alloc(xattr_sa_size, KM_SLEEP); + err = nvlist_pack(nvl, &xattr_sa_data, &xattr_sa_size, + NV_ENCODE_XDR, KM_SLEEP); + if (err != 0) { + goto out_free_sa_data; + } + } + } + + boolean_t existed_in_zap = B_FALSE; + uint64_t xattr_zap_obj = DMU_NEW_OBJECT; + uint64_t zap_entries_count = 0; + if (!existed_in_sa || !sa_space_enough) { + err = libufzs_inode_lookup_kvattr_zap(sa_hdl, + sa_tbl, name, &xattr_zap_obj); + existed_in_zap = err == 0; + if (existed_in_zap && !sa_space_enough) { + err = zap_count(os, xattr_zap_obj, &zap_entries_count); + } + + if (err != 0 && err != ENOENT) { + goto out_free_sa_data; + } + } + + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_sa(tx, sa_hdl, B_TRUE); + if (existed_in_zap || !sa_space_enough) { + if (existed_in_zap && sa_space_enough && + zap_entries_count == 1) { + dmu_tx_hold_free(tx, xattr_zap_obj, 0, DMU_OBJECT_END); + } else { + dmu_tx_hold_zap(tx, xattr_zap_obj, + 
!sa_space_enough, name); + } + } + + if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) { + libuzfs_inode_save_kvattr(sa_hdl, sa_tbl, existed_in_sa, + sa_space_enough, xattr_sa_data, xattr_sa_size, + existed_in_zap, xattr_zap_obj, zap_entries_count, + name, value, size, tx); + + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + } else { + dmu_tx_abort(tx); + } + +out_free_sa_data: + if (xattr_sa_data != NULL) { + vmem_free(xattr_sa_data, xattr_sa_size); + } +out_nvl: + nvlist_free(nvl); +out_handle: + sa_handle_destroy(sa_hdl); + return (err); +} + +static ssize_t +libuzfs_inode_get_kvattr_zap(sa_handle_t *sa_hdl, sa_attr_type_t *sa_tbl, + const char *name, char *value, size_t size) +{ + // get zap object from sa + objset_t *os = sa_hdl->sa_os; + uint64_t xattr_zap_obj; + int err = sa_lookup(sa_hdl, sa_tbl[UZFS_ZXATTR], + &xattr_zap_obj, sizeof (uint64_t)); + if (err != 0) { + return (-err); + } + + uint64_t integer_size = 0; + uint64_t integer_num = 0; + err = zap_length(os, xattr_zap_obj, name, + &integer_size, &integer_num); + if (err != 0) { + return (-err); + } + + ASSERT(integer_size == 1); + ASSERT(integer_num <= size); + err = zap_lookup(os, xattr_zap_obj, name, 1, size, value); + if (err != 0) { + return (-err); + } + + return (integer_num); +} + +static ssize_t +libuzfs_inode_get_kvattr_sa(sa_handle_t *sa_hdl, const sa_attr_type_t *sa_tbl, + const char *name, char *value, size_t size) +{ + nvlist_t *nvl; + int err = libuzfs_get_nvlist_from_handle(sa_tbl, &nvl, sa_hdl); + if (err != 0) { + return (-err); + } + + uchar_t *nv_value; + uint_t nv_size = 0; + err = nvlist_lookup_byte_array(nvl, name, &nv_value, &nv_size); + if (err == 0 && nv_size <= size) { + memcpy(value, nv_value, nv_size); + } + nvlist_free(nvl); + if (err != 0) { + return (-err); + } + + if (nv_size > size) { + return (-ERANGE); + } + return (nv_size); +} + +ssize_t +libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, char *value, uint64_t 
size, int flags) +{ + sa_handle_t *sa_hdl = NULL; + int err = sa_handle_get(dhp->os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (-err); + } + + ssize_t rc = libuzfs_inode_get_kvattr_sa(sa_hdl, dhp->uzfs_attr_table, + name, value, size); + if (rc == -ENOENT) { + rc = libuzfs_inode_get_kvattr_zap(sa_hdl, dhp->uzfs_attr_table, + name, value, size); + } + + sa_handle_destroy(sa_hdl); + return (rc); +} + +static int +libuzfs_inode_remove_kvattr_from_sa(sa_handle_t *sa_hdl, sa_attr_type_t *sa_tbl, + const char *name, uint64_t *txg) +{ + nvlist_t *nvl = NULL; + int err = libuzfs_get_nvlist_from_handle(sa_tbl, &nvl, sa_hdl); + if (err != 0) { + return (err); + } + + err = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + if (err != 0) { + goto out1; + } + + uint64_t xattr_sa_size = 0; + err = nvlist_size(nvl, &xattr_sa_size, NV_ENCODE_XDR); + if (err != 0) { + goto out1; + } + + char *xattr_sa_data = NULL; + if (xattr_sa_size > 0) { + xattr_sa_data = vmem_alloc(xattr_sa_size, KM_SLEEP); + err = nvlist_pack(nvl, &xattr_sa_data, + &xattr_sa_size, NV_ENCODE_XDR, KM_SLEEP); + if (err != 0) { + goto out2; + } + } + + + dmu_tx_t *tx = dmu_tx_create(sa_hdl->sa_os); + dmu_tx_hold_sa(tx, sa_hdl, B_TRUE); + if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) { + if (xattr_sa_size > 0) { + ASSERT(xattr_sa_data != NULL); + VERIFY0(sa_update(sa_hdl, sa_tbl[UZFS_XATTR], + xattr_sa_data, xattr_sa_size, tx)); + } else { + VERIFY0(sa_remove(sa_hdl, sa_tbl[UZFS_XATTR], tx)); + } + + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + } else { + dmu_tx_abort(tx); + } + +out2: + if (xattr_sa_size > 0) { + vmem_free(xattr_sa_data, xattr_sa_size); + } +out1: + nvlist_free(nvl); + return (err); +} + +static int +libuzfs_inode_remove_kvattr_from_zap(sa_handle_t *sa_hdl, + sa_attr_type_t *sa_tbl, const char *name, uint64_t *txg) +{ + uint64_t xattr_zap_obj = 0; + int err = sa_lookup(sa_hdl, sa_tbl[UZFS_ZXATTR], + &xattr_zap_obj, sizeof (xattr_zap_obj)); + if (err != 
0) { + return (err); + } + + objset_t *os = sa_hdl->sa_os; + err = zap_contains(os, xattr_zap_obj, name); + if (err != 0) { + return (err); + } + + uint64_t zap_entries_count = 0; + err = zap_count(os, xattr_zap_obj, &zap_entries_count); + if (err != 0) { + return (err); + } + + ASSERT(zap_entries_count > 0); + dmu_tx_t *tx = dmu_tx_create(os); + if (zap_entries_count == 1) { + dmu_tx_hold_free(tx, xattr_zap_obj, 0, DMU_OBJECT_END); + dmu_tx_hold_sa(tx, sa_hdl, B_TRUE); + } else { + dmu_tx_hold_zap(tx, xattr_zap_obj, B_FALSE, name); + } + + if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) { + if (zap_entries_count == 1) { + VERIFY0(dmu_object_free(os, xattr_zap_obj, tx)); + VERIFY0(sa_remove(sa_hdl, sa_tbl[UZFS_ZXATTR], tx)); + } else { + VERIFY0(zap_remove(os, xattr_zap_obj, name, tx)); + } + + if (txg != NULL) { + *txg = tx->tx_txg; + } + + dmu_tx_commit(tx); + } else { + dmu_tx_abort(tx); + } + + return (err); +} + +int +libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, uint64_t *txg) +{ + sa_handle_t *sa_hdl = NULL; + objset_t *os = dhp->os; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + err = libuzfs_inode_remove_kvattr_from_sa(sa_hdl, sa_tbl, name, txg); + if (err == ENOENT) { + err = libuzfs_inode_remove_kvattr_from_zap(sa_hdl, + sa_tbl, name, txg); + } + + sa_handle_destroy(sa_hdl); + return (err); +}