From 4f331b4df97be0cee967ab2d842f0e0a134ca02e Mon Sep 17 00:00:00 2001 From: --global Date: Wed, 21 Dec 2022 09:58:26 -0500 Subject: [PATCH] libuzfs && uzfs: reimplement attr/xattr store using zfs sa and add tests --- cmd/uzfs/uzfs.c | 287 ++++++++++++++- include/libuzfs.h | 70 +++- include/libuzfs_impl.h | 7 + lib/libuzfs/Makefile.am | 4 +- lib/libuzfs/libuzfs.c | 293 +--------------- lib/libuzfs/libuzfs_attr.c | 690 +++++++++++++++++++++++++++++++++++++ 6 files changed, 1039 insertions(+), 312 deletions(-) create mode 100644 lib/libuzfs/libuzfs_attr.c diff --git a/cmd/uzfs/uzfs.c b/cmd/uzfs/uzfs.c index a8a8ba1379cd..a1695317a12e 100644 --- a/cmd/uzfs/uzfs.c +++ b/cmd/uzfs/uzfs.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include static int uzfs_zpool_create(int argc, char **argv); static int uzfs_zpool_destroy(int argc, char **argv); @@ -90,6 +92,7 @@ static int uzfs_inode_get_kvobj(int argc, char **argv); static int uzfs_inode_get_kvattr(int argc, char **argv); static int uzfs_inode_set_kvattr(int argc, char **argv); static int uzfs_inode_rm_kvattr(int argc, char **argv); +static int uzfs_attr_random_test(int argc, char **argv); static int uzfs_dentry_create(int argc, char **argv); static int uzfs_dentry_delete(int argc, char **argv); @@ -141,6 +144,7 @@ typedef enum { HELP_DENTRY_CREATE, HELP_DENTRY_DELETE, HELP_DENTRY_LOOKUP, + HELP_ATTR_TEST } uzfs_help_t; typedef struct uzfs_command { @@ -204,6 +208,7 @@ static uzfs_command_t command_table[] = { { "create-dentry", uzfs_dentry_create, HELP_DENTRY_CREATE }, { "delete-dentry", uzfs_dentry_delete, HELP_DENTRY_DELETE }, { "lookup-dentry", uzfs_dentry_lookup, HELP_DENTRY_DELETE }, + { "attr-test", uzfs_attr_random_test, HELP_ATTR_TEST }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -304,6 +309,8 @@ get_usage(uzfs_help_t idx) return (gettext("\tdelete-dentry ...\n")); case HELP_DENTRY_LOOKUP: return (gettext("\tlookup-dentry ...\n")); + case HELP_ATTR_TEST: + return 
(gettext("\tattr-test ...\n")); default: __builtin_unreachable(); } @@ -370,6 +377,38 @@ static void print_stat(const char *name, struct stat *stat) stat->st_blocks); } +static void +print_stat_sa(const char *name, uzfs_attr_t *stat) +{ + const char *format = /* specifiers match uzfs_attr_t fields */ + "ino: %lu\n" + "pino: %lu\n" + "psid: %u\n" + "ftype: %d\n" + "gen: %lu\n" + "nlink: %u\n" + "perm: %u\n" + "uid: %lu\n" + "gid: %lu\n" + "size: %lu\n" + "blksize: %lu\n" + "blocks: %lu\n" + "nsid: %u\n" + "atime: (%ld, %ld)\n" + "mtime: (%ld, %ld)\n" + "ctime: (%ld, %ld)\n" + "btime: (%ld, %ld)\n"; + + printf(format, stat->ino, stat->pino, stat->psid, + stat->ftype, stat->gen, stat->nlink, stat->perm, + stat->uid, stat->gid, stat->size, stat->blksize, + stat->blocks, stat->nsid, stat->atime.tv_sec, + stat->atime.tv_nsec, stat->mtime.tv_sec, + stat->mtime.tv_nsec, stat->ctime.tv_sec, + stat->ctime.tv_nsec, stat->btime.tv_sec, + stat->btime.tv_nsec); +} + static int find_command_idx(char *command, int *idx) { @@ -1086,7 +1125,7 @@ uzfs_inode_getattr(int argc, char **argv) return (-1); } - struct stat buf; + uzfs_attr_t buf; memset(&buf, 0, sizeof (buf)); err = libuzfs_inode_getattr(dhp, obj, &buf, sizeof (buf)); @@ -1094,7 +1133,7 @@ uzfs_inode_getattr(int argc, char **argv) printf("failed to get attr inode %ld on dataset: %s\n", obj, dsname); else - print_stat(NULL, &buf); + print_stat_sa(NULL, &buf); libuzfs_dataset_close(dhp); @@ -1116,28 +1155,29 @@ uzfs_inode_setattr(int argc, char **argv) return (-1); } - struct stat buf; + uzfs_attr_t buf; memset(&buf, 0, sizeof (buf)); - buf.st_ino = obj; - buf.st_mode = 0x1; - buf.st_nlink = 1; - buf.st_uid = 0; - buf.st_gid = 0; - buf.st_size = 4096; -// buf.st_atime.tv_sec = 0; -// buf.st_mtime.tv_sec = 0; -// buf.st_ctime.tv_sec = 0; - buf.st_blocks = 8; - buf.st_blksize = 512; - - uint64_t txg = 0; - err = libuzfs_inode_setattr(dhp, obj, &buf, sizeof (buf), &txg); + buf.ino = obj; + buf.pino = 0; + buf.psid = 0; + buf.ftype = TYPE_FILE; + buf.gen = 1; + buf.nlink
= 1; + buf.perm = 0; + buf.uid = 12358; + buf.gid = 85321; + buf.size = 0; + buf.blksize = 65536; + buf.blocks = 1; + buf.nsid = 1; + + err = libuzfs_inode_setattr(dhp, obj, &buf, NULL); if (err) printf("failed to get attr inode %ld on dataset: %s\n", obj, dsname); else - print_stat(NULL, &buf); + print_stat_sa(NULL, &buf); libuzfs_dataset_close(dhp); @@ -1261,6 +1301,219 @@ uzfs_inode_rm_kvattr(int argc, char **argv) return (err); } +/* Non-zero iff every SA-persisted field matches; ino is not stored via SA. */ +static int +uzfs_attr_cmp(uzfs_attr_t *lhs, uzfs_attr_t *rhs) +{ + return lhs->psid == rhs->psid && lhs->ftype == rhs->ftype && + lhs->pino == rhs->pino && lhs->uid == rhs->uid && + lhs->gen == rhs->gen && lhs->nlink == rhs->nlink && + lhs->perm == rhs->perm && lhs->gid == rhs->gid && + lhs->size == rhs->size && lhs->blksize == rhs->blksize && + lhs->blocks == rhs->blocks && lhs->nsid == rhs->nsid && + lhs->atime.tv_nsec == rhs->atime.tv_nsec && + lhs->atime.tv_sec == rhs->atime.tv_sec && + lhs->mtime.tv_nsec == rhs->mtime.tv_nsec && + lhs->mtime.tv_sec == rhs->mtime.tv_sec && + lhs->ctime.tv_nsec == rhs->ctime.tv_nsec && + lhs->ctime.tv_sec == rhs->ctime.tv_sec && + lhs->btime.tv_nsec == rhs->btime.tv_nsec && + lhs->btime.tv_sec == rhs->btime.tv_sec; +} + +static boolean_t +uzfs_attr_ops(libuzfs_dataset_handle_t *dhp, uint64_t *ino, + libuzfs_inode_type_t *type, uzfs_attr_t *cur_attr, + nvlist_t *nvl, boolean_t *reset) +{ + int delete_proportion = 1; + int getkvattr_proportion = 2; + int setkvattr_proportion = 4; + int deletekvattr_proportion = 1; + int getattr_proportion = 6; + int setattr_proportion = 6; + int op = rand() % 20; + *reset = B_FALSE; + if (op < delete_proportion) { + // delete inode + if (*ino != 0) { + VERIFY0(libuzfs_inode_delete(dhp, *ino, *type, NULL)); + memset(cur_attr, 0, sizeof (*cur_attr)); + // printf("delete inode: %lu\n", *ino); + *ino = 0; + *reset = B_TRUE; + } + return (B_TRUE); + } + op -= delete_proportion; + + if (*ino == 0) { + boolean_t claiming = B_FALSE; + if (claiming) { + *ino = 2; + } + *type = rand() % 2; +
VERIFY0(libuzfs_create_inode_with_type(dhp, ino, + claiming, *type, NULL)); + } + if (op < getkvattr_proportion) { + // get all kvattr and check + nvpair_t *elem = NULL; + while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + char *name = nvpair_name(elem); + char *value = NULL; + uint_t size; + VERIFY0(nvpair_value_byte_array(elem, + (uchar_t **)(&value), &size)); + char *stored_value = + (char *)umem_alloc(size, UMEM_NOFAIL); + VERIFY0(libuzfs_inode_get_kvattr(dhp, *ino, name, + stored_value, (uint64_t)(size), 0)); + if (memcmp(value, stored_value, size) != 0) { + return (B_FALSE); + } + umem_free(stored_value, size); + } + return (B_TRUE); + } + op -= getkvattr_proportion; + + if (op < setkvattr_proportion) { + char *name = NULL; + int name_size = 0; + if (rand() % 4 < 1) { + nvpair_t *elem = NULL; + if ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + name = nvpair_name(elem); + } + } + if (name == NULL) { + name_size = rand() % 50 + 1; + name = umem_alloc(name_size + 1, UMEM_NOFAIL); + for (int i = 0; i < name_size; ++i) { + name[i] = rand() % 26 + 'a'; + } + name[name_size] = '\0'; + } + + uint_t value_size = rand() % 32768 + 100; + uchar_t *value = umem_alloc(value_size + 1, UMEM_NOFAIL); + for (int i = 0; i < value_size; ++i) { + value[i] = rand() % 26 + 'a'; + } + value[value_size] = '\0'; + + VERIFY0(libuzfs_inode_set_kvattr(dhp, *ino, name, + (char *)value, value_size + 1, 0, NULL)); + VERIFY0(nvlist_add_byte_array(nvl, name, + value, value_size + 1)); + umem_free(value, value_size + 1); + if (name_size > 0) { + umem_free(name, name_size + 1); + } + return (B_TRUE); + } + op -= setkvattr_proportion; + + if (op < deletekvattr_proportion) { + // delete one kv_attr + nvpair_t *elem = NULL; + if ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { + char *name = nvpair_name(elem); + // printf("remove kvattr, name: %s\n", name); + int err = libuzfs_inode_remove_kvattr(dhp, + *ino, name, NULL); + if (err != 0) { + printf("remove kvattr, name: %s, " + 
"failed, err: %d\n", name, err); + return (B_FALSE); + } + VERIFY0(nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY)); + } + return (B_TRUE); + } + op -= deletekvattr_proportion; + + if (op < getattr_proportion) { + // get attr + uzfs_attr_t stored_attr; + VERIFY0(libuzfs_inode_getattr(dhp, *ino, + &stored_attr, sizeof (stored_attr))); + if (uzfs_attr_cmp(&stored_attr, cur_attr) == 0) { + printf("cur_attr: \n"); + print_stat_sa(NULL, cur_attr); + printf("stored_attr: \n"); + print_stat_sa(NULL, &stored_attr); + printf("\n"); + return (B_FALSE); + } + // printf("get ok\n"); + return (B_TRUE); + } + op -= getattr_proportion; + + if (op < setattr_proportion) { + // set attr + // change 4 byte of cur_attr and set + // printf("set attr\n"); + int attr_size = sizeof (uzfs_attr_t); + int start_index = rand() % (attr_size - 3); + for (int i = start_index; i < start_index + 4; ++i) { + ((uchar_t *)(cur_attr))[i] = rand() % 256; + } + VERIFY0(libuzfs_inode_setattr(dhp, *ino, cur_attr, 0)); + return (B_TRUE); + } + return (B_TRUE); +} + +static int +uzfs_attr_random_test(int argc, char **argv) +{ + assert(argc == 3); + char *dsname = argv[1]; + libuzfs_dataset_handle_t *dhp = libuzfs_dataset_open(dsname); + if (!dhp) { + printf("failed to open dataset: %s\n", dsname); + return (-1); + } + + int nloops = atoi(argv[2]); + uint64_t ino = 0; + uzfs_attr_t cur_attr; + memset(&cur_attr, 0, sizeof (cur_attr)); + nvlist_t *nvl = NULL; + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)); + libuzfs_inode_type_t type = 0; + int seed = time(NULL); + srand(seed); + + printf("testing attr functionalities, " + "loops: %d, seed: %d\n", nloops, seed); + + for (int i = 0; i < nloops; ++i) { + boolean_t reset; + if (!uzfs_attr_ops(dhp, &ino, &type, &cur_attr, nvl, &reset)) { + printf("test failed, total loops: %d\n", i); + break; + } + if (reset) { + nvlist_free(nvl); + VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)); + } + } + + if (ino != 0) { + VERIFY0(libuzfs_inode_delete(dhp, ino, 
type, 0)); + } + nvlist_free(nvl); + libuzfs_dataset_close(dhp); + + printf("test end\n"); + + return (0); +} + int uzfs_dentry_create(int argc, char **argv) { diff --git a/include/libuzfs.h b/include/libuzfs.h index 3812e249db2d..ea1b2566e90f 100644 --- a/include/libuzfs.h +++ b/include/libuzfs.h @@ -38,8 +38,39 @@ typedef enum { INODE_DIR = 1, } libuzfs_inode_type_t; +typedef enum { + TYPE_FILE, + TYPE_DIR, + TYPE_SYMLINK, + TYPE_SOCK, + TYPE_FIFO, + TYPE_CHR, + TYPE_BLK +} FileType; + +struct uzfs_attr { + uint64_t ino; + uint64_t pino; + uint32_t psid; + FileType ftype; + uint64_t gen; + uint32_t nlink; + uint32_t perm; + uint64_t uid; + uint64_t gid; + uint64_t size; + uint64_t blksize; + uint64_t blocks; + uint32_t nsid; + struct timespec atime; + struct timespec mtime; + struct timespec ctime; + struct timespec btime; +}; + typedef struct libuzfs_zpool_handle libuzfs_zpool_handle_t; typedef struct libuzfs_dataset_handle libuzfs_dataset_handle_t; +typedef struct uzfs_attr uzfs_attr_t; typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); @@ -134,28 +165,9 @@ extern int libuzfs_inode_create(libuzfs_dataset_handle_t *dhp, uint64_t *ino, extern int libuzfs_inode_claim(libuzfs_dataset_handle_t *dhp, uint64_t ino, libuzfs_inode_type_t type); -extern int libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, - libuzfs_inode_type_t type, uint64_t *txg); - -extern int libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - void *attr, uint64_t size); - -extern int libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const void *attr, uint64_t size, uint64_t *txg); - extern int libuzfs_inode_get_kvobj(libuzfs_dataset_handle_t *dhp, uint64_t ino, uint64_t *kvobj); -extern int libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, const char *value, uint64_t size, int flags, - uint64_t *txg); - -extern int libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, 
- const char *name, char *value, uint64_t size, int flags); - -extern int libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, - uint64_t ino, const char *name, uint64_t *txg); - extern int libuzfs_dentry_create(libuzfs_dataset_handle_t *dhp, uint64_t dino, const char *name, uint64_t *value, uint64_t num, uint64_t *txg); @@ -196,6 +208,26 @@ extern int libuzfs_write(uint64_t fsid, uint64_t ino, zfs_uio_t *uio, extern int libuzfs_fsync(uint64_t fsid, uint64_t ino, int syncflag); +extern void libuzfs_open_dataset_attr(libuzfs_dataset_handle_t *dhp); +extern int libuzfs_create_inode_with_type( + libuzfs_dataset_handle_t *dhp, uint64_t *obj, + boolean_t claiming, libuzfs_inode_type_t type, uint64_t *txg); +extern int libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, + libuzfs_inode_type_t type, uint64_t *txg); +extern int libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + void *attr, uint64_t size); +extern int libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const uzfs_attr_t *attr, uint64_t *txg); +extern int libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, const char *value, + uint64_t size, int flags, uint64_t *txg); +extern int libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, char *value, uint64_t size, int flags); +extern int libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, + uint64_t ino, const char *name, uint64_t *txg); +extern int uzfs_get_file_info(dmu_object_type_t bonustype, const void *data, + zfs_file_info_t *zoi); + #ifdef __cplusplus } #endif diff --git a/include/libuzfs_impl.h b/include/libuzfs_impl.h index 09e71054d475..a9ff262b2e75 100644 --- a/include/libuzfs_impl.h +++ b/include/libuzfs_impl.h @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -46,8 +47,14 @@ struct libuzfs_dataset_handle { objset_t *os; zilog_t *zilog; uint64_t sb_ino; + sa_attr_type_t 
*uzfs_attr_table; }; +#define UZFS_SIZE_OFFSET 0 +#define UZFS_GEN_OFFSET 8 +#define UZFS_UID_OFFSET 16 +#define UZFS_GID_OFFSET 24 +#define UZFS_PARENT_OFFSET 32 #ifdef __cplusplus } diff --git a/lib/libuzfs/Makefile.am b/lib/libuzfs/Makefile.am index b725359e2978..77ecac25ae27 100644 --- a/lib/libuzfs/Makefile.am +++ b/lib/libuzfs/Makefile.am @@ -14,7 +14,9 @@ pkgconfig_DATA = libuzfs.pc lib_LTLIBRARIES = libuzfs.la -USER_C = libuzfs.c +USER_C = \ + libuzfs.c \ + libuzfs_attr.c libuzfs_la_SOURCES = $(USER_C) diff --git a/lib/libuzfs/libuzfs.c b/lib/libuzfs/libuzfs.c index caf2ddfb84ff..52e42198af0b 100644 --- a/lib/libuzfs/libuzfs.c +++ b/lib/libuzfs/libuzfs.c @@ -429,6 +429,10 @@ libuzfs_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) err = zap_add(os, MASTER_NODE_OBJ, UZFS_SB_OBJ, 8, 1, &sb_obj, tx); ASSERT(err == 0); + uint64_t sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, + DMU_OT_NONE, 0, tx); + err = zap_add(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); + ASSERT(err == 0); } int @@ -501,6 +505,7 @@ libuzfs_dataset_open(const char *dsname) dhp, &os)); libuzfs_dhp_init(dhp, os); + dmu_objset_register_type(DMU_OST_ZFS, uzfs_get_file_info); zilog = dhp->zilog; @@ -508,6 +513,7 @@ libuzfs_dataset_open(const char *dsname) zilog = zil_open(os, libuzfs_get_data); + libuzfs_open_dataset_attr(dhp); return (dhp); } @@ -515,6 +521,9 @@ void libuzfs_dataset_close(libuzfs_dataset_handle_t *dhp) { zil_close(dhp->zilog); + if (dhp->os->os_sa != NULL) { + sa_tear_down(dhp->os); + } dmu_objset_disown(dhp->os, B_TRUE, dhp); libuzfs_dhp_fini(dhp); free(dhp); @@ -574,35 +583,8 @@ int libuzfs_object_create(libuzfs_dataset_handle_t *dhp, uint64_t *obj, uint64_t *txg) { - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - - tx = dmu_tx_create(os); - - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = 
DN_BONUS_SIZE(dnodesize); - int blocksize = 0; - int ibshift = 0; - - *obj = dmu_object_alloc_dnsize(os, DMU_OT_PLAIN_FILE_CONTENTS, 0, - DMU_OT_PLAIN_OTHER, bonuslen, dnodesize, tx); - - VERIFY0(dmu_object_set_blocksize(os, *obj, blocksize, ibshift, tx)); - - *txg = tx->tx_txg; - dmu_tx_commit(tx); - -out: - return (err); + return (libuzfs_create_inode_with_type(dhp, obj, + B_FALSE, INODE_FILE, txg)); } int @@ -635,75 +617,16 @@ libuzfs_object_delete(libuzfs_dataset_handle_t *dhp, uint64_t obj, int libuzfs_object_claim(libuzfs_dataset_handle_t *dhp, uint64_t obj) { - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = DN_BONUS_SIZE(dnodesize); - int blocksize = 0; - int ibs = 0; - - tx = dmu_tx_create(os); - - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - err = dmu_object_claim_dnsize(os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, - DMU_OT_PLAIN_OTHER, bonuslen, dnodesize, tx); - goto out; - - VERIFY0(dmu_object_set_blocksize(os, obj, blocksize, ibs, tx)); - - dmu_tx_commit(tx); - -out: - return (err); + return (libuzfs_create_inode_with_type(dhp, &obj, + B_TRUE, INODE_FILE, 0)); } int TEST_libuzfs_object_claim(libuzfs_dataset_handle_t *dhp, uint64_t obj, uint64_t *txg) { - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = DN_BONUS_SIZE(dnodesize); - int type = DMU_OT_UINT64_OTHER; - int bonus_type = DMU_OT_UINT64_OTHER; - int blocksize = 0; - int ibs = 0; - - tx = dmu_tx_create(os); - - dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - err = dmu_object_claim_dnsize(os, obj, type, 0, bonus_type, bonuslen, - dnodesize, tx); - if (err) - goto out; - - VERIFY0(dmu_object_set_blocksize(os, obj, blocksize, ibs, tx)); - - *txg = tx->tx_txg; - dmu_tx_commit(tx); - -out: 
- return (err); - + return (libuzfs_create_inode_with_type(dhp, &obj, + B_TRUE, INODE_FILE, txg)); } uint64_t @@ -1030,140 +953,16 @@ int libuzfs_inode_create(libuzfs_dataset_handle_t *dhp, uint64_t *ino, libuzfs_inode_type_t type, uint64_t *txg) { - if (type == INODE_FILE) - return (libuzfs_object_create(dhp, ino, txg)); - - if (type == INODE_DIR) - return (libuzfs_zap_create(dhp, ino, txg)); - - return (EINVAL); + return (libuzfs_create_inode_with_type(dhp, ino, + B_FALSE, type, txg)); } int libuzfs_inode_claim(libuzfs_dataset_handle_t *dhp, uint64_t ino, libuzfs_inode_type_t type) { - if (type == INODE_FILE) - return (libuzfs_object_claim(dhp, ino)); - - if (type == INODE_DIR) - return (libuzfs_zap_claim(dhp, ino)); - - return (EINVAL); -} - -static int -libuzfs_inode_kvobj_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, - libuzfs_inode_type_t type, uint64_t kvobj, uint64_t *txg) -{ - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - - tx = dmu_tx_create(os); - - dmu_tx_hold_free(tx, kvobj, 0, DMU_OBJECT_END); - dmu_tx_hold_free(tx, ino, 0, DMU_OBJECT_END); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - VERIFY0(zap_destroy(os, kvobj, tx)); - - if (type == INODE_FILE) - VERIFY0(dmu_object_free(os, ino, tx)); - else if (type == INODE_DIR) - VERIFY0(zap_destroy(os, ino, tx)); - - *txg = tx->tx_txg; - dmu_tx_commit(tx); - -out: - return (err); -} - -int -libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, - libuzfs_inode_type_t type, uint64_t *txg) -{ - if (type != INODE_FILE && type != INODE_DIR) - return (EINVAL); - - uint64_t kvobj = 0; - VERIFY0(libuzfs_inode_get_kvobj(dhp, ino, &kvobj)); - - if (kvobj == 0) { - if (type == INODE_FILE) - return (libuzfs_object_delete(dhp, ino, txg)); - if (type == INODE_DIR) - return (libuzfs_zap_delete(dhp, ino, txg)); - } - - return (libuzfs_inode_kvobj_delete(dhp, ino, type, kvobj, txg)); -} - -int -libuzfs_inode_getattr(libuzfs_dataset_handle_t 
*dhp, uint64_t ino, void *attr, - uint64_t size) -{ - return (libuzfs_object_getattr(dhp, ino, attr, size)); -} - -int -libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const void *attr, uint64_t size, uint64_t *txg) -{ - return (libuzfs_object_setattr(dhp, ino, attr, size, txg)); -} - -static int -libuzfs_object_kvattr_create_add(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *key, const char *value, uint64_t size, int flags, uint64_t *txg) -{ - int err = 0; - dmu_tx_t *tx = NULL; - objset_t *os = dhp->os; - dmu_buf_t *db; - uint64_t kvobj = 0; - - tx = dmu_tx_create(os); - - dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); - dmu_tx_hold_bonus(tx, ino); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - int dnodesize = dmu_objset_dnodesize(os); - int bonuslen = DN_BONUS_SIZE(dnodesize); - - kvobj = zap_create_dnsize(os, DMU_OT_DIRECTORY_CONTENTS, - DMU_OT_PLAIN_OTHER, bonuslen, dnodesize, tx); - - err = zap_add(os, kvobj, key, 1, size, value, tx); - if (err) { - dmu_tx_abort(tx); - goto out; - } - - VERIFY0(dmu_bonus_hold(os, ino, FTAG, &db)); - dmu_buf_will_dirty(db, tx); - bcopy(&kvobj, db->db_data, sizeof (kvobj)); - dmu_buf_rele(db, FTAG); - - *txg = tx->tx_txg; - dmu_tx_commit(tx); - - libuzfs_wait_synced(dhp); - -out: - return (err); + return (libuzfs_create_inode_with_type(dhp, &ino, + B_TRUE, type, NULL)); /* claim the caller-supplied ino */ } int @@ -1179,60 +978,6 @@ libuzfs_inode_get_kvobj(libuzfs_dataset_handle_t *dhp, uint64_t ino, return (0); } -int -libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, char *value, uint64_t size, int flags) -{ - int err = 0; - uint64_t kvobj; - - err = libuzfs_inode_get_kvobj(dhp, ino, &kvobj); - if (err) - return (err); - - if (kvobj == 0) - return (ENOENT); - - return (libuzfs_zap_lookup(dhp, kvobj, name, 1, size, value)); -} - -// TODO(hping): remove kvobj when no kv attr -int -libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp,
uint64_t ino, - const char *name, uint64_t *txg) -{ - int err = 0; - uint64_t kvobj; - - err = libuzfs_inode_get_kvobj(dhp, ino, &kvobj); - if (err) - return (err); - - if (kvobj == 0) - return (ENOENT); - - return (libuzfs_zap_remove(dhp, kvobj, name, txg)); -} - -int -libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, - const char *name, const char *value, uint64_t size, int flags, - uint64_t *txg) -{ - int err = 0; - uint64_t kvobj; - - err = libuzfs_inode_get_kvobj(dhp, ino, &kvobj); - if (err) - return (err); - - if (kvobj == 0) - return (libuzfs_object_kvattr_create_add(dhp, ino, name, - value, size, flags, txg)); - - return (libuzfs_zap_update(dhp, kvobj, name, 1, size, value, txg)); -} - int libuzfs_dentry_create(libuzfs_dataset_handle_t *dhp, uint64_t dino, const char *name, uint64_t *value, uint64_t num, uint64_t *txg) { diff --git a/lib/libuzfs/libuzfs_attr.c b/lib/libuzfs/libuzfs_attr.c new file mode 100644 index 000000000000..dd341c89c59d --- /dev/null +++ b/lib/libuzfs/libuzfs_attr.c @@ -0,0 +1,690 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum uzfs_attr_type { + UZFS_PINO, + UZFS_PSID, + UZFS_FTYPE, + UZFS_GEN, + UZFS_NLINK, + UZFS_PERM, + UZFS_UID, + UZFS_GID, + UZFS_SIZE, + UZFS_BLKSIZE, + UZFS_BLOCKS, + UZFS_NSID, + UZFS_ATIME, + UZFS_MTIME, + UZFS_CTIME, + UZFS_BTIME, + UZFS_DXATTR, // sa index for dir xattr inode + UZFS_XATTR, // sa index for sa xattr (name, value) pairs + UZFS_END +} uzfs_attr_type_t; + +sa_attr_reg_t uzfs_attr_table[UZFS_END+1] = { + {"UZFS_PINO", sizeof (uint64_t), SA_UINT64_ARRAY, 0}, + {"UZFS_PSID", sizeof (uint32_t), SA_UINT32_ARRAY, 1}, + {"UZFS_FTYPE", sizeof (FileType), SA_UINT32_ARRAY, 2}, + {"UZFS_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 3}, + {"UZFS_NLINK", sizeof (uint32_t), SA_UINT32_ARRAY, 4}, + {"UZFS_PERM", sizeof (uint32_t), SA_UINT32_ARRAY, 5}, + {"UZFS_UID", sizeof (uint64_t), 
SA_UINT64_ARRAY, 6}, + {"UZFS_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 7}, /* 64-bit swap */ + {"UZFS_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 8}, + {"UZFS_BLKSIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 9}, + {"UZFS_BLOCKS", sizeof (uint64_t), SA_UINT64_ARRAY, 10}, + {"UZFS_NSID", sizeof (uint32_t), SA_UINT32_ARRAY, 11}, + {"UZFS_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 12}, + {"UZFS_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 13}, + {"UZFS_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 14}, + {"UZFS_BTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 15}, + {"UZFS_DXATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 16}, + {"UZFS_XATTR", 0, SA_UINT8_ARRAY, 0}, + {NULL, 0, 0, 0} +}; + +int +uzfs_get_file_info(dmu_object_type_t bonustype, const void *data, + zfs_file_info_t *zoi) +{ + if (bonustype != DMU_OT_SA) + return (SET_ERROR(ENOENT)); + + zoi->zfi_project = ZFS_DEFAULT_PROJID; + + /* + * If we have a NULL data pointer + * then assume the id's aren't changing and + * return EEXIST to the dmu to let it know to + * use the same ids + */ + if (data == NULL) + return (SET_ERROR(EEXIST)); + + const sa_hdr_phys_t *sap = data; + if (sap->sa_magic == 0) { + /* + * This should only happen for newly created files + * that haven't had the znode data filled in yet.
+ */ + zoi->zfi_user = 0; + zoi->zfi_group = 0; + zoi->zfi_generation = 0; + return (0); + } + + sa_hdr_phys_t sa = *sap; + boolean_t swap = B_FALSE; + if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { + sa.sa_magic = SA_MAGIC; + sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); + swap = B_TRUE; + } + VERIFY3U(sa.sa_magic, ==, SA_MAGIC); + + int hdrsize = sa_hdrsize(&sa); + VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); + + uintptr_t data_after_hdr = (uintptr_t)data + hdrsize; + zoi->zfi_user = *((uint64_t *)(data_after_hdr + UZFS_UID_OFFSET)); + zoi->zfi_group = *((uint64_t *)(data_after_hdr + UZFS_GID_OFFSET)); + zoi->zfi_generation = *((uint64_t *)(data_after_hdr + UZFS_GEN_OFFSET)); + + if (swap) { + zoi->zfi_user = BSWAP_64(zoi->zfi_user); + zoi->zfi_group = BSWAP_64(zoi->zfi_group); + zoi->zfi_project = BSWAP_64(zoi->zfi_project); + zoi->zfi_generation = BSWAP_64(zoi->zfi_generation); + } + return (0); +} + +void +libuzfs_open_dataset_attr(libuzfs_dataset_handle_t *dhp) +{ + uint64_t sa_obj; + VERIFY0(zap_lookup(dhp->os, MASTER_NODE_OBJ, + ZFS_SA_ATTRS, 8, 1, &sa_obj)); + sa_setup(dhp->os, sa_obj, uzfs_attr_table, + UZFS_END, &dhp->uzfs_attr_table); +} + +// make sure sa_attrs has enough space +static void +libuzfs_add_bulk_attr(libuzfs_dataset_handle_t *dhp, sa_bulk_attr_t *sa_attrs, + int *cnt, uzfs_attr_t *attr) +{ + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_SIZE], + NULL, &attr->size, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_GEN], + NULL, &attr->gen, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_UID], + NULL, &attr->uid, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_GID], + NULL, &attr->gid, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_PINO], + NULL, &attr->pino, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_PSID], + NULL, &attr->psid, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_FTYPE], + NULL, &attr->ftype, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), 
sa_tbl[UZFS_NLINK], + NULL, &attr->nlink, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_PERM], + NULL, &attr->perm, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_BLKSIZE], + NULL, &attr->blksize, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_BLOCKS], + NULL, &attr->blocks, 8); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_NSID], + NULL, &attr->nsid, 4); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_ATIME], + NULL, &attr->atime, 16); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_MTIME], + NULL, &attr->mtime, 16); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_CTIME], + NULL, &attr->ctime, 16); + SA_ADD_BULK_ATTR(sa_attrs, (*cnt), sa_tbl[UZFS_BTIME], + NULL, &attr->btime, 16); +} + +static int +libuzfs_object_attr_init(libuzfs_dataset_handle_t *dhp, + sa_handle_t *sa_hdl, dmu_tx_t *tx) +{ + sa_bulk_attr_t sa_attrs[UZFS_END]; + int cnt = 0; + uzfs_attr_t attr; + memset(&attr, 0, sizeof (attr)); + libuzfs_add_bulk_attr(dhp, sa_attrs, &cnt, &attr); + return (sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx)); +} + +static void +libuzfs_create_inode_with_type_impl(libuzfs_dataset_handle_t *dhp, + uint64_t *obj, boolean_t claiming, libuzfs_inode_type_t type, + dmu_tx_t *tx) +{ + objset_t *os = dhp->os; + // create/claim object + int dnodesize = dmu_objset_dnodesize(os); + int bonuslen = DN_BONUS_SIZE(dnodesize); + if (type == INODE_FILE) { + if (claiming) { + ASSERT(*obj != 0); + VERIFY0(dmu_object_claim_dnsize(os, *obj, + DMU_OT_PLAIN_FILE_CONTENTS, + 0, DMU_OT_SA, bonuslen, dnodesize, tx)); + } else { + *obj = dmu_object_alloc_dnsize(os, + DMU_OT_PLAIN_FILE_CONTENTS, 0, + DMU_OT_SA, bonuslen, dnodesize, tx); + } + } else { + if (claiming) { + ASSERT(*obj != 0); + VERIFY0(zap_create_claim_dnsize(os, *obj, + DMU_OT_DIRECTORY_CONTENTS, DMU_OT_SA, + bonuslen, dnodesize, tx)); + } else { + *obj = zap_create_dnsize(os, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_SA, bonuslen, dnodesize, tx); + } + } + + sa_handle_t *sa_hdl; + sa_handle_get(os, 
*obj, NULL, SA_HDL_PRIVATE, &sa_hdl); + VERIFY0(libuzfs_object_attr_init(dhp, sa_hdl, tx)); + // maybe we shouldn't destroy handle here + sa_handle_destroy(sa_hdl); +} + +int +libuzfs_create_inode_with_type(libuzfs_dataset_handle_t *dhp, uint64_t *obj, + boolean_t claiming, libuzfs_inode_type_t type, uint64_t *txg) +{ + objset_t *os = dhp->os; + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_sa_create(tx, sizeof (uzfs_attr_t)); + if (type == INODE_DIR) { + dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); + } + int err = dmu_tx_assign(tx, TXG_WAIT); + if (err != 0) { + dmu_tx_abort(tx); + return (err); + } + libuzfs_create_inode_with_type_impl(dhp, obj, claiming, type, tx); + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + return (0); +} + +int +libuzfs_inode_delete(libuzfs_dataset_handle_t *dhp, uint64_t ino, + libuzfs_inode_type_t type, uint64_t *txg) +{ + objset_t *os = dhp->os; + sa_handle_t *sa_hdl; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_free(tx, ino, 0, DMU_OBJECT_END); + uint64_t xattr_obj = 0; + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + err = sa_lookup(sa_hdl, sa_tbl[UZFS_DXATTR], + &xattr_obj, sizeof (xattr_obj)); + + uint64_t *value_objs = NULL; + uint64_t obj_cnt = 0; + if (err == 0 && xattr_obj != 0) { + dmu_tx_hold_free(tx, xattr_obj, 0, DMU_OBJECT_END); + if ((err = zap_count(os, xattr_obj, &obj_cnt)) != 0) { + goto out; + } + if (obj_cnt > 0) { + value_objs = umem_alloc(obj_cnt * sizeof (uint64_t), + UMEM_NOFAIL); + } + + zap_cursor_t zc; + zap_attribute_t attr; + uint64_t pos = 0; + for (zap_cursor_init(&zc, os, xattr_obj); + zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_advance(&zc)) { + ASSERT(attr.za_integer_length == 8); + ASSERT(attr.za_num_integers == 1); + + uint64_t value_obj; + VERIFY0(zap_lookup(os, xattr_obj, + attr.za_name, 8, 1, &value_obj)); + value_objs[pos++] = value_obj; + 
dmu_tx_hold_free(tx, value_obj, 0, DMU_OBJECT_END); + } + zap_cursor_fini(&zc); + } + + if ((err = dmu_tx_assign(tx, TXG_WAIT)) != 0) { + goto out; + } + + if (xattr_obj != 0) { + zap_destroy(os, xattr_obj, tx); + for (uint64_t i = 0; i < obj_cnt; ++i) { + dmu_object_free(os, value_objs[i], tx); + } + } + + sa_handle_destroy(sa_hdl); + if (type == INODE_DIR) { + VERIFY0(zap_destroy(os, ino, tx)); + } else { + VERIFY0(dmu_object_free(os, ino, tx)); + } + +out: + if (obj_cnt > 0) { + umem_free(value_objs, sizeof (uint64_t) * obj_cnt); + } + + if (err == 0) { + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + } else { + sa_handle_destroy(sa_hdl); + dmu_tx_abort(tx); + } + return (err); +} + +int +libuzfs_inode_getattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + void *attr, uint64_t size) +{ + ASSERT(size == sizeof (uzfs_attr_t)); + sa_handle_t *sa_hdl; + int err = sa_handle_get(dhp->os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + sa_bulk_attr_t sa_attrs[UZFS_END]; + int cnt = 0; + libuzfs_add_bulk_attr(dhp, sa_attrs, &cnt, (uzfs_attr_t *)attr); + err = sa_bulk_lookup(sa_hdl, sa_attrs, cnt); + + sa_handle_destroy(sa_hdl); + return (err); +} + +int +libuzfs_inode_setattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const uzfs_attr_t *attr, uint64_t *txg) +{ + sa_handle_t *sa_hdl; + objset_t *os = dhp->os; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_sa(tx, sa_hdl, B_FALSE); + if ((err = dmu_tx_assign(tx, TXG_WAIT)) != 0) { + sa_handle_destroy(sa_hdl); /* don't leak the handle */ + dmu_tx_abort(tx); + return (err); + } + sa_bulk_attr_t sa_attrs[UZFS_END]; + int cnt = 0; + libuzfs_add_bulk_attr(dhp, sa_attrs, &cnt, (uzfs_attr_t *)attr); + VERIFY0(sa_bulk_update(sa_hdl, sa_attrs, cnt, tx)); + sa_handle_destroy(sa_hdl); + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + return (0); +} + +static int +libuzfs_get_nvlist_from_handle(const
sa_attr_type_t *sa_tbl, + nvlist_t **nvl, sa_handle_t *sa_hdl) +{ + int xattr_size; + int err = sa_size(sa_hdl, sa_tbl[UZFS_XATTR], &xattr_size); + if (err != 0) { + return (err); + } + + char *obj = vmem_alloc(xattr_size, KM_SLEEP); + err = sa_lookup(sa_hdl, sa_tbl[UZFS_XATTR], obj, xattr_size); + if (err != 0) { + return (err); + } + + err = nvlist_unpack(obj, xattr_size, nvl, KM_SLEEP); + vmem_free(obj, xattr_size); + return (err); +} + +static int +libuzfs_kvattr_set_nvlist(nvlist_t *nvl, int err_check_sa, + int *err_check_sa_enough, const char *name, + const char *value, size_t value_size, size_t *xattr_sa_size) +{ + int err = 0; + if (value == NULL && err_check_sa == 0) { + err = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + } else if (value != NULL) { + err = nvlist_add_byte_array(nvl, name, + (uchar_t *)value, (uint_t)value_size); + } + if (err != 0) { + return (err); + } + + err = nvlist_size(nvl, xattr_sa_size, NV_ENCODE_XDR); + if (err != 0) { + return (err); + } + + if (*xattr_sa_size > DXATTR_MAX_SA_SIZE || + *xattr_sa_size > SA_ATTR_MAX_LEN) { + *err_check_sa_enough = EFBIG; + err = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); + if (err != 0 && value != NULL) { + return (err); + } + return (nvlist_size(nvl, xattr_sa_size, NV_ENCODE_XDR)); + } + return (err); +} + +static void +libuzfs_save_xattr_dir(sa_handle_t *sa_hdl, libuzfs_dataset_handle_t *dhp, + uint64_t xattr_obj, uint64_t value_obj, const char *name, + const char *value, size_t size, dmu_tx_t *tx) +{ + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + objset_t *os = sa_hdl->sa_os; + int dnodesize = dmu_objset_dnodesize(os); + if (xattr_obj == DMU_NEW_OBJECT) { + xattr_obj = zap_create_dnsize(os, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_NONE, 0, dnodesize, tx); + VERIFY0(sa_update(sa_hdl, sa_tbl[UZFS_DXATTR], + &xattr_obj, 8, tx)); + } + + if (value_obj == DMU_NEW_OBJECT) { + libuzfs_create_inode_with_type_impl(dhp, &value_obj, + B_FALSE, INODE_FILE, tx); + VERIFY0(zap_update(os, xattr_obj, 
name, + 8, 1, &value_obj, tx)); + } + + dmu_write(os, value_obj, 0, size, value, tx); + + sa_handle_t *value_obj_hdl = NULL; + VERIFY0(sa_handle_get(os, value_obj, NULL, + SA_HDL_PRIVATE, &value_obj_hdl)); + VERIFY0(sa_update(value_obj_hdl, + sa_tbl[UZFS_SIZE], &size, 8, tx)); + sa_handle_destroy(value_obj_hdl); +} + +int +libuzfs_inode_set_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, const char *value, uint64_t size, + int flags, uint64_t *txg) +{ + // 1. this name is in sa: + // 1.1 sa has enough space for new value, just put in + // 1.2 no enough space, delete from sa and put in dir + // 2. in dir + // 2.1 sa enough space, delete from dir and put in sa + // 2.2 not enough, put in dir + // 3. neigther + // 3.1 sa enough, put in sa + // 3.2 not enough, put in dir + sa_handle_t *sa_hdl; + objset_t *os = dhp->os; + int err = sa_handle_get(os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + nvlist_t *nvl; + sa_attr_type_t *sa_tbl = dhp->uzfs_attr_table; + err = libuzfs_get_nvlist_from_handle(sa_tbl, &nvl, sa_hdl); + if (err == ENOENT) { + err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); + } + + if (err != 0) { + goto out; + } + + int err_check_sa = !nvlist_exists(nvl, name); + int err_check_sa_enough = 0; + size_t xattr_size = 0; + err = libuzfs_kvattr_set_nvlist(nvl, err_check_sa, + &err_check_sa_enough, name, value, size, &xattr_size); + if (err != 0) { + goto out2; + } + + uint64_t xattr_obj = DMU_NEW_OBJECT; + int err_check_dir = sa_lookup(sa_hdl, sa_tbl[UZFS_DXATTR], + &xattr_obj, sizeof (xattr_obj)); + if (err_check_dir != ENOENT && err_check_dir != 0) { + err = err_check_dir; + goto out2; + } + uint64_t value_obj = DMU_NEW_OBJECT; + size_t dir_entries = 0; + if (err_check_sa != 0 && err_check_dir == 0) { + err_check_dir = zap_lookup(os, xattr_obj, + name, 8, 1, &value_obj); + if (err_check_dir != ENOENT && err_check_dir != 0) { + err = err_check_dir; + goto out2; + } + + if ((err = zap_count(os, 
xattr_obj, &dir_entries)) != 0) { + goto out2; + } + } + + if (value == NULL && err_check_sa != 0 && err_check_dir != 0) { + err = ENOENT; + goto out2; + } + + dmu_tx_t *tx = dmu_tx_create(os); + dmu_tx_hold_sa(tx, sa_hdl, B_TRUE); + if (!((err_check_sa == 0 || err_check_dir != 0) && + err_check_sa_enough == 0)) { + dmu_tx_hold_zap(tx, xattr_obj, B_TRUE, name); + } + if (err_check_sa_enough != 0) { + if (err_check_sa == 0) { + dmu_tx_hold_sa_create(tx, sizeof (uzfs_attr_t)); + } + dmu_tx_hold_write(tx, value_obj, 0, size); + } + if ((err_check_sa_enough == 0 || value == NULL) && + err_check_sa != 0 && err_check_dir == 0) { + dmu_tx_hold_free(tx, value_obj, 0, DMU_OBJECT_END); + if (dir_entries == 1) { + dmu_tx_hold_free(tx, xattr_obj, 0, DMU_OBJECT_END); + } + } + + if ((err = dmu_tx_assign(tx, TXG_WAIT)) != 0) { + goto out3; + } + + // name was modified in nvl before, we just need to save the nvl + if (err_check_sa == 0 || + (err_check_sa_enough == 0 && value != NULL)) { + if (xattr_size > 0) { + char *packed_nvl = vmem_alloc(xattr_size, KM_SLEEP); + VERIFY0(nvlist_pack(nvl, &packed_nvl, &xattr_size, + NV_ENCODE_XDR, KM_SLEEP)); + VERIFY0(sa_update(sa_hdl, sa_tbl[UZFS_XATTR], + packed_nvl, xattr_size, tx)); + vmem_free(packed_nvl, xattr_size); + } else { + VERIFY0(sa_remove(sa_hdl, sa_tbl[UZFS_XATTR], tx)); + } + } + + if ((err_check_sa_enough == 0 || value == NULL) && + err_check_sa != 0 && err_check_dir == 0) { + dmu_object_free(os, value_obj, tx); + if (dir_entries == 1) { + VERIFY0(zap_destroy(os, xattr_obj, tx)); + VERIFY0(sa_remove(sa_hdl, sa_tbl[UZFS_DXATTR], tx)); + } else { + VERIFY0(zap_remove(os, xattr_obj, name, tx)); + } + } + + if (value != NULL && err_check_sa_enough != 0) { + libuzfs_save_xattr_dir(sa_hdl, dhp, xattr_obj, value_obj, + name, value, size, tx); + } + +out3: + if (err != 0) { + dmu_tx_abort(tx); + } else { + if (txg != NULL) { + *txg = tx->tx_txg; + } + dmu_tx_commit(tx); + } +out2: + nvlist_free(nvl); +out: + 
sa_handle_destroy(sa_hdl); + return (err); +} + +static int +libuzfs_inode_get_kvattr_dir(const sa_attr_type_t *sa_tbl, sa_handle_t *sa_hdl, + const char *name, char *value, size_t size) +{ + // get zap object from sa + uint64_t xattr_obj; + int err = sa_lookup(sa_hdl, sa_tbl[UZFS_DXATTR], + &xattr_obj, sizeof (uint64_t)); + if (err != 0) { + return (err); + } + + objset_t *os = sa_hdl->sa_os; + uint64_t value_obj; + err = zap_lookup(os, xattr_obj, name, 8, 1, &value_obj); + if (err != 0) { + return (err); + } + + sa_handle_t *value_obj_hdl = NULL; + err = sa_handle_get(os, value_obj, NULL, + SA_HDL_PRIVATE, &value_obj_hdl); + if (err != 0) { + return (err); + } + uint64_t stored_size; + err = sa_lookup(value_obj_hdl, sa_tbl[UZFS_SIZE], &stored_size, 8); + sa_handle_destroy(value_obj_hdl); + + if (err == 0 && stored_size > size) { + err = ERANGE; + } else if (err == 0) { + err = dmu_read(os, value_obj, 0, + stored_size, value, DMU_READ_NO_PREFETCH); + } + return (err); +} + +static int +libuzfs_inode_get_kvattr_sa(const sa_attr_type_t *sa_tbl, sa_handle_t *sa_hdl, + const char *name, char *value, size_t size) +{ + nvlist_t *nvl; + int err = libuzfs_get_nvlist_from_handle(sa_tbl, &nvl, sa_hdl); + if (err != 0) { + return (err); + } + + uchar_t *nv_value; + uint_t nv_size = 0; + err = nvlist_lookup_byte_array(nvl, name, &nv_value, &nv_size); + if (err == 0 && nv_size <= size) { + if (value != NULL) { + memcpy(value, nv_value, nv_size); + } + } + nvlist_free(nvl); + + if (nv_size > size) { + return (ERANGE); + } + return (err); +} + +int +libuzfs_inode_get_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, char *value, uint64_t size, int flags) +{ + sa_handle_t *sa_hdl; + int err = sa_handle_get(dhp->os, ino, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (err != 0) { + return (err); + } + + err = libuzfs_inode_get_kvattr_sa(dhp->uzfs_attr_table, sa_hdl, + name, value, size); + if (err == 0 || err != ENOENT) { + goto out; + } + + err = 
libuzfs_inode_get_kvattr_dir(dhp->uzfs_attr_table, sa_hdl, + name, value, size); + +out: + sa_handle_destroy(sa_hdl); + return (err); +} + +int +libuzfs_inode_remove_kvattr(libuzfs_dataset_handle_t *dhp, uint64_t ino, + const char *name, uint64_t *txg) +{ + return (libuzfs_inode_set_kvattr(dhp, ino, name, NULL, 0, 0, txg)); +}