diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index ee5e21f10012..967d74216c06 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -60,7 +60,6 @@ #include #include #include -#undef ZFS_MAXNAMELEN #include #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ @@ -2005,7 +2004,7 @@ dump_dir(objset_t *os) uint64_t refdbytes, usedobjs, scratch; char numbuf[32]; char blkbuf[BP_SPRINTF_LEN + 20]; - char osname[MAXNAMELEN]; + char osname[ZFS_MAX_DATASET_NAME_LEN]; char *type = "UNKNOWN"; int verbosity = dump_opt['d']; int print_header = 1; @@ -3553,7 +3552,7 @@ find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv) nvlist_t *match = NULL; char *name = NULL; char *sepp = NULL; - char sep = 0; + char sep = '\0'; int count = 0; importargs_t args = { 0 }; diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 8bcff2dba785..e8a4421ccf87 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -248,10 +248,11 @@ get_usage(zfs_help_t idx) case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: - return (gettext("\treceive [-vnFu] \n" - "\treceive [-vnFu] [-o origin=] [-d | -e] " - "\n")); + "\treceive [-vnsFu] [-o origin=] [-d | -e] " + "\n" + "\treceive -A \n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" @@ -263,7 +264,8 @@ get_usage(zfs_help_t idx) return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] " "\n" "\tsend [-Le] [-i snapshot|bookmark] " - "\n")); + "\n" + "\tsend [-nvPe] -t \n")); case HELP_SET: return (gettext("\tset ... " " ...\n")); @@ -1481,7 +1483,7 @@ get_callback(zfs_handle_t *zhp, void *data) char buf[ZFS_MAXPROPLEN]; char rbuf[ZFS_MAXPROPLEN]; zprop_source_t sourcetype; - char source[ZFS_MAXNAMELEN]; + char source[ZFS_MAX_DATASET_NAME_LEN]; zprop_get_cbdata_t *cbp = data; nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; @@ -1961,7 +1963,7 @@ typedef struct upgrade_cbdata { uint64_t cb_version; boolean_t cb_newer; boolean_t cb_foundone; - char cb_lastfs[ZFS_MAXNAMELEN]; + char cb_lastfs[ZFS_MAX_DATASET_NAME_LEN]; } upgrade_cbdata_t; static int @@ -2410,7 +2412,7 @@ userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) if (domain != NULL && domain[0] != '\0') { #ifdef HAVE_IDMAP /* SMB */ - char sid[ZFS_MAXNAMELEN + 32]; + char sid[MAXNAMELEN + 32]; uid_t id; uint64_t classes; int err; @@ -2544,7 +2546,7 @@ print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types, size_t *width, us_node_t *node) { nvlist_t *nvl = node->usn_nvl; - char valstr[ZFS_MAXNAMELEN]; + char valstr[MAXNAMELEN]; boolean_t first = B_TRUE; int cfield = 0; int field; @@ -3415,7 +3417,7 @@ zfs_do_rollback(int argc, char **argv) boolean_t force = B_FALSE; rollback_cbdata_t cb = { 0 }; zfs_handle_t *zhp, *snap; - char parentname[ZFS_MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; char *delim; /* check options */ @@ -3707,6 +3709,7 @@ zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; + char *resume_token = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; @@ -3715,7 +3718,7 @@ zfs_do_send(int argc, char **argv) boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpvnPLe")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnPLet:")) != -1) { switch (c) { case 'i': if (fromname) @@ -3756,6 +3759,9 @@ zfs_do_send(int argc, char **argv) case 'e': flags.embed_data = B_TRUE; break; + case 't': + resume_token = optarg; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3771,14 +3777,28 @@ zfs_do_send(int argc, char **argv) argc -= optind; argv += optind; - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); + if (resume_token != NULL) { + if (fromname != NULL || flags.replicate || flags.props || + flags.dedup) { + (void) fprintf(stderr, + gettext("invalid flags combined with -t\n")); + usage(B_FALSE); + } + if (argc != 0) { + (void) fprintf(stderr, gettext("no additional " + "arguments are permitted with -t\n")); + usage(B_FALSE); + } + } else { + if (argc < 1) { + (void) fprintf(stderr, + gettext("missing snapshot argument\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } } if (!flags.dryrun && isatty(STDOUT_FILENO)) { @@ -3788,12 +3808,17 @@ zfs_do_send(int argc, char **argv) return (1); } + if (resume_token != NULL) { + return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO, + resume_token)); + } + /* * Special case sending a filesystem, or from a bookmark. */ if (strchr(argv[0], '@') == NULL || (fromname && strchr(fromname, '#') != NULL)) { - char frombuf[ZFS_MAXNAMELEN]; + char frombuf[ZFS_MAX_DATASET_NAME_LEN]; enum lzc_send_flags lzc_flags = 0; if (flags.replicate || flags.doall || flags.props || @@ -3845,7 +3870,7 @@ zfs_do_send(int argc, char **argv) * case if they specify the origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { - char origin[ZFS_MAXNAMELEN]; + char origin[ZFS_MAX_DATASET_NAME_LEN]; zprop_source_t src; (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, @@ -3893,8 +3918,6 @@ zfs_do_send(int argc, char **argv) } /* - * zfs receive [-vnFu] [-d | -e] - * * Restore a backup stream from stdin. */ static int @@ -3902,6 +3925,8 @@ zfs_do_receive(int argc, char **argv) { int c, err; recvflags_t flags = { 0 }; + boolean_t abort_resumable = B_FALSE; + nvlist_t *props; nvpair_t *nvp = NULL; @@ -3909,7 +3934,7 @@ zfs_do_receive(int argc, char **argv) nomem(); /* check options */ - while ((c = getopt(argc, argv, ":o:denuvF")) != -1) { + while ((c = getopt(argc, argv, ":o:denuvFsA")) != -1) { switch (c) { case 'o': if (parseprop(props, optarg) != 0) @@ -3931,9 +3956,15 @@ zfs_do_receive(int argc, char **argv) case 'v': flags.verbose = B_TRUE; break; + case 's': + flags.resumable = B_TRUE; + break; case 'F': flags.force = B_TRUE; break; + case 'A': + abort_resumable = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3966,6 +3997,44 @@ zfs_do_receive(int argc, char **argv) } } + if (abort_resumable) { + if (flags.isprefix || flags.istail || flags.dryrun || + flags.resumable || flags.nomount) { + (void) fprintf(stderr, gettext("invalid option")); + usage(B_FALSE); + } + + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; + (void) snprintf(namebuf, sizeof (namebuf), + "%s/%%recv", argv[0]); + + if (zfs_dataset_exists(g_zfs, namebuf, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { + zfs_handle_t *zhp = zfs_open(g_zfs, + namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (1); + err = zfs_destroy(zhp, B_FALSE); + } else { + zfs_handle_t *zhp = zfs_open(g_zfs, + argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + usage(B_FALSE); + if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) || + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == -1) { + (void) fprintf(stderr, + gettext("'%s' does not have any " + "resumable receive state to abort\n"), + argv[0]); + return (1); + } + err = zfs_destroy(zhp, B_FALSE); + } + + return (err != 0); + } + if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " @@ -3973,7 +4042,6 @@ zfs_do_receive(int argc, char **argv) "You must redirect standard input.\n")); return (1); } - err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); return (err != 0); @@ -4792,7 +4860,7 @@ store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, { int i; char ld[2] = { '\0', '\0' }; - char who_buf[ZFS_MAXNAMELEN+32]; + char who_buf[MAXNAMELEN + 32]; char base_type = ZFS_DELEG_WHO_UNKNOWN; char set_type = ZFS_DELEG_WHO_UNKNOWN; nvlist_t *base_nvl = NULL; @@ -5156,7 +5224,7 @@ static void print_fs_perms(fs_perm_set_t *fspset) { fs_perm_node_t *node = NULL; - char buf[ZFS_MAXNAMELEN+32]; + char buf[MAXNAMELEN + 32]; const char *dsname = buf; for (node = uu_list_first(fspset->fsps_list); node != NULL; @@ -5165,7 +5233,7 @@ print_fs_perms(fs_perm_set_t *fspset) uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; int left = 0; - (void) snprintf(buf, ZFS_MAXNAMELEN+32, + (void) snprintf(buf, sizeof (buf), gettext("---- Permissions on %s "), node->fspn_fsperm.fsp_name); (void) printf("%s", dsname); @@ -5362,7 +5430,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) for (i = 0; i < argc; ++i) { zfs_handle_t *zhp; - char parent[ZFS_MAXNAMELEN]; + char parent[ZFS_MAX_DATASET_NAME_LEN]; const char *delim; char *path = argv[i]; @@ -5490,7 +5558,7 @@ holds_callback(zfs_handle_t *zhp, void *data) nvlist_t *nvl = NULL; nvpair_t *nvp = NULL; const char *zname = zfs_get_name(zhp); - size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN); + size_t znamelen = strlen(zname); if (cbp->cb_recursive) { const char *snapname; @@ -5511,7 +5579,7 @@ holds_callback(zfs_handle_t *zhp, void *data) while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *tag = nvpair_name(nvp); - size_t taglen = strnlen(tag, MAXNAMELEN); + size_t taglen = strlen(tag); if (taglen > cbp->cb_max_taglen) cbp->cb_max_taglen = taglen; } @@ -5803,6 +5871,24 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, return (0); } + /* + * If this filesystem is inconsistent and has a receive resume + * token, we can not mount it. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { + if (!explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': " + "Contains partially-completed state from " + "\"zfs receive -r\", which can be resumed with " + "\"zfs send -t\"\n"), + cmdname, zfs_get_name(zhp)); + return (1); + } + /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. If the @@ -6609,7 +6695,7 @@ zfs_do_diff(int argc, char **argv) static int zfs_do_bookmark(int argc, char **argv) { - char snapname[ZFS_MAXNAMELEN]; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; zfs_handle_t *zhp; nvlist_t *nvl; int ret = 0; diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index eedd17c30710..cc4f174c661e 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -48,7 +48,6 @@ #include #include #include -#undef ZFS_MAXNAMELEN #include extern boolean_t zfeature_checks_disable; diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index f288d148e574..08d52bb37a83 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -127,7 +127,7 @@ read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) (longlong_t)saved_cksum.zc_word[1], (longlong_t)saved_cksum.zc_word[2], (longlong_t)saved_cksum.zc_word[3]); - exit(1); + return (0); } return (sizeof (*drr)); } @@ -347,8 +347,7 @@ main(int argc, char *argv[]) if (verbose) (void) printf("\n"); - if ((DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == - DMU_COMPOUNDSTREAM) && drr->drr_payloadlen != 0) { + if (drr->drr_payloadlen != 0) { nvlist_t *nv; int sz = drr->drr_payloadlen; diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index ad15dea1d543..2cc410f10cb1 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -145,8 +145,8 @@ typedef struct ztest_shared_hdr { static ztest_shared_hdr_t *ztest_shared_hdr; typedef struct ztest_shared_opts { - char zo_pool[MAXNAMELEN]; - char zo_dir[MAXNAMELEN]; + char zo_pool[ZFS_MAX_DATASET_NAME_LEN]; + char zo_dir[ZFS_MAX_DATASET_NAME_LEN]; char zo_alt_ztest[MAXNAMELEN]; char zo_alt_libpath[MAXNAMELEN]; uint64_t zo_vdevs; @@ -262,7 +262,7 @@ typedef struct ztest_od { uint64_t od_crdnodesize; uint64_t od_gen; uint64_t od_crgen; - char od_name[MAXNAMELEN]; + char od_name[ZFS_MAX_DATASET_NAME_LEN]; } ztest_od_t; /* @@ -274,7 +274,7 @@ typedef struct ztest_ds { rwlock_t zd_zilog_lock; zilog_t *zd_zilog; ztest_od_t *zd_od; /* debugging aid */ - char zd_name[MAXNAMELEN]; + char zd_name[ZFS_MAX_DATASET_NAME_LEN]; kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; zll_t zd_range_lock[ZTEST_RANGE_LOCKS]; @@ -3504,7 +3504,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) static boolean_t ztest_snapshot_create(char *osname, uint64_t id) { - char snapname[MAXNAMELEN]; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; int error; (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); @@ -3524,10 +3524,10 @@ ztest_snapshot_create(char *osname, uint64_t id) static boolean_t ztest_snapshot_destroy(char *osname, uint64_t id) { - char snapname[MAXNAMELEN]; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; int error; - (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, + (void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname, (u_longlong_t)id); error = dsl_destroy_snapshot(snapname, B_FALSE); @@ -3544,16 +3544,15 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) int iters; int error; objset_t *os, *os2; - char *name; + char name[ZFS_MAX_DATASET_NAME_LEN]; zilog_t *zilog; int i; zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL); - name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); (void) rw_rdlock(&ztest_name_lock); - (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", + (void) snprintf(name, sizeof (name), "%s/temp_%llu", ztest_opts.zo_pool, (u_longlong_t)id); /* @@ -3639,7 +3638,6 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) out: (void) rw_unlock(&ztest_name_lock); - umem_free(name, MAXNAMELEN); umem_free(zdtmp, sizeof (ztest_ds_t)); } @@ -3668,22 +3666,22 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id) char *snap3name; int error; - snap1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - clone1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - snap2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - clone2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - snap3name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - - (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", - osname, (u_longlong_t)id); - (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", - osname, (u_longlong_t)id); - (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", - clone1name, (u_longlong_t)id); - (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", - osname, (u_longlong_t)id); - (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", - clone1name, (u_longlong_t)id); + snap1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + clone1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + snap2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + clone2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + snap3name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + + (void) snprintf(snap1name, ZFS_MAX_DATASET_NAME_LEN, + "%s@s1_%llu", osname, (u_longlong_t)id); + (void) snprintf(clone1name, ZFS_MAX_DATASET_NAME_LEN, + "%s/c1_%llu", osname, (u_longlong_t)id); + (void) snprintf(snap2name, ZFS_MAX_DATASET_NAME_LEN, + "%s@s2_%llu", clone1name, (u_longlong_t)id); + (void) snprintf(clone2name, ZFS_MAX_DATASET_NAME_LEN, + "%s/c2_%llu", osname, (u_longlong_t)id); + (void) snprintf(snap3name, ZFS_MAX_DATASET_NAME_LEN, + "%s@s3_%llu", clone1name, (u_longlong_t)id); error = dsl_destroy_head(clone2name); if (error && error != ENOENT) @@ -3701,11 +3699,11 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id) if (error && error != ENOENT) fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); - umem_free(snap1name, MAXNAMELEN); - umem_free(clone1name, MAXNAMELEN); - umem_free(snap2name, MAXNAMELEN); - umem_free(clone2name, MAXNAMELEN); - umem_free(snap3name, MAXNAMELEN); + umem_free(snap1name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(clone1name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(snap2name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(clone2name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(snap3name, ZFS_MAX_DATASET_NAME_LEN); } /* @@ -3723,26 +3721,26 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) char *osname = zd->zd_name; int error; - snap1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - clone1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - snap2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - clone2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); - snap3name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); + snap1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + clone1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + snap2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + clone2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); + snap3name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); (void) rw_rdlock(&ztest_name_lock); ztest_dsl_dataset_cleanup(osname, id); - (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", - osname, (u_longlong_t)id); - (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", - osname, (u_longlong_t)id); - (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", - clone1name, (u_longlong_t)id); - (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", - osname, (u_longlong_t)id); - (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", - clone1name, (u_longlong_t)id); + (void) snprintf(snap1name, ZFS_MAX_DATASET_NAME_LEN, + "%s@s1_%llu", osname, (u_longlong_t)id); + (void) snprintf(clone1name, ZFS_MAX_DATASET_NAME_LEN, + "%s/c1_%llu", osname, (u_longlong_t)id); + (void) snprintf(snap2name, ZFS_MAX_DATASET_NAME_LEN, + "%s@s2_%llu", clone1name, (u_longlong_t)id); + (void) snprintf(clone2name, ZFS_MAX_DATASET_NAME_LEN, + "%s/c2_%llu", osname, (u_longlong_t)id); + (void) snprintf(snap3name, ZFS_MAX_DATASET_NAME_LEN, + "%s@s3_%llu", clone1name, (u_longlong_t)id); error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); if (error && error != EEXIST) { @@ -3808,11 +3806,11 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) (void) rw_unlock(&ztest_name_lock); - umem_free(snap1name, MAXNAMELEN); - umem_free(clone1name, MAXNAMELEN); - umem_free(snap2name, MAXNAMELEN); - umem_free(clone2name, MAXNAMELEN); - umem_free(snap3name, MAXNAMELEN); + umem_free(snap1name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(clone1name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(snap2name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(clone2name, ZFS_MAX_DATASET_NAME_LEN); + umem_free(snap3name, ZFS_MAX_DATASET_NAME_LEN); } #undef OD_ARRAY_SIZE @@ -4622,7 +4620,7 @@ ztest_fzap(ztest_ds_t *zd, uint64_t id) * 2050 entries we should see ptrtbl growth and leaf-block split. */ for (i = 0; i < 2050; i++) { - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; uint64_t value = i; dmu_tx_t *tx; int error; @@ -5077,7 +5075,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) char fullname[100]; char clonename[100]; char tag[100]; - char osname[MAXNAMELEN]; + char osname[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *holds; (void) rw_rdlock(&ztest_name_lock); @@ -5964,13 +5962,13 @@ ztest_thread(void *arg) static void ztest_dataset_name(char *dsname, char *pool, int d) { - (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); + (void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d); } static void ztest_dataset_destroy(int d) { - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; int t; ztest_dataset_name(name, ztest_opts.zo_pool, d); @@ -6019,7 +6017,7 @@ ztest_dataset_open(int d) uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; objset_t *os; zilog_t *zilog; - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; int error; ztest_dataset_name(name, ztest_opts.zo_pool, d); @@ -6256,8 +6254,8 @@ ztest_run(ztest_shared_t *zs) * different name. */ if (ztest_random(2) == 0) { - char name[MAXNAMELEN]; - (void) snprintf(name, MAXNAMELEN, "%s_import", + char name[ZFS_MAX_DATASET_NAME_LEN]; + (void) snprintf(name, sizeof (name), "%s_import", ztest_opts.zo_pool); ztest_spa_import_export(ztest_opts.zo_pool, name); ztest_spa_import_export(name, ztest_opts.zo_pool); @@ -6834,7 +6832,7 @@ main(int argc, char **argv) if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { spa_close(spa, FTAG); } else { - char tmpname[MAXNAMELEN]; + char tmpname[ZFS_MAX_DATASET_NAME_LEN]; kernel_fini(); kernel_init(FREAD | FWRITE); (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", diff --git a/cmd/zvol_id/zvol_id_main.c b/cmd/zvol_id/zvol_id_main.c index 8f0504ae9992..6bd5113f1ea7 100644 --- a/cmd/zvol_id/zvol_id_main.c +++ b/cmd/zvol_id/zvol_id_main.c @@ -39,14 +39,14 @@ static int ioctl_get_msg(char *var, int fd) { int error = 0; - char msg[ZFS_MAXNAMELEN]; + char msg[ZFS_MAX_DATASET_NAME_LEN]; error = ioctl(fd, BLKZNAME, msg); if (error < 0) { return (error); } - snprintf(var, ZFS_MAXNAMELEN, "%s", msg); + snprintf(var, ZFS_MAX_DATASET_NAME_LEN, "%s", msg); return (error); } @@ -54,7 +54,8 @@ int main(int argc, char **argv) { int fd, error = 0; - char zvol_name[ZFS_MAXNAMELEN], zvol_name_part[ZFS_MAXNAMELEN]; + char zvol_name[ZFS_MAX_DATASET_NAME_LEN]; + char zvol_name_part[ZFS_MAX_DATASET_NAME_LEN]; char *dev_name; struct stat64 statbuf; int dev_minor, dev_part; @@ -87,10 +88,11 @@ main(int argc, char **argv) return (errno); } if (dev_part > 0) - snprintf(zvol_name_part, ZFS_MAXNAMELEN, "%s-part%d", zvol_name, - dev_part); + snprintf(zvol_name_part, ZFS_MAX_DATASET_NAME_LEN, + "%s-part%d", zvol_name, dev_part); else - snprintf(zvol_name_part, ZFS_MAXNAMELEN, "%s", zvol_name); + snprintf(zvol_name_part, ZFS_MAX_DATASET_NAME_LEN, + "%s", zvol_name); for (i = 0; i < strlen(zvol_name_part); i++) { if (isblank(zvol_name_part[i])) diff --git a/config/kernel-kobj-name-len.m4 b/config/kernel-kobj-name-len.m4 deleted file mode 100644 index 37999fabb81d..000000000000 --- a/config/kernel-kobj-name-len.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 2.6.27 API change, -dnl # kobject KOBJ_NAME_LEN static limit removed. All users of this -dnl # constant were removed prior to 2.6.27, but to be on the safe -dnl # side this check ensures the constant is undefined. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_KOBJ_NAME_LEN], [ - AC_MSG_CHECKING([whether kernel defines KOBJ_NAME_LEN]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - int val __attribute__ ((unused)); - val = KOBJ_NAME_LEN; - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_KOBJ_NAME_LEN, 1, - [kernel defines KOBJ_NAME_LEN]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 800d782f1eee..086cd0b05b37 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -11,7 +11,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_TYPE_FMODE_T - ZFS_AC_KERNEL_KOBJ_NAME_LEN ZFS_AC_KERNEL_3ARG_BLKDEV_GET ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE diff --git a/config/user-commands.m4 b/config/user-commands.m4 index 655b992418cc..bda2b8652b53 100644 --- a/config/user-commands.m4 +++ b/config/user-commands.m4 @@ -67,6 +67,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_COMMANDS_COMMON], [ AC_PATH_TOOL(SHUF, shuf, "") AC_PATH_TOOL(SLEEP, sleep, "") AC_PATH_TOOL(SORT, sort, "") + AC_PATH_TOOL(STAT, stat, "") AC_PATH_TOOL(STRINGS, strings, "") AC_PATH_TOOL(SU, su, "") AC_PATH_TOOL(SUM, sum, "") @@ -75,6 +76,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_COMMANDS_COMMON], [ AC_PATH_TOOL(TAR, tar, "") AC_PATH_TOOL(TOUCH, touch, "") AC_PATH_TOOL(TR, tr, "") + AC_PATH_TOOL(TRUNCATE, truncate, "") AC_PATH_TOOL(TRUE, true, "") AC_PATH_TOOL(UMASK, umask, "") AC_PATH_TOOL(UMOUNT, umount, "") @@ -103,7 +105,6 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_COMMANDS_LINUX], [ AC_PATH_TOOL(SHARE, exportfs, "") AC_PATH_TOOL(SWAP, swapon, "") AC_PATH_TOOL(SWAPADD, swapon, "") - AC_PATH_TOOL(TRUNCATE, truncate, "") AC_PATH_TOOL(UDEVADM, udevadm, "") AC_PATH_TOOL(UFSDUMP, dump, "") AC_PATH_TOOL(UFSRESTORE, restore, "") diff --git a/include/libzfs.h b/include/libzfs.h index 654b93284318..f83e21423aa0 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -49,8 +49,6 @@ extern "C" { /* * Miscellaneous ZFS constants */ -#define ZFS_MAXNAMELEN MAXNAMELEN -#define ZPOOL_MAXNAMELEN MAXNAMELEN #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN @@ -640,6 +638,10 @@ typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags); +extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, + const char *); +extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, + const char *token); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, @@ -680,6 +682,12 @@ typedef struct recvflags { /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; + /* + * Mark the file systems as "resumable" and do not destroy them if the + * receive is interrupted + */ + boolean_t resumable; + /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; diff --git a/include/libzfs_core.h b/include/libzfs_core.h index bdd6c951ee49..9e761004aa36 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #ifndef _LIBZFS_CORE_H @@ -58,9 +58,21 @@ enum lzc_send_flags { }; int lzc_send(const char *, const char *, int, enum lzc_send_flags); -int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); +int lzc_send_resume(const char *, const char *, int, + enum lzc_send_flags, uint64_t, uint64_t); int lzc_send_space(const char *, const char *, uint64_t *); +struct dmu_replay_record; + +int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); +int lzc_receive_resumable(const char *, nvlist_t *, const char *, + boolean_t, int); +int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, + boolean_t, int, const struct dmu_replay_record *); +int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t, + boolean_t, int, const struct dmu_replay_record *, int, uint64_t *, + uint64_t *, uint64_t *, nvlist_t **); + boolean_t lzc_exists(const char *); int lzc_rollback(const char *, char *, int); diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index 1efe4e2330dd..36441ed7bb3a 100644 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -21,17 +21,17 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ #ifndef _LIBZFS_IMPL_H #define _LIBZFS_IMPL_H -#include #include -#include #include #include +#include +#include #include #include @@ -85,7 +85,7 @@ struct libzfs_handle { struct zfs_handle { libzfs_handle_t *zfs_hdl; zpool_handle_t *zpool_hdl; - char zfs_name[ZFS_MAXNAMELEN]; + char zfs_name[ZFS_MAX_DATASET_NAME_LEN]; zfs_type_t zfs_type; /* type including snapshot */ zfs_type_t zfs_head_type; /* type excluding snapshot */ dmu_objset_stats_t zfs_dmustats; @@ -106,7 +106,7 @@ struct zfs_handle { struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; - char zpool_name[ZPOOL_MAXNAMELEN]; + char zpool_name[ZFS_MAX_DATASET_NAME_LEN]; int zpool_state; size_t zpool_config_size; nvlist_t *zpool_config; diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 74001d8f3ca5..87a8a40eb782 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -816,7 +816,7 @@ typedef struct dmu_objset_stats { dmu_objset_type_t dds_type; uint8_t dds_is_snapshot; uint8_t dds_inconsistent; - char dds_origin[MAXNAMELEN]; + char dds_origin[ZFS_MAX_DATASET_NAME_LEN]; } dmu_objset_stats_t; /* diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 75d094f0812e..3e4423c8d6a0 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -24,7 +24,7 @@ */ /* * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -268,10 +268,11 @@ typedef struct dmu_sendarg { uint64_t dsa_toguid; int dsa_err; dmu_pendop_t dsa_pending_op; - boolean_t dsa_incremental; uint64_t dsa_featureflags; uint64_t dsa_last_data_object; uint64_t dsa_last_data_offset; + uint64_t dsa_resume_object; + uint64_t dsa_resume_offset; } dmu_sendarg_t; void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *); diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h index 2442a1f8aab1..871f5625460e 100644 --- a/include/sys/dmu_send.h +++ b/include/sys/dmu_send.h @@ -36,10 +36,13 @@ struct vnode; struct dsl_dataset; struct drr_begin; struct avl_tree; +struct dmu_replay_record; -int dmu_send(const char *tosnap, const char *fromsnap, - boolean_t embedok, boolean_t large_block_ok, - int outfd, struct vnode *vp, offset_t *off); +extern const char *recv_clone_name; + +int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff, + struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, uint64_t *sizep); int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, @@ -50,12 +53,14 @@ int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, typedef struct dmu_recv_cookie { struct dsl_dataset *drc_ds; + struct dmu_replay_record *drc_drr_begin; struct drr_begin *drc_drrb; const char *drc_tofs; const char *drc_tosnap; boolean_t drc_newfs; boolean_t drc_byteswap; boolean_t drc_force; + boolean_t drc_resumable; struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; @@ -63,8 +68,9 @@ typedef struct dmu_recv_cookie { cred_t *drc_cred; } dmu_recv_cookie_t; -int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, - boolean_t force, char *origin, dmu_recv_cookie_t *drc); +int dmu_recv_begin(char *tofs, char *tosnap, + struct dmu_replay_record *drr_begin, + boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc); int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, int cleanup_fd, uint64_t *action_handlep); int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner); diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h index 544b721e4612..c010edd440d9 100644 --- a/include/sys/dmu_traverse.h +++ b/include/sys/dmu_traverse.h @@ -54,6 +54,8 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); +int traverse_dataset_resume(struct dsl_dataset *ds, uint64_t txg_start, + zbookmark_phys_t *resume, int flags, blkptr_cb_t func, void *arg); int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr, uint64_t txg_start, zbookmark_phys_t *resume, int flags, blkptr_cb_t func, void *arg); diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index f3e64a77236e..4d2f5e418bee 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -98,6 +98,18 @@ struct dsl_pool; */ #define DS_FIELD_LARGE_DNODE "org.zfsonlinux:large_dnode" +/* + * These fields are set on datasets that are in the middle of a resumable + * receive, and allow the sender to resume the send if it is interrupted. + */ +#define DS_FIELD_RESUME_FROMGUID "com.delphix:resume_fromguid" +#define DS_FIELD_RESUME_TONAME "com.delphix:resume_toname" +#define DS_FIELD_RESUME_TOGUID "com.delphix:resume_toguid" +#define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object" +#define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset" +#define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes" +#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" + /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * name lookups should be performed case-insensitively. @@ -191,6 +203,14 @@ typedef struct dsl_dataset { kmutex_t ds_sendstream_lock; list_t ds_sendstreams; + /* + * When in the middle of a resumable receive, tracks how much + * progress we have made. + */ + uint64_t ds_resume_object[TXG_SIZE]; + uint64_t ds_resume_offset[TXG_SIZE]; + uint64_t ds_resume_bytes[TXG_SIZE]; + /* Protected by our dsl_dir's dd_lock */ list_t ds_prop_cbs; @@ -207,7 +227,7 @@ typedef struct dsl_dataset { uint8_t ds_feature_activation_needed[SPA_FEATURES]; /* Protected by ds_lock; keep at end of struct for better locality */ - char ds_snapname[MAXNAMELEN]; + char ds_snapname[ZFS_MAX_DATASET_NAME_LEN]; } dsl_dataset_t; static inline dsl_dataset_phys_t * @@ -242,6 +262,8 @@ int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); +int dsl_dataset_namelen(dsl_dataset_t *ds); +boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, @@ -322,6 +344,8 @@ int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, zprop_source_t source, uint64_t value, dmu_tx_t *tx); void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx); +boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds); +boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds); int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result); void dsl_dataset_deactivate_feature(uint64_t dsobj, @@ -330,10 +354,10 @@ void dsl_dataset_deactivate_feature(uint64_t dsobj, #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ - char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \ + char *__ds_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); \ dsl_dataset_name(ds, __ds_name); \ dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \ - kmem_free(__ds_name, MAXNAMELEN); \ + kmem_free(__ds_name, ZFS_MAX_DATASET_NAME_LEN); \ } \ _NOTE(CONSTCOND) } while (0) #else diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h index a025c50a0737..fb299684c424 100644 --- a/include/sys/dsl_dir.h +++ b/include/sys/dsl_dir.h @@ -112,7 +112,7 @@ struct dsl_dir { int64_t dd_space_towrite[TXG_SIZE]; /* protected by dd_lock; keep at end of struct for better locality */ - char dd_myname[MAXNAMELEN]; + char dd_myname[ZFS_MAX_DATASET_NAME_LEN]; }; static inline dsl_dir_phys_t * @@ -176,11 +176,10 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd); #ifdef ZFS_DEBUG #define dprintf_dd(dd, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ - char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \ - KM_SLEEP); \ + char *__ds_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); \ dsl_dir_name(dd, __ds_name); \ dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \ - kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \ + kmem_free(__ds_name, ZFS_MAX_DATASET_NAME_LEN); \ } \ _NOTE(CONSTCOND) } while (0) #else diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 0935dca77a41..b25d3016aea8 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -67,9 +67,13 @@ typedef enum dmu_objset_type { #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) +/* + * All of these include the terminating NUL byte. + */ #define ZAP_MAXNAMELEN 256 #define ZAP_MAXVALUELEN (1024 * 8) #define ZAP_OLDMAXVALUELEN 1024 +#define ZFS_MAX_DATASET_NAME_LEN 256 /* * Dataset properties are identified by these constants and must be added to @@ -158,6 +162,7 @@ typedef enum { ZFS_PROP_REDUNDANT_METADATA, ZFS_PROP_OVERLAY, ZFS_PROP_PREV_SNAP, + ZFS_PROP_RECEIVE_RESUME_TOKEN, ZFS_NUM_PROPS } zfs_prop_t; @@ -231,6 +236,7 @@ typedef enum { #define ZPROP_SOURCE_VAL_RECVD "$recvd" #define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS" + /* * Dataset flag implemented as a special entry in the props zap object * indicating that the dataset has received properties on or after @@ -922,7 +928,7 @@ typedef struct ddt_histogram { */ typedef enum zfs_ioc { /* - * Illumos - 70/128 numbers reserved. + * Illumos - 71/128 numbers reserved. */ ZFS_IOC_FIRST = ('Z' << 8), ZFS_IOC = ZFS_IOC_FIRST, @@ -996,6 +1002,7 @@ typedef enum zfs_ioc { ZFS_IOC_BOOKMARK, ZFS_IOC_GET_BOOKMARKS, ZFS_IOC_DESTROY_BOOKMARKS, + ZFS_IOC_RECV_NEW, /* * Linux - 3/64 numbers reserved. @@ -1016,7 +1023,7 @@ typedef enum zfs_ioc { /* * zvol ioctl to get dataset name */ -#define BLKZNAME _IOR(0x12, 125, char[ZFS_MAXNAMELEN]) +#define BLKZNAME _IOR(0x12, 125, char[ZFS_MAX_DATASET_NAME_LEN]) /* * Internal SPA load state. Used by FMA diagnosis engine. diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 2eb5fa12a85c..59cb44de215d 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -126,7 +126,7 @@ struct spa { /* * Fields protected by spa_namespace_lock. */ - char spa_name[MAXNAMELEN]; /* pool name */ + char spa_name[ZFS_MAX_DATASET_NAME_LEN]; /* pool name */ char *spa_comment; /* comment */ avl_node_t spa_avl; /* node in spa_namespace_avl */ nvlist_t *spa_config; /* last synced config */ diff --git a/include/sys/zap.h b/include/sys/zap.h index 70cbaae9734e..7009473979b2 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -363,7 +363,7 @@ typedef struct { boolean_t za_normalization_conflict; uint64_t za_num_integers; uint64_t za_first_integer; /* no sign extension for <8byte ints */ - char za_name[MAXNAMELEN]; + char za_name[ZAP_MAXNAMELEN]; } zap_attribute_t; /* diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index a978524e2400..5157c6704f4b 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -20,7 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright 2016 RackTop Systems. */ #ifndef _SYS_ZFS_IOCTL_H @@ -90,16 +91,16 @@ typedef enum drr_headertype { * Feature flags for zfs send streams (flags in drr_versioninfo) */ -#define DMU_BACKUP_FEATURE_DEDUP (1<<0) -#define DMU_BACKUP_FEATURE_DEDUPPROPS (1<<1) -#define DMU_BACKUP_FEATURE_SA_SPILL (1<<2) +#define DMU_BACKUP_FEATURE_DEDUP (1 << 0) +#define DMU_BACKUP_FEATURE_DEDUPPROPS (1 << 1) +#define DMU_BACKUP_FEATURE_SA_SPILL (1 << 2) /* flags #3 - #15 are reserved for incompatible closed-source implementations */ -#define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16) -#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17) +#define DMU_BACKUP_FEATURE_EMBED_DATA (1 << 16) +#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1 << 17) /* flag #18 is reserved for a Delphix feature */ -#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1<<19) -/* flag #20 is reserved for resumable streams */ -#define DMU_BACKUP_FEATURE_LARGE_DNODE (1<<21) +#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19) +#define DMU_BACKUP_FEATURE_RESUMING (1 << 20) +#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 21) /* * Mask of all supported backup features @@ -107,11 +108,16 @@ typedef enum drr_headertype { #define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \ DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \ - DMU_BACKUP_FEATURE_LARGE_BLOCKS | DMU_BACKUP_FEATURE_LARGE_DNODE) + DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ + DMU_BACKUP_FEATURE_LARGE_DNODE) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) +typedef enum dmu_send_resume_token_version { + ZFS_SEND_RESUME_TOKEN_VERSION = 1 +} dmu_send_resume_token_version_t; + /* * The drr_versioninfo field of the dmu_replay_record has the * following layout: @@ -131,8 +137,22 @@ typedef enum drr_headertype { #define DMU_BACKUP_MAGIC 0x2F5bacbacULL +/* + * Send stream flags. Bits 24-31 are reserved for vendor-specific + * implementations and should not be used. + */ #define DRR_FLAG_CLONE (1<<0) #define DRR_FLAG_CI_DATA (1<<1) +/* + * This send stream, if it is a full send, includes the FREE and FREEOBJECT + * records that are created by the sending process. This means that the send + * stream can be received as a clone, even though it is not an incremental. + * This is not implemented as a feature flag, because the receiving side does + * not need to have implemented it to receive this stream; it is fully backwards + * compatible. We need a flag, though, because full send streams without it + * cannot necessarily be received as a clone correctly. + */ +#define DRR_FLAG_FREERECORDS (1<<2) /* * flags in the drr_checksumflags field in the DRR_WRITE and @@ -335,6 +355,12 @@ typedef enum zfs_case { ZFS_CASE_MIXED } zfs_case_t; +/* + * Note: this struct must have the same layout in 32-bit and 64-bit, so + * that 32-bit processes (like /sbin/zfs) can pass it to the 64-bit + * kernel. Therefore, we add padding to it so that no "hidden" padding + * is automatically added on 64-bit (but not on 32-bit). + */ typedef struct zfs_cmd { char zc_name[MAXPATHLEN]; /* name of pool or dataset */ uint64_t zc_nvlist_src; /* really (char *) */ diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 2c09feee1210..8e563a572e96 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ #ifndef _SYS_FS_ZFS_ZNODE_H @@ -138,17 +138,6 @@ extern "C" { #define ZFS_SHARES_DIR "SHARES" #define ZFS_SA_ATTRS "SA_ATTRS" -/* - * Path component length - * - * The generic fs code uses MAXNAMELEN to represent - * what the largest component length is. Unfortunately, - * this length includes the terminating NULL. ZFS needs - * to tell the users via pathconf() and statvfs() what the - * true maximum length of a component is, excluding the NULL. - */ -#define ZFS_MAXNAMELEN (MAXNAMELEN - 1) - /* * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in * the directory entries. On Linux systems this value is already diff --git a/include/sys/zvol.h b/include/sys/zvol.h index c3e386f0b79e..00ed220d3b17 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -32,6 +32,8 @@ #define ZVOL_OBJ 1ULL #define ZVOL_ZAP_OBJ 2ULL +extern void *zvol_tag; + extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async); extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async); extern void zvol_rename_minors(spa_t *spa, const char *oldname, diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 0bcfc0423b6b..5e007c0f4842 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -24,6 +24,7 @@ * Use is subject to license terms. * * Portions Copyright 2007 Ramprakash Jelari + * Copyright (c) 2014, 2015 by Delphix. All rights reserved. */ #include @@ -287,7 +288,7 @@ void changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) { prop_changenode_t *cn; - char newname[ZFS_MAXNAMELEN]; + char newname[ZFS_MAX_DATASET_NAME_LEN]; for (cn = uu_list_first(clp->cl_list); cn != NULL; cn = uu_list_next(clp->cl_list, cn)) { diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 07f2e75a6204..3ff96f0c65eb 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -551,7 +551,7 @@ zfs_bookmark_exists(const char *path) { nvlist_t *bmarks; nvlist_t *props; - char fsname[ZFS_MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *bmark_name; char *pound; int err; @@ -1865,22 +1865,21 @@ getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) return (value); } -static char * +static const char * getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; - char *value; + const char *value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { - verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); + value = fnvlist_lookup_string(nv, ZPROP_VALUE); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); - if ((value = (char *)zfs_prop_default_string(prop)) == NULL) - value = ""; + value = zfs_prop_default_string(prop); *source = ""; } @@ -2202,7 +2201,7 @@ struct get_clones_arg { uint64_t numclones; nvlist_t *value; const char *origin; - char buf[ZFS_MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; }; int @@ -2257,7 +2256,7 @@ zfs_get_clones_nvl(zfs_handle_t *zhp) if (gca.numclones != 0) { zfs_handle_t *root; - char pool[ZFS_MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; char *cp = pool; /* get the pool name */ @@ -2301,7 +2300,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, { char *source = NULL; uint64_t val; - char *str; + const char *str; const char *strval; boolean_t received = zfs_is_recvd_props_mode(zhp); @@ -2407,14 +2406,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, break; case ZFS_PROP_ORIGIN: - (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), - proplen); - /* - * If there is no parent at all, return failure to indicate that - * it doesn't apply to this dataset. - */ - if (propbuf[0] == '\0') + str = getprop_string(zhp, prop, &source); + if (str == NULL) return (-1); + (void) strlcpy(propbuf, str, proplen); break; case ZFS_PROP_CLONES: @@ -2596,8 +2591,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, break; case PROP_TYPE_STRING: - (void) strlcpy(propbuf, - getprop_string(zhp, prop, &source), proplen); + str = getprop_string(zhp, prop, &source); + if (str == NULL) + return (-1); + (void) strlcpy(propbuf, str, proplen); break; case PROP_TYPE_INDEX: @@ -3004,7 +3001,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { zfs_cmd_t zc = {"\0"}; - char parent[ZFS_MAXNAMELEN]; + char parent[ZFS_MAX_DATASET_NAME_LEN]; char *slash; zfs_handle_t *zhp; char errbuf[1024]; @@ -3243,7 +3240,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, ost = DMU_OST_ZFS; /* open zpool handle for prop validation */ - char pool_path[MAXNAMELEN]; + char pool_path[ZFS_MAX_DATASET_NAME_LEN]; (void) strlcpy(pool_path, path, sizeof (pool_path)); /* truncate pool_path at first slash */ @@ -3312,7 +3309,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, /* check for failure */ if (ret != 0) { - char parent[ZFS_MAXNAMELEN]; + char parent[ZFS_MAX_DATASET_NAME_LEN]; (void) parent_name(path, parent, sizeof (parent)); switch (errno) { @@ -3402,7 +3399,7 @@ static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; - char name[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; (void) snprintf(name, sizeof (name), @@ -3493,7 +3490,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { - char parent[ZFS_MAXNAMELEN]; + char parent[ZFS_MAX_DATASET_NAME_LEN]; int ret; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; @@ -3623,7 +3620,7 @@ static int zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) { snapdata_t *sd = arg; - char name[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) == 0) { @@ -3672,7 +3669,7 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) * get pool handle for prop validation. assumes all snaps are in the * same pool, as does lzc_snapshot (below). */ - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; elem = nvlist_next_nvpair(snaps, NULL); (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); pool[strcspn(pool, "/@")] = '\0'; @@ -3726,7 +3723,7 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, { int ret; snapdata_t sd = { 0 }; - char fsname[ZFS_MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *cp; zfs_handle_t *zhp; char errbuf[1024]; @@ -3905,7 +3902,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, prop_changelist_t *cl = NULL; zfs_handle_t *zhrp = NULL; char *parentname = NULL; - char parent[ZFS_MAXNAMELEN]; + char parent[ZFS_MAX_DATASET_NAME_LEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; @@ -4325,7 +4322,7 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zc.zc_nvlist_dst_size = sizeof (buf); if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { - char errbuf[ZFS_MAXNAMELEN + 32]; + char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -4359,7 +4356,7 @@ static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - char name[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; (void) snprintf(name, sizeof (name), @@ -4478,7 +4475,7 @@ static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; - char name[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; nvlist_t *existing_holds; @@ -4607,7 +4604,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; - (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index c03603934408..f5c799529c6e 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. * Copyright 2016 Joyent, Inc. */ @@ -604,7 +605,7 @@ get_snapshot_names(differ_info_t *di, const char *fromsnap, * not the same dataset name, might be okay if * tosnap is a clone of a fromsnap descendant. */ - char origin[ZFS_MAXNAMELEN]; + char origin[ZFS_MAX_DATASET_NAME_LEN]; zprop_source_t src; zfs_handle_t *zhp; diff --git a/lib/libzfs/libzfs_iter.c b/lib/libzfs/libzfs_iter.c index 5c1cf966a543..96f6574651de 100644 --- a/lib/libzfs/libzfs_iter.c +++ b/lib/libzfs/libzfs_iter.c @@ -197,7 +197,7 @@ zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data) for (pair = nvlist_next_nvpair(bmarks, NULL); pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) { - char name[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; char *bmark_name; nvlist_t *bmark_props; @@ -384,7 +384,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, * exists. */ if (ssa.ssa_last[0] != '\0') { - char snapname[ZFS_MAXNAMELEN]; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(snapname, sizeof (snapname), "%s@%s", zfs_get_name(fs_zhp), ssa.ssa_last); @@ -404,7 +404,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, ret = ENOENT; } } else { - char snapname[ZFS_MAXNAMELEN]; + char snapname[ZFS_MAX_DATASET_NAME_LEN]; zfs_handle_t *snap_zhp; (void) snprintf(snapname, sizeof (snapname), "%s@%s", zfs_get_name(fs_zhp), comma_separated); diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 29907dc8bfac..5e57adab0c9d 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -230,7 +230,7 @@ static boolean_t zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen, zprop_source_t *source) { - char sourceloc[ZFS_MAXNAMELEN]; + char sourceloc[MAXNAMELEN]; zprop_source_t sourcetype; if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type, @@ -1051,6 +1051,17 @@ mount_cb(zfs_handle_t *zhp, void *data) return (0); } + /* + * If this filesystem is inconsistent and has a receive resume + * token, we can not mount it. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { + zfs_close(zhp); + return (0); + } + libzfs_add_handle(cbp, zhp); if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) { zfs_close(zhp); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 8b74e5da6678..2484ddc12a58 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -441,7 +441,7 @@ pool_uses_efi(nvlist_t *config) boolean_t zpool_is_bootable(zpool_handle_t *zhp) { - char bootfs[ZPOOL_MAXNAMELEN]; + char bootfs[ZFS_MAX_DATASET_NAME_LEN]; return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs, sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-", @@ -1907,7 +1907,12 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, "one or more devices are already in use\n")); (void) zfs_error(hdl, EZFS_BADDEV, desc); break; - + case ENAMETOOLONG: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "new name of at least one dataset is longer than " + "the maximum allowable length")); + (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc); + break; default: (void) zpool_standard_error(hdl, error, desc); zpool_explain_recover(hdl, @@ -4006,7 +4011,7 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, zfs_cmd_t zc = {"\0"}; boolean_t mounted = B_FALSE; char *mntpnt = NULL; - char dsname[MAXNAMELEN]; + char dsname[ZFS_MAX_DATASET_NAME_LEN]; if (dsobj == 0) { /* special case for the MOS */ diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 46640f623243..b38a2e544090 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -21,11 +21,12 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * All rights reserved * Copyright (c) 2013 Steven Hartland. All rights reserved. + * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. */ #include @@ -56,6 +57,7 @@ #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" +#include #include #include #include @@ -65,7 +67,9 @@ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, - uint64_t *); + uint64_t *, const char *); +static int guid_to_name(libzfs_handle_t *, const char *, + uint64_t, boolean_t, char *); static const zio_cksum_t zero_cksum = { { 0 } }; @@ -234,7 +238,7 @@ cksummer(void *arg) { dedup_arg_t *dda = arg; char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); - dmu_replay_record_t thedrr; + dmu_replay_record_t thedrr = { 0 }; dmu_replay_record_t *drr = &thedrr; FILE *ofp; int outfd; @@ -283,8 +287,7 @@ cksummer(void *arg) DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); - if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == - DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { + if (drr->drr_payloadlen != 0) { sz = drr->drr_payloadlen; if (sz > SPA_MAXBLOCKSIZE) { @@ -834,7 +837,7 @@ typedef struct send_dump_data { /* these are all just the short snapname (the part after the @) */ const char *fromsnap; const char *tosnap; - char prevsnap[ZFS_MAXNAMELEN]; + char prevsnap[ZFS_MAX_DATASET_NAME_LEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; @@ -847,7 +850,7 @@ typedef struct send_dump_data { snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; - char holdtag[ZFS_MAXNAMELEN]; + char holdtag[ZFS_MAX_DATASET_NAME_LEN]; int cleanup_fd; uint64_t size; } send_dump_data_t; @@ -1013,17 +1016,14 @@ static void * send_progress_thread(void *arg) { progress_arg_t *pa = arg; - zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = pa->pa_zhp; libzfs_handle_t *hdl = zhp->zfs_hdl; unsigned long long bytes; char buf[16]; - time_t t; struct tm *tm; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (!pa->pa_parsable) @@ -1056,6 +1056,51 @@ send_progress_thread(void *arg) } } +static void +send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap, + uint64_t size, boolean_t parsable) +{ + if (parsable) { + if (fromsnap != NULL) { + (void) fprintf(fout, "incremental\t%s\t%s", + fromsnap, tosnap); + } else { + (void) fprintf(fout, "full\t%s", + tosnap); + } + } else { + if (fromsnap != NULL) { + if (strchr(fromsnap, '@') == NULL && + strchr(fromsnap, '#') == NULL) { + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "send from @%s to %s"), + fromsnap, tosnap); + } else { + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "send from %s to %s"), + fromsnap, tosnap); + } + } else { + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + "full send of %s"), + tosnap); + } + } + + if (size != 0) { + if (parsable) { + (void) fprintf(fout, "\t%llu", + (longlong_t)size); + } else { + char buf[16]; + zfs_nicenum(size, buf, sizeof (buf)); + (void) fprintf(fout, dgettext(TEXT_DOMAIN, + " estimated size is %s"), buf); + } + } + (void) fprintf(fout, "\n"); +} + static int dump_snapshot(zfs_handle_t *zhp, void *arg) { @@ -1135,37 +1180,14 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) (sdd->fromorigin || sdd->replicate); if (sdd->verbose) { - uint64_t size; - err = estimate_ioctl(zhp, sdd->prevsnap_obj, + uint64_t size = 0; + (void) estimate_ioctl(zhp, sdd->prevsnap_obj, fromorigin, &size); - if (sdd->parsable) { - if (sdd->prevsnap[0] != '\0') { - (void) fprintf(fout, "incremental\t%s\t%s", - sdd->prevsnap, zhp->zfs_name); - } else { - (void) fprintf(fout, "full\t%s", - zhp->zfs_name); - } - } else { - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - "send from @%s to %s"), - sdd->prevsnap, zhp->zfs_name); - } - if (err == 0) { - if (sdd->parsable) { - (void) fprintf(fout, "\t%llu\n", - (longlong_t)size); - } else { - char buf[16]; - zfs_nicenum(size, buf, sizeof (buf)); - (void) fprintf(fout, dgettext(TEXT_DOMAIN, - " estimated size is %s\n"), buf); - } - sdd->size += size; - } else { - (void) fprintf(fout, "\n"); - } + send_print_verbose(fout, zhp->zfs_name, + sdd->prevsnap[0] ? sdd->prevsnap : NULL, + size, sdd->parsable); + sdd->size += size; } if (!sdd->dryrun) { @@ -1376,6 +1398,231 @@ dump_filesystems(zfs_handle_t *rzhp, void *arg) return (0); } +nvlist_t * +zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token) +{ + unsigned int version; + int nread, i; + unsigned long long checksum, packed_len; + + /* + * Decode token header, which is: + * -- + * Note that the only supported token version is 1. + */ + nread = sscanf(token, "%u-%llx-%llx-", + &version, &checksum, &packed_len); + if (nread != 3) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (invalid format)")); + return (NULL); + } + + if (version != ZFS_SEND_RESUME_TOKEN_VERSION) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (invalid version %u)"), + version); + return (NULL); + } + + /* convert hexadecimal representation to binary */ + token = strrchr(token, '-') + 1; + int len = strlen(token) / 2; + unsigned char *compressed = zfs_alloc(hdl, len); + for (i = 0; i < len; i++) { + nread = sscanf(token + i * 2, "%2hhx", compressed + i); + if (nread != 1) { + free(compressed); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt " + "(payload is not hex-encoded)")); + return (NULL); + } + } + + /* verify checksum */ + zio_cksum_t cksum; + fletcher_4_native(compressed, len, &cksum); + if (cksum.zc_word[0] != checksum) { + free(compressed); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (incorrect checksum)")); + return (NULL); + } + + /* uncompress */ + void *packed = zfs_alloc(hdl, packed_len); + uLongf packed_len_long = packed_len; + if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK || + packed_len_long != packed_len) { + free(packed); + free(compressed); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (decompression failed)")); + return (NULL); + } + + /* unpack nvlist */ + nvlist_t *nv; + int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP); + free(packed); + free(compressed); + if (error != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt (nvlist_unpack failed)")); + return (NULL); + } + return (nv); +} + +int +zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, + const char *resume_token) +{ + char errbuf[1024]; + char *toname; + char *fromname = NULL; + uint64_t resumeobj, resumeoff, toguid, fromguid, bytes; + zfs_handle_t *zhp; + int error = 0; + char name[ZFS_MAX_DATASET_NAME_LEN]; + enum lzc_send_flags lzc_flags = 0; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot resume send")); + + nvlist_t *resume_nvl = + zfs_send_resume_token_to_nvlist(hdl, resume_token); + if (resume_nvl == NULL) { + /* + * zfs_error_aux has already been set by + * zfs_send_resume_token_to_nvlist + */ + return (zfs_error(hdl, EZFS_FAULT, errbuf)); + } + if (flags->verbose) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "resume token contents:\n")); + nvlist_print(stderr, resume_nvl); + } + + if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 || + nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 || + nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 || + nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 || + nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "resume token is corrupt")); + return (zfs_error(hdl, EZFS_FAULT, errbuf)); + } + fromguid = 0; + (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid); + + if (flags->embed_data || nvlist_exists(resume_nvl, "embedok")) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + + if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { + if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' is no longer the same snapshot used in " + "the initial send"), toname); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' used in the initial send no longer exists"), + toname); + } + return (zfs_error(hdl, EZFS_BADPATH, errbuf)); + } + zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); + if (zhp == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unable to access '%s'"), name); + return (zfs_error(hdl, EZFS_BADPATH, errbuf)); + } + + if (fromguid != 0) { + if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incremental source %#llx no longer exists"), + (longlong_t)fromguid); + return (zfs_error(hdl, EZFS_BADPATH, errbuf)); + } + fromname = name; + } + + if (flags->verbose) { + uint64_t size = 0; + error = lzc_send_space(zhp->zfs_name, fromname, &size); + if (error == 0) + size = MAX(0, (int64_t)(size - bytes)); + send_print_verbose(stderr, zhp->zfs_name, fromname, + size, flags->parsable); + } + + if (!flags->dryrun) { + progress_arg_t pa = { 0 }; + pthread_t tid; + /* + * If progress reporting is requested, spawn a new thread to + * poll ZFS_IOC_SEND_PROGRESS at a regular interval. + */ + if (flags->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = outfd; + pa.pa_parsable = flags->parsable; + + error = pthread_create(&tid, NULL, + send_progress_thread, &pa); + if (error != 0) { + zfs_close(zhp); + return (error); + } + } + + error = lzc_send_resume(zhp->zfs_name, fromname, outfd, + lzc_flags, resumeobj, resumeoff); + + if (flags->progress) { + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + } + + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "warning: cannot send '%s'"), zhp->zfs_name); + + zfs_close(zhp); + + switch (error) { + case 0: + return (0); + case EXDEV: + case ENOENT: + case EDQUOT: + case EFBIG: + case EIO: + case ENOLINK: + case ENOSPC: + case ENOSTR: + case ENXIO: + case EPIPE: + case ERANGE: + case EFAULT: + case EROFS: + zfs_error_aux(hdl, strerror(errno)); + return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); + + default: + return (zfs_standard_error(hdl, errno, errbuf)); + } + } + + + zfs_close(zhp); + + return (error); +} + /* * Generate a send stream for the dataset identified by the argument zhp. * @@ -1451,7 +1698,9 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; - zio_cksum_t zc = { { 0 } }; + zio_cksum_t zc; + + ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); if (flags->replicate || flags->props) { nvlist_t *hdrnv; @@ -1827,8 +2076,8 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) { seq++; - (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u", - baselen, name, getpid(), seq); + (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN, + "%.*srecv-%u-%u", baselen, name, getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); if (flags->verbose) { @@ -1913,6 +2162,7 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, typedef struct guid_to_name_data { uint64_t guid; + boolean_t bookmark_ok; char *name; char *skip; } guid_to_name_data_t; @@ -1921,20 +2171,25 @@ static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { guid_to_name_data_t *gtnd = arg; + const char *slash; int err; if (gtnd->skip != NULL && - strcmp(zhp->zfs_name, gtnd->skip) == 0) { + (slash = strrchr(zhp->zfs_name, '/')) != NULL && + strcmp(slash + 1, gtnd->skip) == 0) { + zfs_close(zhp); return (0); } - if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { + if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); + if (err != EEXIST && gtnd->bookmark_ok) + err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } @@ -1948,45 +2203,48 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) */ static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, - char *name) + boolean_t bookmark_ok, char *name) { - /* exhaustive search all local snapshots */ - char pname[ZFS_MAXNAMELEN]; + char pname[ZFS_MAX_DATASET_NAME_LEN]; guid_to_name_data_t gtnd; - int err = 0; - zfs_handle_t *zhp; - char *cp; gtnd.guid = guid; + gtnd.bookmark_ok = bookmark_ok; gtnd.name = name; gtnd.skip = NULL; - (void) strlcpy(pname, parent, sizeof (pname)); - /* - * Search progressively larger portions of the hierarchy. This will + * Search progressively larger portions of the hierarchy, starting + * with the filesystem specified by 'parent'. This will * select the "most local" version of the origin snapshot in the case * that there are multiple matching snapshots in the system. */ - while ((cp = strrchr(pname, '/')) != NULL) { - + (void) strlcpy(pname, parent, sizeof (pname)); + char *cp = strrchr(pname, '@'); + if (cp == NULL) + cp = strchr(pname, '\0'); + for (; cp != NULL; cp = strrchr(pname, '/')) { /* Chop off the last component and open the parent */ *cp = '\0'; - zhp = make_dataset_handle(hdl, pname); + zfs_handle_t *zhp = make_dataset_handle(hdl, pname); if (zhp == NULL) continue; - - err = zfs_iter_children(zhp, guid_to_name_cb, >nd); + int err = guid_to_name_cb(zfs_handle_dup(zhp), >nd); + if (err != EEXIST) + err = zfs_iter_children(zhp, guid_to_name_cb, >nd); + if (err != EEXIST && bookmark_ok) + err = zfs_iter_bookmarks(zhp, guid_to_name_cb, >nd); zfs_close(zhp); if (err == EEXIST) return (0); /* - * Remember the dataset that we already searched, so we - * skip it next time through. + * Remember the last portion of the dataset so we skip it next + * time through (as we've already searched that portion of the + * hierarchy). */ - gtnd.skip = pname; + gtnd.skip = strrchr(pname, '/') + 1; } return (ENOENT); @@ -2002,7 +2260,7 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl, { nvlist_t *nvfs; char *fsname = NULL, *snapname = NULL; - char buf[ZFS_MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; int rv; zfs_handle_t *guid1hdl, *guid2hdl; uint64_t create1, create2; @@ -2053,7 +2311,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, avl_tree_t *local_avl; nvpair_t *fselem, *nextfselem; char *fromsnap; - char newname[ZFS_MAXNAMELEN]; + char newname[ZFS_MAX_DATASET_NAME_LEN]; char guidname[32]; int error; boolean_t needagain, progress, recursive; @@ -2172,7 +2430,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, /* check for delete */ if (found == NULL) { - char name[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; if (!flags->force) continue; @@ -2213,8 +2471,8 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, /* check for different snapname */ if (strcmp(nvpair_name(snapelem), stream_snapname) != 0) { - char name[ZFS_MAXNAMELEN]; - char tryname[ZFS_MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; + char tryname[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); @@ -2298,7 +2556,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, ((flags->isprefix || strcmp(tofs, fsname) != 0) && (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; - char tryname[ZFS_MAXNAMELEN]; + char tryname[ZFS_MAX_DATASET_NAME_LEN]; parent = fsavl_find(local_avl, stream_parent_fromsnap_guid, NULL); @@ -2364,9 +2622,10 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; char *fromsnap = NULL; + char *sendsnap = NULL; char *cp; - char tofs[ZFS_MAXNAMELEN]; - char sendfs[ZFS_MAXNAMELEN]; + char tofs[ZFS_MAX_DATASET_NAME_LEN]; + char sendfs[ZFS_MAX_DATASET_NAME_LEN]; char errbuf[1024]; dmu_replay_record_t drre; int error; @@ -2450,7 +2709,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, nvlist_t *renamed = NULL; nvpair_t *pair = NULL; - (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); + (void) strlcpy(tofs, destname, sizeof (tofs)); if (flags->isprefix) { struct drr_begin *drrb = &drr->drr_u.drr_begin; int i; @@ -2459,7 +2718,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, cp = strrchr(drrb->drr_toname, '/'); if (cp == NULL) { (void) strlcat(tofs, "/", - ZFS_MAXNAMELEN); + sizeof (tofs)); i = 0; } else { i = (cp - drrb->drr_toname); @@ -2469,7 +2728,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, } /* zfs_receive_one() will create_parents() */ (void) strlcat(tofs, &drrb->drr_toname[i], - ZFS_MAXNAMELEN); + sizeof (tofs)); *strchr(tofs, '@') = '\0'; } @@ -2511,9 +2770,17 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, * zfs_receive_one(). */ (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, - ZFS_MAXNAMELEN); - if ((cp = strchr(sendfs, '@')) != NULL) + sizeof (sendfs)); + if ((cp = strchr(sendfs, '@')) != NULL) { *cp = '\0'; + /* + * Find the "sendsnap", the final snapshot in a replication + * stream. zfs_receive_one() handles certain errors + * differently, depending on if the contained stream is the + * last one or not. + */ + sendsnap = (cp + 1); + } /* Finally, receive each contained stream */ do { @@ -2526,7 +2793,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, */ error = zfs_receive_impl(hdl, destname, NULL, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, - action_handlep); + action_handlep, sendsnap); if (error == ENODATA) { error = 0; break; @@ -2586,11 +2853,9 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) switch (drr->drr_type) { case DRR_BEGIN: - /* NB: not to be used on v2 stream packages */ if (drr->drr_payloadlen != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid substream header")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + (void) recv_read(hdl, fd, buf, + drr->drr_payloadlen, B_FALSE, NULL); } break; @@ -2651,6 +2916,40 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) return (-1); } +static void +recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap, + boolean_t resumable) +{ + char target_fs[ZFS_MAX_DATASET_NAME_LEN]; + + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "checksum mismatch or incomplete stream")); + + if (!resumable) + return; + (void) strlcpy(target_fs, target_snap, sizeof (target_fs)); + *strchr(target_fs, '@') = '\0'; + zfs_handle_t *zhp = zfs_open(hdl, target_fs, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return; + + char token_buf[ZFS_MAXPROPLEN]; + int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + token_buf, sizeof (token_buf), + NULL, NULL, 0, B_TRUE); + if (error == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "checksum mismatch or incomplete stream.\n" + "Partially received snapshot is saved.\n" + "A resuming stream can be generated on the sending " + "system by running:\n" + " zfs send -t %s"), + token_buf); + } + zfs_close(zhp); +} + /* * Restores a backup of tosnap from the file descriptor specified by infd. */ @@ -2659,25 +2958,33 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, - uint64_t *action_handlep) + uint64_t *action_handlep, const char *finalsnap) { - zfs_cmd_t zc = {"\0"}; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; struct drr_begin *drrb = &drr->drr_u.drr_begin; char errbuf[1024]; - char prop_errbuf[1024]; const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; + boolean_t newprops = B_FALSE; + uint64_t read_bytes = 0; + uint64_t errflags = 0; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; + nvlist_t *prop_errors = NULL; boolean_t recursive; + char *snapname = NULL; + char destsnap[MAXPATHLEN * 2]; + char origin[MAXNAMELEN]; + char name[MAXPATHLEN]; + nvlist_t *props = NULL; begin_time = time(NULL); + bzero(origin, MAXNAMELEN); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); @@ -2686,27 +2993,21 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, ENOENT); if (stream_avl != NULL) { - char *snapname; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); - nvlist_t *props; - int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); - if (err) + if (err) { VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + newprops = B_TRUE; + } if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } - ret = zcmd_write_src_nvlist(hdl, &zc, props); - if (err) - nvlist_free(props); - if (ret != 0) - return (-1); } cp = NULL; @@ -2727,7 +3028,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -e")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; } chopprefix = strrchr(sendfs, '/'); @@ -2754,7 +3056,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; } chopprefix = strchr(drrb->drr_toname, '/'); @@ -2772,7 +3075,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot " "stream")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + err = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; } chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } @@ -2784,39 +3088,41 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, chopprefix[0] == '\0'); /* - * Determine name of destination snapshot, store in zc_value. + * Determine name of destination snapshot. */ - (void) strcpy(zc.zc_value, tosnap); - (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); + (void) strcpy(destsnap, tosnap); + (void) strlcat(destsnap, chopprefix, sizeof (destsnap)); free(cp); - if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { - zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) { + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; } /* - * Determine the name of the origin snapshot, store in zc_string. + * Determine the name of the origin snapshot. */ if (drrb->drr_flags & DRR_FLAG_CLONE) { - if (guid_to_name(hdl, zc.zc_value, - drrb->drr_fromguid, zc.zc_string) != 0) { - zcmd_free_nvlists(&zc); + if (guid_to_name(hdl, destsnap, + drrb->drr_fromguid, B_FALSE, origin) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), - zc.zc_value); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); + destsnap); + err = zfs_error(hdl, EZFS_NOENT, errbuf); + goto out; } if (flags->verbose) - (void) printf("found clone origin %s\n", zc.zc_string); + (void) printf("found clone origin %s\n", origin); } else if (originsnap) { - (void) strncpy(zc.zc_string, originsnap, ZFS_MAXNAMELEN); + (void) strncpy(origin, originsnap, sizeof (origin)); if (flags->verbose) (void) printf("using provided clone origin %s\n", - zc.zc_string); + origin); } + boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RESUMING; stream_wantsnewfs = (drrb->drr_fromguid == 0 || - (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap); + (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; if (stream_wantsnewfs) { /* @@ -2826,18 +3132,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive new filesystem stream")); - (void) strcpy(zc.zc_name, zc.zc_value); - cp = strrchr(zc.zc_name, '/'); + (void) strcpy(name, destsnap); + cp = strrchr(name, '/'); if (cp) *cp = '\0'; if (cp && - !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { - char suffix[ZFS_MAXNAMELEN]; - (void) strcpy(suffix, strrchr(zc.zc_value, '/')); - if (guid_to_name(hdl, zc.zc_name, parent_snapguid, - zc.zc_value) == 0) { - *strchr(zc.zc_value, '@') = '\0'; - (void) strcat(zc.zc_value, suffix); + !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) { + char suffix[ZFS_MAX_DATASET_NAME_LEN]; + (void) strcpy(suffix, strrchr(destsnap, '/')); + if (guid_to_name(hdl, name, parent_snapguid, + B_FALSE, destsnap) == 0) { + *strchr(destsnap, '@') = '\0'; + (void) strcat(destsnap, suffix); } } } else { @@ -2848,8 +3154,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive incremental stream")); - (void) strcpy(zc.zc_name, zc.zc_value); - *strchr(zc.zc_name, '@') = '\0'; + (void) strcpy(name, destsnap); + *strchr(name, '@') = '\0'; /* * If the exact receive path was specified and this is the @@ -2858,65 +3164,67 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, */ if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + strlen(sendfs)) != '\0' && *chopprefix != '@')) && - !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { - char snap[ZFS_MAXNAMELEN]; - (void) strcpy(snap, strchr(zc.zc_value, '@')); - if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, - zc.zc_value) == 0) { - *strchr(zc.zc_value, '@') = '\0'; - (void) strcat(zc.zc_value, snap); + !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) { + char snap[ZFS_MAX_DATASET_NAME_LEN]; + (void) strcpy(snap, strchr(destsnap, '@')); + if (guid_to_name(hdl, name, drrb->drr_fromguid, + B_FALSE, destsnap) == 0) { + *strchr(destsnap, '@') = '\0'; + (void) strcat(destsnap, snap); } } } - (void) strcpy(zc.zc_name, zc.zc_value); - *strchr(zc.zc_name, '@') = '\0'; + (void) strcpy(name, destsnap); + *strchr(name, '@') = '\0'; - if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) { + zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp; + (void) strcpy(zc.zc_name, name); + /* - * Destination fs exists. Therefore this should either - * be an incremental, or the stream specifies a new fs - * (full stream or clone) and they want us to blow it - * away (and have therefore specified -F and removed any - * snapshots). + * Destination fs exists. It must be one of these cases: + * - an incremental send stream + * - the stream specifies a new fs (full stream or clone) + * and they want us to blow away the existing fs (and + * have therefore specified -F and removed any snapshots) + * - we are resuming a failed receive. */ if (stream_wantsnewfs) { if (!flags->force) { - zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' exists\n" - "must specify -F to overwrite it"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + "must specify -F to overwrite it"), name); + err = zfs_error(hdl, EZFS_EXISTS, errbuf); + goto out; } if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) { - zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination has snapshots (eg. %s)\n" "must destroy them to overwrite it"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + name); + err = zfs_error(hdl, EZFS_EXISTS, errbuf); + goto out; } } - if ((zhp = zfs_open(hdl, zc.zc_name, + if ((zhp = zfs_open(hdl, name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { - zcmd_free_nvlists(&zc); - return (-1); + err = -1; + goto out; } if (stream_wantsnewfs && zhp->zfs_dmustats.dds_origin[0]) { - zcmd_free_nvlists(&zc); zfs_close(zhp); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' is a clone\n" - "must destroy it to overwrite it"), - zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + "must destroy it to overwrite it"), name); + err = zfs_error(hdl, EZFS_EXISTS, errbuf); + goto out; } if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && @@ -2925,16 +3233,28 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); if (clp == NULL) { zfs_close(zhp); - zcmd_free_nvlists(&zc); - return (-1); + err = -1; + goto out; } if (changelist_prefix(clp) != 0) { changelist_free(clp); zfs_close(zhp); - zcmd_free_nvlists(&zc); - return (-1); + err = -1; + goto out; } } + + /* + * If we are resuming a newfs, set newfs here so that we will + * mount it if the recv succeeds this time. We can tell + * that it was a newfs on the first recv because the fs + * itself will be inconsistent (if the fs existed when we + * did the first recv, we would have received it into + * .../%recv). + */ + if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT)) + newfs = B_TRUE; + zfs_close(zhp); } else { /* @@ -2945,11 +3265,11 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * contained no slash character). */ if (!stream_wantsnewfs || - (cp = strrchr(zc.zc_name, '/')) == NULL) { - zcmd_free_nvlists(&zc); + (cp = strrchr(name, '/')) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination '%s' does not exist"), zc.zc_name); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); + "destination '%s' does not exist"), name); + err = zfs_error(hdl, EZFS_NOENT, errbuf); + goto out; } /* @@ -2959,44 +3279,34 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '\0'; if (flags->isprefix && !flags->istail && !flags->dryrun && - create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { - zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); + create_parents(hdl, destsnap, strlen(tosnap)) != 0) { + err = zfs_error(hdl, EZFS_BADRESTORE, errbuf); + goto out; } newfs = B_TRUE; } - zc.zc_begin_record = drr_noswap->drr_u.drr_begin; - zc.zc_cookie = infd; - zc.zc_guid = flags->force; if (flags->verbose) { (void) printf("%s %s stream of %s into %s\n", flags->dryrun ? "would receive" : "receiving", drrb->drr_fromguid ? "incremental" : "full", - drrb->drr_toname, zc.zc_value); + drrb->drr_toname, destsnap); (void) fflush(stdout); } if (flags->dryrun) { - zcmd_free_nvlists(&zc); - return (recv_skip(hdl, infd, flags->byteswap)); + err = recv_skip(hdl, infd, flags->byteswap); + goto out; } - zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; - zc.zc_nvlist_dst_size = sizeof (prop_errbuf); - zc.zc_cleanup_fd = cleanup_fd; - zc.zc_action_handle = *action_handlep; - - err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); - ioctl_errno = errno; - prop_errflags = (zprop_errflags_t)zc.zc_obj; + err = ioctl_err = lzc_receive_one(destsnap, props, origin, + flags->force, flags->resumable, infd, drr_noswap, cleanup_fd, + &read_bytes, &errflags, action_handlep, &prop_errors); + ioctl_errno = ioctl_err; + prop_errflags = errflags; if (err == 0) { - nvlist_t *prop_errors; - VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, - zc.zc_nvlist_dst_size, &prop_errors, 0)); - nvpair_t *prop_err = NULL; while ((prop_err = nvlist_next_nvpair(prop_errors, @@ -3011,29 +3321,38 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, ZPROP_N_MORE_ERRORS) == 0) { trunc_prop_errs(intval); break; - } else { + } else if (snapname == NULL || finalsnap == NULL || + strcmp(finalsnap, snapname) == 0 || + strcmp(nvpair_name(prop_err), + zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) { + /* + * Skip the special case of, for example, + * "refquota", errors on intermediate + * snapshots leading up to a final one. + * That's why we have all of the checks above. + * + * See zfs_ioctl.c's extract_delay_props() for + * a list of props which can fail on + * intermediate snapshots, but shouldn't + * affect the overall receive. + */ (void) snprintf(tbuf, sizeof (tbuf), dgettext(TEXT_DOMAIN, "cannot receive %s property on %s"), - nvpair_name(prop_err), zc.zc_name); + nvpair_name(prop_err), name); zfs_setprop_error(hdl, prop, intval, tbuf); } } - nvlist_free(prop_errors); } - zc.zc_nvlist_dst = 0; - zc.zc_nvlist_dst_size = 0; - zcmd_free_nvlists(&zc); - if (err == 0 && snapprops_nvlist) { - zfs_cmd_t zc2 = {"\0"}; + zfs_cmd_t zc = {"\0"}; - (void) strcpy(zc2.zc_name, zc.zc_value); - zc2.zc_cookie = B_TRUE; /* received */ - if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { - (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); - zcmd_free_nvlists(&zc2); + (void) strcpy(zc.zc_name, destsnap); + zc.zc_cookie = B_TRUE; /* received */ + if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) { + (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); + zcmd_free_nvlists(&zc); } } @@ -3045,7 +3364,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, */ avl_tree_t *local_avl; nvlist_t *local_nv, *fs; - cp = strchr(zc.zc_value, '@'); + cp = strchr(destsnap, '@'); /* * XXX Do this faster by just iterating over snaps in @@ -3053,7 +3372,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * get a strange "does not exist" error message. */ *cp = '\0'; - if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, + if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); @@ -3063,7 +3382,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (fs != NULL) { if (flags->verbose) { (void) printf("snap %s already exists; " - "ignoring\n", zc.zc_value); + "ignoring\n", destsnap); } err = ioctl_err = recv_skip(hdl, infd, flags->byteswap); @@ -3075,22 +3394,22 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (ioctl_err != 0) { switch (ioctl_errno) { case ENODEV: - cp = strchr(zc.zc_value, '@'); + cp = strchr(destsnap, '@'); *cp = '\0'; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "most recent snapshot of %s does not\n" - "match incremental source"), zc.zc_value); + "match incremental source"), destsnap); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); *cp = '@'; break; case ETXTBSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s has been modified\n" - "since most recent snapshot"), zc.zc_name); + "since most recent snapshot"), name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case EEXIST: - cp = strchr(zc.zc_value, '@'); + cp = strchr(destsnap, '@'); if (newfs) { /* it's the containing fs that exists */ *cp = '\0'; @@ -3099,15 +3418,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "destination already exists")); (void) zfs_error_fmt(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN, "cannot restore to %s"), - zc.zc_value); + destsnap); *cp = '@'; break; case EINVAL: + if (flags->resumable) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "kernel modules must be upgraded to " + "receive this stream.")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "invalid stream (checksum mismatch)")); + recv_ecksum_set_aux(hdl, destsnap, flags->resumable); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ENOTSUP: @@ -3117,7 +3439,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, break; case EDQUOT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "destination %s space quota exceeded"), zc.zc_name); + "destination %s space quota exceeded"), name); (void) zfs_error(hdl, EZFS_NOSPC, errbuf); break; default: @@ -3130,12 +3452,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * children of the target filesystem if we did a replication * receive (indicated by stream_avl being non-NULL). */ - cp = strchr(zc.zc_value, '@'); + cp = strchr(destsnap, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; *cp = '\0'; - h = zfs_open(hdl, zc.zc_value, + h = zfs_open(hdl, destsnap, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { @@ -3146,7 +3468,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * for mounting and sharing later. */ if (top_zfs && *top_zfs == NULL) - *top_zfs = zfs_strdup(hdl, zc.zc_value); + *top_zfs = zfs_strdup(hdl, destsnap); } zfs_close(h); } @@ -3160,26 +3482,24 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (prop_errflags & ZPROP_ERR_NOCLEAR) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " - "failed to clear unreceived properties on %s"), - zc.zc_name); + "failed to clear unreceived properties on %s"), name); (void) fprintf(stderr, "\n"); } if (prop_errflags & ZPROP_ERR_NORESTORE) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " - "failed to restore original properties on %s"), - zc.zc_name); + "failed to restore original properties on %s"), name); (void) fprintf(stderr, "\n"); } - if (err || ioctl_err) - return (-1); - - *action_handlep = zc.zc_action_handle; + if (err || ioctl_err) { + err = -1; + goto out; + } if (flags->verbose) { char buf1[64]; char buf2[64]; - uint64_t bytes = zc.zc_cookie; + uint64_t bytes = read_bytes; time_t delta = time(NULL) - begin_time; if (delta == 0) delta = 1; @@ -3190,14 +3510,22 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, buf1, delta, buf2); } - return (0); + err = 0; +out: + if (prop_errors != NULL) + nvlist_free(prop_errors); + + if (newprops) + nvlist_free(props); + + return (err); } static int zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, const char *originsnap, recvflags_t *flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, - uint64_t *action_handlep) + uint64_t *action_handlep, const char *finalsnap) { int err; dmu_replay_record_t drr, drr_noswap; @@ -3281,7 +3609,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, } if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { - char nonpackage_sendfs[ZFS_MAXNAMELEN]; + char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN]; if (sendfs == NULL) { /* * We were not called from zfs_receive_package(). Get @@ -3289,14 +3617,16 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, */ char *cp; (void) strlcpy(nonpackage_sendfs, - drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); + drr.drr_u.drr_begin.drr_toname, + sizeof (nonpackage_sendfs)); if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) *cp = '\0'; sendfs = nonpackage_sendfs; + VERIFY(finalsnap == NULL); } return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, - cleanup_fd, action_handlep)); + cleanup_fd, action_handlep, finalsnap)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); @@ -3309,7 +3639,8 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, * Restores a backup of tosnap from the file descriptor specified by infd. * Return 0 on total success, -2 if some things couldn't be * destroyed/renamed/promoted, -1 if some things couldn't be received. - * (-1 will override -2). + * (-1 will override -2, if -1 and the resumable flag was specified the + * transfer can be resumed if the sending side supports it). */ int zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, @@ -3373,7 +3704,7 @@ zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props, VERIFY(cleanup_fd >= 0); err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL, - stream_avl, &top_zfs, cleanup_fd, &action_handle); + stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL); VERIFY(0 == close(cleanup_fd)); diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index b706e6f6be88..0359e4284ac1 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -217,7 +217,7 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) nvpair_t *elem; nvlist_t *args; int error; - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; *errlist = NULL; @@ -269,7 +269,7 @@ lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) nvpair_t *elem; nvlist_t *args; int error; - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; /* determine the pool name */ elem = nvlist_next_nvpair(snaps, NULL); @@ -296,7 +296,7 @@ lzc_snaprange_space(const char *firstsnap, const char *lastsnap, nvlist_t *args; nvlist_t *result; int err; - char fs[MAXNAMELEN]; + char fs[ZFS_MAX_DATASET_NAME_LEN]; char *atp; /* determine the fs name */ @@ -361,7 +361,7 @@ lzc_exists(const char *dataset) int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) { - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *args; nvpair_t *elem; int error; @@ -408,7 +408,7 @@ lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) int lzc_release(nvlist_t *holds, nvlist_t **errlist) { - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; nvpair_t *elem; /* determine the pool name */ @@ -467,6 +467,13 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp) int lzc_send(const char *snapname, const char *from, int fd, enum lzc_send_flags flags) +{ + return (lzc_send_resume(snapname, from, fd, flags, 0, 0)); +} + +int +lzc_send_resume(const char *snapname, const char *from, int fd, + enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) { nvlist_t *args; int err; @@ -479,6 +486,10 @@ lzc_send(const char *snapname, const char *from, int fd, fnvlist_add_boolean(args, "largeblockok"); if (flags & LZC_SEND_FLAG_EMBED_DATA) fnvlist_add_boolean(args, "embedok"); + if (resumeobj != 0 || resumeoff != 0) { + fnvlist_add_uint64(args, "resume_object", resumeobj); + fnvlist_add_uint64(args, "resume_offset", resumeoff); + } err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); nvlist_free(args); return (err); @@ -537,91 +548,258 @@ recv_read(int fd, void *buf, int ilen) } /* - * The simplest receive case: receive from the specified fd, creating the - * specified snapshot. Apply the specified properties a "received" properties - * (which can be overridden by locally-set properties). If the stream is a - * clone, its origin snapshot must be specified by 'origin'. The 'force' - * flag will cause the target filesystem to be rolled back or destroyed if - * necessary to receive. + * Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy + * ZFS_IOC_RECV user/kernel interface. The new interface supports all stream + * options but is currently only used for resumable streams. This way updated + * user space utilities will interoperate with older kernel modules. * - * Return 0 on success or an errno on failure. - * - * Note: this interface does not work on dedup'd streams - * (those with DMU_BACKUP_FEATURE_DEDUP). + * Non-Linux OpenZFS platforms have opted to modify the legacy interface. */ -int -lzc_receive(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) +static int +recv_impl(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, boolean_t resumable, int input_fd, + const dmu_replay_record_t *begin_record, int cleanup_fd, + uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, + nvlist_t **errors) { - /* - * The receive ioctl is still legacy, so we need to construct our own - * zfs_cmd_t rather than using zfsc_ioctl(). - */ - zfs_cmd_t zc = {"\0"}; - char *atp; - char *packed = NULL; - size_t size; dmu_replay_record_t drr; + char fsname[MAXPATHLEN]; + char *atp; int error; - ASSERT3S(g_refcount, >, 0); - - /* zc_name is name of containing filesystem */ - (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); - atp = strchr(zc.zc_name, '@'); + /* Set 'fsname' to the name of containing filesystem */ + (void) strlcpy(fsname, snapname, sizeof (fsname)); + atp = strchr(fsname, '@'); if (atp == NULL) return (EINVAL); *atp = '\0'; - /* if the fs does not exist, try its parent. */ - if (!lzc_exists(zc.zc_name)) { - char *slashp = strrchr(zc.zc_name, '/'); + /* If the fs does not exist, try its parent. */ + if (!lzc_exists(fsname)) { + char *slashp = strrchr(fsname, '/'); if (slashp == NULL) return (ENOENT); *slashp = '\0'; + } + /* + * The begin_record is normally a non-byteswapped BEGIN record. + * For resumable streams it may be set to any non-byteswapped + * dmu_replay_record_t. + */ + if (begin_record == NULL) { + error = recv_read(input_fd, &drr, sizeof (drr)); + if (error != 0) + return (error); + } else { + drr = *begin_record; } - /* zc_value is full name of the snapshot to create */ - (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + if (resumable) { + nvlist_t *outnvl = NULL; + nvlist_t *innvl = fnvlist_alloc(); - if (props != NULL) { - /* zc_nvlist_src is props to set */ - packed = fnvlist_pack(props, &size); - zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; - zc.zc_nvlist_src_size = size; - } + fnvlist_add_string(innvl, "snapname", snapname); - /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ - if (origin != NULL) - (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); + if (props != NULL) + fnvlist_add_nvlist(innvl, "props", props); - /* zc_begin_record is non-byteswapped BEGIN record */ - error = recv_read(fd, &drr, sizeof (drr)); - if (error != 0) - goto out; - zc.zc_begin_record = drr.drr_u.drr_begin; + if (origin != NULL && strlen(origin)) + fnvlist_add_string(innvl, "origin", origin); - /* zc_cookie is fd to read from */ - zc.zc_cookie = fd; + fnvlist_add_byte_array(innvl, "begin_record", + (uchar_t *) &drr, sizeof (drr)); - /* zc guid is force flag */ - zc.zc_guid = force; + fnvlist_add_int32(innvl, "input_fd", input_fd); - /* zc_cleanup_fd is unused */ - zc.zc_cleanup_fd = -1; + if (force) + fnvlist_add_boolean(innvl, "force"); - error = ioctl(g_fd, ZFS_IOC_RECV, &zc); - if (error != 0) - error = errno; + if (resumable) + fnvlist_add_boolean(innvl, "resumable"); + + if (cleanup_fd >= 0) + fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd); + + if (action_handle != NULL) + fnvlist_add_uint64(innvl, "action_handle", + *action_handle); + + error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl); + + if (error == 0 && read_bytes != NULL) + error = nvlist_lookup_uint64(outnvl, "read_bytes", + read_bytes); + + if (error == 0 && errflags != NULL) + error = nvlist_lookup_uint64(outnvl, "error_flags", + errflags); + + if (error == 0 && action_handle != NULL) + error = nvlist_lookup_uint64(outnvl, "action_handle", + action_handle); + + if (error == 0 && errors != NULL) { + nvlist_t *nvl; + error = nvlist_lookup_nvlist(outnvl, "errors", &nvl); + if (error == 0) + *errors = fnvlist_dup(nvl); + } + + fnvlist_free(innvl); + fnvlist_free(outnvl); + } else { + zfs_cmd_t zc = {"\0"}; + char *packed = NULL; + size_t size; + + ASSERT3S(g_refcount, >, 0); + + (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_value)); + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + + if (props != NULL) { + packed = fnvlist_pack(props, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } + + if (origin != NULL) + (void) strlcpy(zc.zc_string, origin, + sizeof (zc.zc_string)); + + ASSERT3S(drr.drr_type, ==, DRR_BEGIN); + zc.zc_begin_record = drr.drr_u.drr_begin; + zc.zc_guid = force; + zc.zc_cookie = input_fd; + zc.zc_cleanup_fd = -1; + zc.zc_action_handle = 0; + + if (cleanup_fd >= 0) + zc.zc_cleanup_fd = cleanup_fd; + + if (action_handle != NULL) + zc.zc_action_handle = *action_handle; + + zc.zc_nvlist_dst_size = 128 * 1024; + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); + + error = ioctl(g_fd, ZFS_IOC_RECV, &zc); + if (error != 0) { + error = errno; + } else { + if (read_bytes != NULL) + *read_bytes = zc.zc_cookie; + + if (errflags != NULL) + *errflags = zc.zc_obj; + + if (action_handle != NULL) + *action_handle = zc.zc_action_handle; + + if (errors != NULL) + VERIFY0(nvlist_unpack( + (void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size, errors, KM_SLEEP)); + } + + if (packed != NULL) + fnvlist_pack_free(packed, size); + free((void *)(uintptr_t)zc.zc_nvlist_dst); + } -out: - if (packed != NULL) - fnvlist_pack_free(packed, size); - free((void*)(uintptr_t)zc.zc_nvlist_dst); return (error); } +/* + * The simplest receive case: receive from the specified fd, creating the + * specified snapshot. Apply the specified properties as "received" properties + * (which can be overridden by locally-set properties). If the stream is a + * clone, its origin snapshot must be specified by 'origin'. The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). + */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + return (recv_impl(snapname, props, origin, force, B_FALSE, fd, + NULL, -1, NULL, NULL, NULL, NULL)); +} + +/* + * Like lzc_receive, but if the receive fails due to premature stream + * termination, the intermediate state will be preserved on disk. In this + * case, ECKSUM will be returned. The receive may subsequently be resumed + * with a resuming send stream generated by lzc_send_resume(). + */ +int +lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + return (recv_impl(snapname, props, origin, force, B_TRUE, fd, + NULL, -1, NULL, NULL, NULL, NULL)); +} + +/* + * Like lzc_receive, but allows the caller to read the begin record and then to + * pass it in. That could be useful if the caller wants to derive, for example, + * the snapname or the origin parameters based on the information contained in + * the begin record. + * The begin record must be in its original form as read from the stream, + * in other words, it should not be byteswapped. + * + * The 'resumable' parameter allows to obtain the same behavior as with + * lzc_receive_resumable. + */ +int +lzc_receive_with_header(const char *snapname, nvlist_t *props, + const char *origin, boolean_t force, boolean_t resumable, int fd, + const dmu_replay_record_t *begin_record) +{ + if (begin_record == NULL) + return (EINVAL); + return (recv_impl(snapname, props, origin, force, resumable, fd, + begin_record, -1, NULL, NULL, NULL, NULL)); +} + +/* + * Like lzc_receive, but allows the caller to pass all supported arguments + * and retrieve all values returned. The only additional input parameter + * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor. + * + * The following parameters all provide return values. Several may be set + * in the failure case and will contain additional information. + * + * The 'read_bytes' value will be set to the total number of bytes read. + * + * The 'errflags' value will contain zprop_errflags_t flags which are + * used to describe any failures. + * + * The 'action_handle' is used to pass the handle for this guid/ds mapping. + * It should be set to zero on first call and will contain an updated handle + * on success, it should be passed in subsequent calls. + * + * The 'errors' nvlist contains an entry for each unapplied received + * property. Callers are responsible for freeing this nvlist. + */ +int lzc_receive_one(const char *snapname, nvlist_t *props, + const char *origin, boolean_t force, boolean_t resumable, int input_fd, + const dmu_replay_record_t *begin_record, int cleanup_fd, + uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, + nvlist_t **errors) +{ + return (recv_impl(snapname, props, origin, force, resumable, + input_fd, begin_record, cleanup_fd, read_bytes, errflags, + action_handle, errors)); +} + /* * Roll back this filesystem or volume to its most recent snapshot. * If snapnamebuf is not NULL, it will be filled in with the name @@ -664,7 +842,7 @@ lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) { nvpair_t *elem; int error; - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; /* determine the pool name */ elem = nvlist_next_nvpair(bookmarks, NULL); @@ -726,7 +904,7 @@ lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) { nvpair_t *elem; int error; - char pool[MAXNAMELEN]; + char pool[ZFS_MAX_DATASET_NAME_LEN]; /* determine the pool name */ elem = nvlist_next_nvpair(bmarks, NULL); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 800cdd7d7c55..93bfb0403717 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -1408,6 +1408,8 @@ spl_fstrans_check(void) return (0); } +void *zvol_tag = "zvol_tag"; + void zvol_create_minors(spa_t *spa, const char *name, boolean_t async) { diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 2aca58b76cf0..6921e3b4e119 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -22,7 +22,7 @@ .\" .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2011 Joshua M. Clulow -.\" Copyright (c) 2011, 2014 by Delphix. All rights reserved. +.\" Copyright (c) 2011, 2015 by Delphix. All rights reserved. .\" Copyright (c) 2014, Joyent, Inc. All rights reserved. .\" Copyright 2012 Nexenta Systems, Inc. All Rights Reserved. .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. @@ -180,17 +180,27 @@ zfs \- configures ZFS file systems .LP .nf -\fBzfs\fR \fBsend\fR [\fB-eL\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +\fBzfs\fR \fBsend\fR [\fB-Le\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf -\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +\fBzfs\fR \fBsend\fR [\fB-Penv\fR] \fB-t\fR \fIreceive_resume_token\fR .fi .LP .nf -\fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR +\fBzfs\fR \fBreceive\fR [\fB-Fnsuv\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +.fi + +.LP +.nf +\fBzfs\fR \fBreceive\fR [\fB-Fnsuv\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR +.fi + +.LP +.nf +\fBzfs\fR \fBreceive\fR \fB-A\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP @@ -532,6 +542,17 @@ For cloned file systems or volumes, the snapshot from which the clone was create .sp .ne 2 .na +\fB\fBreceive_resume_token\fR\fR +.ad +.sp .6 +.RS 4n +For filesystems or volumes which have saved partially-completed state from \fBzfs receive -s\fR , this opaque token can be provided to \fBzfs send -t\fR to resume and complete the \fBzfs receive\fR. +.RE + +.sp +.ne 2 +.mk +.na \fB\fBreferenced\fR\fR .ad .sp .6 @@ -2799,7 +2820,7 @@ The format of the stream is committed. You will be able to receive your streams .sp .ne 2 .na -\fBzfs send\fR [\fB-eL\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +\fBzfs send\fR [\fB-Le\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .ad .sp .6 .RS 4n @@ -2809,24 +2830,6 @@ the pool must be read-only, or the filesystem must not be mounted. When the stream generated from a filesystem or volume is received, the default snapshot name will be "--head--". -.sp -.ne 2 -.na -\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR -.ad -.sp .6 -.RS 4n -Generate an incremental send stream. The incremental source must be an earlier -snapshot in the destination's history. It will commonly be an earlier -snapshot in the destination's filesystem, in which case it can be -specified as the last component of the name (the \fB#\fR or \fB@\fR character -and following). -.sp -If the incremental target is a clone, the incremental source can -be the origin snapshot, or an earlier snapshot in the origin's filesystem, -or the origin's origin, etc. -.RE - .sp .ne 2 .na @@ -2859,15 +2862,39 @@ then the receiving system must have that feature enabled as well. See \fBembedded_data\fR feature. .RE +.sp +.ne 2 +.na +\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR +.ad +.sp .6 +.RS 4n +Generate an incremental send stream. The incremental source must be an earlier snapshot in the destination's history. It will commonly be an earlier snapshot in the destination's filesystem, in which case it can be specified as the last component of the name (the \fB#\fR or \fB@\fR character and following). +.sp +If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. +.RE + +.RE +.sp +.ne 2 +.na +\fB\fBzfs send\fR [\fB-Penv\fR] \fB-t\fR \fIreceive_resume_token\fR\fR +.ad +.sp .6 +.RS 4n +Creates a send stream which resumes an interrupted receive. The \fIreceive_resume_token\fR is the value of this property on the filesystem or volume that was being received into. See the documentation for \fBzfs receive -s\fR for more details. + +.RE + .RE .sp .ne 2 .na -\fB\fBzfs receive\fR [\fB-vnFu\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR +\fB\fBzfs receive\fR [\fB-Fnsuv\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na -\fB\fBzfs receive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR\fR +\fB\fBzfs receive\fR [\fB-Fnsuv\fR] [\fB-d\fR|\fB-e\fR] [\fB-o origin\fR=\fIsnapshot\fR] \fIfilesystem\fR\fR .ad .sp .6 .RS 4n @@ -2885,21 +2912,36 @@ The \fB-d\fR and \fB-e\fR options cause the file system name of the target snaps .sp .ne 2 .na -\fB\fB-d\fR\fR +\fB\fB-F\fR\fR .ad .sp .6 .RS 4n -Discard the first element of the sent snapshot's file system name, using the remaining elements to determine the name of the target file system for the new snapshot as described in the paragraph above. +Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side. .RE .sp .ne 2 +.mk .na -\fB\fB-e\fR\fR +\fB\fB-n\fR\fR .ad .sp .6 .RS 4n -Discard all but the last element of the sent snapshot's file system name, using that element to determine the name of the target file system for the new snapshot as described in the paragraph above. +Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use. +.RE + +.sp +.ne 2 +.na +\fB\fB-s\fR\fR +.ad +.sp .6 +.RS 4n +If the receive is interrupted, save the partially received state, rather than deleting it. Interruption may be due to premature termination of the stream (e.g. due to network failure or failure of the remote system if the stream is being read over a network connection), a checksum error in the stream, termination of the \fBzfs receive\fR process, or unclean shutdown of the system. +.sp +The receive can be resumed with a stream generated by \fBzfs send -t\fR token, where the \fItoken\fR is the value of the \fBreceive_resume_token\fR property of the filesystem or volume which is received into. +.sp +To use this flag, the storage pool must have the \fBextensible_dataset\fR feature enabled. See \fBzpool-features\fR(5) for details on ZFS feature flags. .RE .sp @@ -2925,33 +2967,49 @@ Print verbose information about the stream and the time required to perform the .sp .ne 2 .na -\fB\fB-n\fR\fR +\fB\fB-d\fR\fR .ad .sp .6 .RS 4n -Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use. +Discard the first element of the sent snapshot's file system name, using the remaining elements to determine the name of the target file system for the new snapshot as described in the paragraph above. .RE .sp .ne 2 .na -\fB\fB-o\fR \fBorigin\fR=\fIsnapshot\fR +\fB\fB-e\fR\fR .ad .sp .6 .RS 4n -Forces the stream to be received as a clone of the given snapshot. This is only valid if the stream is an incremental stream whose source is the same as the provided origin. +Discard all but the last element of the sent snapshot's file system name, using that element to determine the name of the target file system for the new snapshot as described in the paragraph above. .RE .sp .ne 2 .na -\fB\fB-F\fR\fR +\fB\fB-o\fR \fBorigin\fR=\fIsnapshot\fR .ad .sp .6 .RS 4n -Force a rollback of the file system to the most recent snapshot before performing the receive operation. If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side. +Forces the stream to be received as a clone of the given snapshot. +If the stream is a full send stream, this will create the filesystem +described by the stream as a clone of the specified snapshot. Which +snapshot was specified will not affect the success or failure of the +receive, as long as the snapshot does exist. If the stream is an +incremental send stream, all the normal verification will be performed. +.RE + .RE +.sp +.ne 2 +.na +\fB\fBzfs receive\fR [\fB-A\fR] \fIfilesystem\fR|\fIvolume\fR +.ad +.sp .6 +.RS 4n +Abort an interrupted \fBzfs receive \fB-s\fR\fR, deleting its saved partially received state. + .RE .sp diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c index 2c2d01d5c2c5..e76c5b8a5835 100644 --- a/module/zcommon/zfs_fletcher.c +++ b/module/zcommon/zfs_fletcher.c @@ -334,7 +334,12 @@ fletcher_4_impl_get(void) void fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) { - const fletcher_4_ops_t *ops = fletcher_4_impl_get(); + const fletcher_4_ops_t *ops; + + if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t))) + ops = fletcher_4_impl_get(); + else + ops = &fletcher_4_scalar_ops; ops->init(zcp); ops->compute(buf, size, zcp); @@ -345,7 +350,12 @@ fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) void fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) { - const fletcher_4_ops_t *ops = fletcher_4_impl_get(); + const fletcher_4_ops_t *ops; + + if (IS_P2ALIGNED(size, 4 * sizeof (uint32_t))) + ops = fletcher_4_impl_get(); + else + ops = &fletcher_4_scalar_ops; ops->init(zcp); ops->compute_byteswap(buf, size, zcp); diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index ff724be588cc..b58071bed055 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -69,7 +69,7 @@ zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what) { const char *loc; - if (strlen(path) >= MAXNAMELEN) { + if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -140,27 +140,8 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what) /* * Make sure the name is not too long. - * - * ZFS_MAXNAMELEN is the maximum dataset length used in the userland - * which is the same as MAXNAMELEN used in the kernel. - * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all - * places using MAXNAMELEN. - * - * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name - * length is 20 bytes. This 20 bytes is broken down as follows to - * provide a maximum safe /[@snapshot] length of only - * 18 bytes. To ensure bytes are left for [@snapshot] the - * portition is futher limited to 9 bytes. For 2.6.27 and - * newer kernels this limit is set to MAXNAMELEN. - * - * / + + - * (18) + (1) + (1) */ -#ifdef HAVE_KOBJ_NAME_LEN - if (strlen(path) > 18) { -#else - if (strlen(path) >= MAXNAMELEN) { -#endif /* HAVE_KOBJ_NAME_LEN */ + if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -289,7 +270,7 @@ mountpoint_namecheck(const char *path, namecheck_err_t *why) while (*end != '/' && *end != '\0') end++; - if (end - start >= MAXNAMELEN) { + if (end - start >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -314,27 +295,8 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) /* * Make sure the name is not too long. - * - * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland - * which is the same as MAXNAMELEN used in the kernel. - * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all - * places using MAXNAMELEN. - * - * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name - * length is 20 bytes. This 20 bytes is broken down as follows to - * provide a maximum safe /[@snapshot] length of only - * 18 bytes. To ensure bytes are left for [@snapshot] the - * portition is futher limited to 8 bytes. For 2.6.27 and - * newer kernels this limit is set to MAXNAMELEN. - * - * / + + - * (18) + (1) + (1) */ -#ifdef HAVE_KOBJ_NAME_LEN - if (strlen(pool) > 8) { -#else - if (strlen(pool) >= MAXNAMELEN) { -#endif /* HAVE_KOBJ_NAME_LEN */ + if (strlen(pool) >= ZFS_MAX_DATASET_NAME_LEN) { if (why) *why = NAME_ERR_TOOLONG; return (-1); diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 1dbeab084601..1d68ca29e6d6 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -375,6 +375,10 @@ zfs_prop_init(void) zprop_register_string(ZFS_PROP_SELINUX_ROOTCONTEXT, "rootcontext", "none", PROP_DEFAULT, ZFS_TYPE_DATASET, "", "ROOTCONTEXT"); + zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN, + "receive_resume_token", + NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "", "RESUMETOK"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index cdc897726ee7..22ca84d96a27 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -405,6 +405,17 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, * checksum/compression/copies. */ if (ds != NULL) { + boolean_t needlock = B_FALSE; + + /* + * Note: it's valid to open the objset if the dataset is + * long-held, in which case the pool_config lock will not + * be held. + */ + if (!dsl_pool_config_held(dmu_objset_pool(os))) { + needlock = B_TRUE; + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); + } err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); @@ -461,6 +472,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dnodesize_changed_cb, os); } } + if (needlock) + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); @@ -520,6 +533,13 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) { int err = 0; + /* + * We shouldn't be doing anything with dsl_dataset_t's unless the + * pool_config lock is held, or the dataset is long-held. + */ + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) || + dsl_dataset_long_held(ds)); + mutex_enter(&ds->ds_opening_lock); if (ds->ds_objset == NULL) { objset_t *os; @@ -651,7 +671,7 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag) { dsl_pool_t *dp; dsl_dataset_t *ds, *newds; - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; ds = os->os_dsl_dataset; VERIFY3P(ds, !=, NULL); @@ -875,6 +895,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx) if (strchr(doca->doca_name, '@') != NULL) return (SET_ERROR(EINVAL)); + if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); + error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail); if (error != 0) return (error); @@ -961,6 +984,9 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) if (strchr(doca->doca_clone, '@') != NULL) return (SET_ERROR(EINVAL)); + if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); + error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail); if (error != 0) return (error); @@ -1000,7 +1026,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) const char *tail; dsl_dataset_t *origin, *ds; uint64_t obj; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail)); VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); @@ -2027,7 +2053,7 @@ dmu_objset_get_user(objset_t *os) /* * Determine name of filesystem, given name of snapshot. - * buf must be at least MAXNAMELEN bytes + * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ int dmu_fsname(const char *snapname, char *buf) @@ -2035,7 +2061,7 @@ dmu_fsname(const char *snapname, char *buf) char *atp = strchr(snapname, '@'); if (atp == NULL) return (SET_ERROR(EINVAL)); - if (atp - snapname >= MAXNAMELEN) + if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strlcpy(buf, snapname, atp - snapname + 1); return (0); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 901386a5a4d0..80f7dc1aae11 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -20,10 +20,11 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright 2014 HybridCluster. All rights reserved. + * Copyright 2016 RackTop Systems. * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ @@ -62,14 +63,18 @@ int zfs_send_corrupt_data = B_FALSE; int zfs_send_queue_length = 16 * 1024 * 1024; int zfs_recv_queue_length = 16 * 1024 * 1024; +/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */ +int zfs_send_set_freerecords_bit = B_TRUE; static char *dmu_recv_tag = "dmu_recv_tag"; -static const char *recv_clone_name = "%recv"; +const char *recv_clone_name = "%recv"; #define BP_SPAN(datablkszsec, indblkshift, level) \ (((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \ (level) * (indblkshift - SPA_BLKPTRSHIFT))) +static void byteswap_record(dmu_replay_record_t *drr); + struct send_thread_arg { bqueue_t q; dsl_dataset_t *ds; /* Dataset to traverse */ @@ -77,6 +82,7 @@ struct send_thread_arg { int flags; /* flags to pass to traverse_dataset */ int error_code; boolean_t cancel; + zbookmark_phys_t resume; }; struct send_block_record { @@ -99,8 +105,21 @@ dump_bytes_cb(void *arg) { dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg; dmu_sendarg_t *dsp = dbi->dbi_dsp; - dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; + dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os); ssize_t resid; /* have to get resid to get detailed errno */ + + /* + * The code does not rely on this (len being a multiple of 8). We keep + * this assertion because of the corresponding assertion in + * receive_read(). Keeping this assertion ensures that we do not + * inadvertently break backwards compatibility (causing the assertion + * in receive_read() to trigger on old software). + * + * Removing the assertions could be rolled into a new feature that uses + * data that isn't 8-byte aligned; if the assertions were removed, a + * feature flag would have to be added. + */ + ASSERT0(dbi->dbi_len % 8); dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, @@ -169,6 +188,14 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) return (0); } +/* + * Fill in the drr_free struct, or perform aggregation if the previous record is + * also a free record, and the two are adjacent. + * + * Note that we send free records even for a full send, because we want to be + * able to receive a full send as a clone, which requires a list of all the free + * and freeobject records that were generated on the source. + */ static int dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) @@ -180,7 +207,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, * that the receiving system doesn't have any dbufs in the range * being freed. This is always true because there is a one-record * constraint: we only send one WRITE record for any given - * object+offset. We know that the one-record constraint is + * object,offset. We know that the one-record constraint is * true because we always send data in increasing order by * object,offset. * @@ -192,15 +219,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, (object == dsp->dsa_last_data_object && offset > dsp->dsa_last_data_offset)); - /* - * If we are doing a non-incremental send, then there can't - * be any data in the dataset we're receiving into. Therefore - * a free record would simply be a no-op. Save space by not - * sending it to begin with. - */ - if (!dsp->dsa_incremental) - return (0); - if (length != -1ULL && offset + length < offset) length = -1ULL; @@ -378,10 +396,6 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); - /* See comment in dump_free(). */ - if (!dsp->dsa_incremental) - return (0); - /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, * push it out, since free block aggregation can only be done for @@ -428,6 +442,19 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) { struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); + if (object < dsp->dsa_resume_object) { + /* + * Note: when resuming, we will visit all the dnodes in + * the block of dnodes that we are resuming from. In + * this case it's unnecessary to send the dnodes prior to + * the one we are resuming from. We should be at most one + * block's worth of dnodes behind the resume point. + */ + ASSERT3U(dsp->dsa_resume_object - object, <, + 1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)); + return (0); + } + if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) return (dump_freeobjects(dsp, object, 1)); @@ -509,6 +536,9 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, uint64_t record_size; int err = 0; + ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || + zb->zb_object >= sta->resume.zb_object); + if (sta->cancel) return (SET_ERROR(EINTR)); @@ -545,8 +575,10 @@ send_traverse_thread(void *arg) struct send_block_record *data; if (st_arg->ds != NULL) { - err = traverse_dataset(st_arg->ds, st_arg->fromtxg, - st_arg->flags, send_cb, arg); + err = traverse_dataset_resume(st_arg->ds, + st_arg->fromtxg, &st_arg->resume, + st_arg->flags, send_cb, st_arg); + if (err != EINTR) st_arg->error_code = err; } @@ -575,6 +607,9 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) ASSERT3U(zb->zb_level, >=, 0); + ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || + zb->zb_object >= dsa->dsa_resume_object); + if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { return (0); @@ -637,6 +672,10 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) uint64_t offset; ASSERT0(zb->zb_level); + ASSERT(zb->zb_object > dsa->dsa_resume_object || + (zb->zb_object == dsa->dsa_resume_object && + zb->zb_blkid * blksz >= dsa->dsa_resume_offset)); + if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) { @@ -697,8 +736,10 @@ get_next_record(bqueue_t *bq, struct send_block_record *data) */ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, - zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok, - boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off) + zfs_bookmark_phys_t *ancestor_zb, + boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, int outfd, + uint64_t resumeobj, uint64_t resumeoff, + vnode_t *vp, offset_t *off) { objset_t *os; dmu_replay_record_t *drr; @@ -707,6 +748,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, uint64_t fromtxg = 0; uint64_t featureflags = 0; struct send_thread_arg to_arg; + void *payload = NULL; + size_t payload_len = 0; struct send_block_record *to_data; err = dmu_objset_from_ds(to_ds, &os); @@ -721,6 +764,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, DMU_SUBSTREAM); + bzero(&to_arg, sizeof (to_arg)); + #ifdef _KERNEL if (dmu_objset_type(os) == DMU_OST_ZFS) { uint64_t version; @@ -746,6 +791,10 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4; } + if (resumeobj != 0 || resumeoff != 0) { + featureflags |= DMU_BACKUP_FEATURE_RESUMING; + } + DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo, featureflags); @@ -757,6 +806,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid; if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; + if (zfs_send_set_freerecords_bit) + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = @@ -779,8 +830,9 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; dsp->dsa_pending_op = PENDING_NONE; - dsp->dsa_incremental = (ancestor_zb != NULL); dsp->dsa_featureflags = featureflags; + dsp->dsa_resume_object = resumeobj; + dsp->dsa_resume_offset = resumeoff; mutex_enter(&to_ds->ds_sendstream_lock); list_insert_head(&to_ds->ds_sendstreams, dsp); @@ -789,7 +841,26 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsl_dataset_long_hold(to_ds, FTAG); dsl_pool_rele(dp, tag); - if (dump_record(dsp, NULL, 0) != 0) { + if (resumeobj != 0 || resumeoff != 0) { + dmu_object_info_t to_doi; + nvlist_t *nvl; + err = dmu_object_info(os, resumeobj, &to_doi); + if (err != 0) + goto out; + SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0, + resumeoff / to_doi.doi_data_block_size); + + nvl = fnvlist_alloc(); + fnvlist_add_uint64(nvl, "resume_object", resumeobj); + fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + payload = fnvlist_pack(nvl, &payload_len); + drr->drr_payloadlen = payload_len; + fnvlist_free(nvl); + } + + err = dump_record(dsp, payload, payload_len); + fnvlist_pack_free(payload, payload_len); + if (err != 0) { err = dsp->dsa_err; goto out; } @@ -899,19 +970,19 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, outfd, 0, 0, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, outfd, 0, 0, vp, off); } dsl_dataset_rele(ds, FTAG); return (err); } int -dmu_send(const char *tosnap, const char *fromsnap, - boolean_t embedok, boolean_t large_block_ok, - int outfd, vnode_t *vp, offset_t *off) +dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff, + vnode_t *vp, offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; @@ -978,10 +1049,12 @@ dmu_send(const char *tosnap, const char *fromsnap, return (err); } err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, + outfd, resumeobj, resumeoff, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, outfd, vp, off); + embedok, large_block_ok, + outfd, resumeobj, resumeoff, vp, off); } if (owned) dsl_dataset_disown(ds, FTAG); @@ -1221,6 +1294,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* already checked */ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + ASSERT(!(featureflags & DMU_BACKUP_FEATURE_RESUMING)); if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM || @@ -1233,6 +1307,10 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) spa_version(dp->dp_spa) < SPA_VERSION_SA) return (SET_ERROR(ENOTSUP)); + if (drba->drba_cookie->drc_resumable && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EXTENSIBLE_DATASET)) + return (SET_ERROR(ENOTSUP)); + /* * The receiving code doesn't know how to translate a WRITE_EMBEDDED * record to a plan WRITE record, so the pool must have the @@ -1269,7 +1347,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ - if (flags & DRR_FLAG_CLONE) { + if (flags & DRR_FLAG_CLONE || drba->drba_origin) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } @@ -1278,7 +1356,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); } else if (error == ENOENT) { /* target fs does not exist; must be a full backup or clone */ - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; /* * If it's a non-clone incremental, we are missing the @@ -1288,8 +1366,17 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) drba->drba_origin)) return (SET_ERROR(ENOENT)); + /* + * If we're receiving a full send as a clone, and it doesn't + * contain all the necessary free records and freeobject + * records, reject it. + */ + if (fromguid == 0 && drba->drba_origin && + !(flags & DRR_FLAG_FREERECORDS)) + return (SET_ERROR(EINVAL)); + /* Open the parent of tofs */ - ASSERT3U(strlen(tofs), <, MAXNAMELEN); + ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); error = dsl_dataset_hold(dp, buf, FTAG, &ds); if (error != 0) @@ -1327,7 +1414,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } - if (dsl_dataset_phys(origin)->ds_guid != fromguid) { + if (dsl_dataset_phys(origin)->ds_guid != fromguid && + fromguid != 0) { dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); @@ -1345,15 +1433,16 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) { dmu_recv_begin_arg_t *drba = arg; dsl_pool_t *dp = dmu_tx_pool(tx); + objset_t *mos = dp->dp_meta_objset; struct drr_begin *drrb = drba->drba_cookie->drc_drrb; const char *tofs = drba->drba_cookie->drc_tofs; dsl_dataset_t *ds, *newds; uint64_t dsobj; int error; - uint64_t crflags; + uint64_t crflags = 0; - crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? - DS_FLAG_CI_DATASET : 0; + if (drrb->drr_flags & DRR_FLAG_CI_DATA) + crflags |= DS_FLAG_CI_DATASET; error = dsl_dataset_hold(dp, tofs, FTAG, &ds); if (error == 0) { @@ -1391,6 +1480,32 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) } VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + if (drba->drba_cookie->drc_resumable) { + uint64_t one = 1; + uint64_t zero = 0; + + dsl_dataset_zapify(newds, tx); + if (drrb->drr_fromguid != 0) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_FROMGUID, + 8, 1, &drrb->drr_fromguid, tx)); + } + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TOGUID, + 8, 1, &drrb->drr_toguid, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TONAME, + 1, strlen(drrb->drr_toname) + 1, drrb->drr_toname, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OBJECT, + 8, 1, &one, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OFFSET, + 8, 1, &zero, tx)); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES, + 8, 1, &zero, tx)); + if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_EMBED_DATA) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK, + 8, 1, &one, tx)); + } + } + dmu_buf_will_dirty(newds->ds_dbuf, tx); dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; @@ -1408,56 +1523,194 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) spa_history_log_internal_ds(newds, "receive", tx, ""); } +static int +dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) +{ + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + int error; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + dsl_dataset_t *ds; + const char *tofs = drba->drba_cookie->drc_tofs; + uint64_t val; + + /* 6 extra bytes for /%recv */ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + + /* already checked */ + ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); + ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING); + + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM || + drrb->drr_type >= DMU_OST_NUMTYPES) + return (SET_ERROR(EINVAL)); + + /* Verify pool version supports SA if SA_SPILL feature set */ + if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && + spa_version(dp->dp_spa) < SPA_VERSION_SA) + return (SET_ERROR(ENOTSUP)); + + /* + * The receiving code doesn't know how to translate a WRITE_EMBEDDED + * record to a plain WRITE record, so the pool must have the + * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED + * records. Same with WRITE_EMBEDDED records that use LZ4 compression. + */ + if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) + return (SET_ERROR(ENOTSUP)); + if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) + return (SET_ERROR(ENOTSUP)); + + (void) snprintf(recvname, sizeof (recvname), "%s/%s", + tofs, recv_clone_name); + + if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + /* %recv does not exist; continue in tofs */ + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (error != 0) + return (error); + } + + /* check that ds is marked inconsistent */ + if (!DS_IS_INCONSISTENT(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + /* check that there is resuming data, and that the toguid matches */ + if (!dsl_dataset_is_zapified(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + error = zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val); + if (error != 0 || drrb->drr_toguid != val) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + /* + * Check if the receive is still running. If so, it will be owned. + * Note that nothing else can own the dataset (e.g. after the receive + * fails) because it will be marked inconsistent. + */ + if (dsl_dataset_has_owner(ds)) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EBUSY)); + } + + /* There should not be any snapshots of this fs yet. */ + if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + /* + * Note: resume point will be checked when we process the first WRITE + * record. + */ + + /* check that the origin matches */ + val = 0; + (void) zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val); + if (drrb->drr_fromguid != val) { + dsl_dataset_rele(ds, FTAG); + return (SET_ERROR(EINVAL)); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + +static void +dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) +{ + dmu_recv_begin_arg_t *drba = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + const char *tofs = drba->drba_cookie->drc_tofs; + dsl_dataset_t *ds; + uint64_t dsobj; + /* 6 extra bytes for /%recv */ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + + (void) snprintf(recvname, sizeof (recvname), "%s/%s", + tofs, recv_clone_name); + + if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + /* %recv does not exist; continue in tofs */ + VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds)); + drba->drba_cookie->drc_newfs = B_TRUE; + } + + /* clear the inconsistent flag so that we can own it */ + ASSERT(DS_IS_INCONSISTENT(ds)); + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; + dsobj = ds->ds_object; + dsl_dataset_rele(ds, FTAG); + + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds)); + + dmu_buf_will_dirty(ds->ds_dbuf, tx); + dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; + + ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds))); + + drba->drba_cookie->drc_ds = ds; + + spa_history_log_internal_ds(ds, "resume receive", tx, ""); +} + /* * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() * succeeds; otherwise we will leak the holds on the datasets. */ int -dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, - boolean_t force, char *origin, dmu_recv_cookie_t *drc) +dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, + boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc) { dmu_recv_begin_arg_t drba = { 0 }; - dmu_replay_record_t *drr; bzero(drc, sizeof (dmu_recv_cookie_t)); - drc->drc_drrb = drrb; + drc->drc_drr_begin = drr_begin; + drc->drc_drrb = &drr_begin->drr_u.drr_begin; drc->drc_tosnap = tosnap; drc->drc_tofs = tofs; drc->drc_force = force; + drc->drc_resumable = resumable; drc->drc_cred = CRED(); - if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) + if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { drc->drc_byteswap = B_TRUE; - else if (drrb->drr_magic != DMU_BACKUP_MAGIC) - return (SET_ERROR(EINVAL)); - - drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); - drr->drr_type = DRR_BEGIN; - drr->drr_u.drr_begin = *drc->drc_drrb; - if (drc->drc_byteswap) { - fletcher_4_incremental_byteswap(drr, + fletcher_4_incremental_byteswap(drr_begin, sizeof (dmu_replay_record_t), &drc->drc_cksum); - } else { - fletcher_4_incremental_native(drr, + byteswap_record(drr_begin); + } else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) { + fletcher_4_incremental_native(drr_begin, sizeof (dmu_replay_record_t), &drc->drc_cksum); - } - kmem_free(drr, sizeof (dmu_replay_record_t)); - - if (drc->drc_byteswap) { - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); + } else { + return (SET_ERROR(EINVAL)); } drba.drba_origin = origin; drba.drba_cookie = drc; drba.drba_cred = CRED(); - return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, - &drba, 5, ZFS_SPACE_CHECK_NORMAL)); + if (DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RESUMING) { + return (dsl_sync_task(tofs, + dmu_recv_resume_begin_check, dmu_recv_resume_begin_sync, + &drba, 5, ZFS_SPACE_CHECK_NORMAL)); + } else { + return (dsl_sync_task(tofs, + dmu_recv_begin_check, dmu_recv_begin_sync, + &drba, 5, ZFS_SPACE_CHECK_NORMAL)); + } } struct receive_record_arg { @@ -1469,6 +1722,7 @@ struct receive_record_arg { */ arc_buf_t *write_buf; int payload_size; + uint64_t bytes_read; /* bytes read from stream when record created */ boolean_t eos_marker; /* Marks the end of the stream */ bqueue_node_t node; }; @@ -1477,6 +1731,7 @@ struct receive_writer_arg { objset_t *os; boolean_t byteswap; bqueue_t q; + /* * These three args are used to signal to the main thread that we're * done. @@ -1484,15 +1739,34 @@ struct receive_writer_arg { kmutex_t mutex; kcondvar_t cv; boolean_t done; + int err; /* A map from guid to dataset to help handle dedup'd streams. */ avl_tree_t *guid_to_ds_map; + boolean_t resumable; + uint64_t last_object, last_offset; + uint64_t bytes_read; /* bytes read when current record created */ +}; + +struct objlist { + list_t list; /* List of struct receive_objnode. */ + /* + * Last object looked up. Used to assert that objects are being looked + * up in ascending order. + */ + uint64_t last_lookup; +}; + +struct receive_objnode { + list_node_t node; + uint64_t object; }; struct receive_arg { objset_t *os; vnode_t *vp; /* The vnode to read the stream from */ uint64_t voff; /* The current offset in the stream */ + uint64_t bytes_read; /* * A record that has had its payload read in, but hasn't yet been handed * off to the worker thread. @@ -1505,12 +1779,7 @@ struct receive_arg { int err; boolean_t byteswap; /* Sorted list of objects not to issue prefetches for. */ - list_t ignore_obj_list; -}; - -struct receive_ign_obj_node { - list_node_t node; - uint64_t object; + struct objlist ignore_objlist; }; typedef struct guid_map_entry { @@ -1553,7 +1822,10 @@ receive_read(struct receive_arg *ra, int len, void *buf) { int done = 0; - /* some things will require 8-byte alignment, so everything must */ + /* + * The code doesn't rely on this (lengths being multiples of 8). See + * comment in dump_bytes. + */ ASSERT0(len % 8); while (done < len) { @@ -1564,14 +1836,21 @@ receive_read(struct receive_arg *ra, int len, void *buf) ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); - if (resid == len - done) - ra->err = SET_ERROR(EINVAL); + if (resid == len - done) { + /* + * Note: ECKSUM indicates that the receive + * was interrupted and can potentially be resumed. + */ + ra->err = SET_ERROR(ECKSUM); + } ra->voff += len - done - resid; done = len - resid; if (ra->err != 0) return (ra->err); } + ra->bytes_read += len; + ASSERT3U(done, ==, len); return (0); } @@ -1675,6 +1954,43 @@ deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size) } } +static void +save_resume_state(struct receive_writer_arg *rwa, + uint64_t object, uint64_t offset, dmu_tx_t *tx) +{ + int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; + + if (!rwa->resumable) + return; + + /* + * We use ds_resume_bytes[] != 0 to indicate that we need to + * update this on disk, so it must not be 0. + */ + ASSERT(rwa->bytes_read != 0); + + /* + * We only resume from write records, which have a valid + * (non-meta-dnode) object number. + */ + ASSERT(object != 0); + + /* + * For resuming to work correctly, we must receive records in order, + * sorted by object,offset. This is checked by the callers, but + * assert it here for good measure. + */ + ASSERT3U(object, >=, rwa->os->os_dsl_dataset->ds_resume_object[txgoff]); + ASSERT(object != rwa->os->os_dsl_dataset->ds_resume_object[txgoff] || + offset >= rwa->os->os_dsl_dataset->ds_resume_offset[txgoff]); + ASSERT3U(rwa->bytes_read, >=, + rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff]); + + rwa->os->os_dsl_dataset->ds_resume_object[txgoff] = object; + rwa->os->os_dsl_dataset->ds_resume_offset[txgoff] = offset; + rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read; +} + noinline static int receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, void *data) @@ -1773,6 +2089,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, dmu_buf_rele(db, FTAG); } dmu_tx_commit(tx); + return (0); } @@ -1782,13 +2099,14 @@ receive_freeobjects(struct receive_writer_arg *rwa, struct drr_freeobjects *drrfo) { uint64_t obj; + int next_err = 0; if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) return (SET_ERROR(EINVAL)); for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj; - obj < drrfo->drr_firstobj + drrfo->drr_numobjs; - (void) dmu_object_next(rwa->os, &obj, FALSE, 0)) { + obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0; + next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) { dmu_object_info_t doi; int err; @@ -1804,7 +2122,8 @@ receive_freeobjects(struct receive_writer_arg *rwa, if (err != 0) return (err); } - + if (next_err != ESRCH) + return (next_err); return (0); } @@ -1820,6 +2139,18 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); + /* + * For resuming to work, records must be in increasing order + * by (object, offset). + */ + if (drrw->drr_object < rwa->last_object || + (drrw->drr_object == rwa->last_object && + drrw->drr_offset < rwa->last_offset)) { + return (SET_ERROR(EINVAL)); + } + rwa->last_object = drrw->drr_object; + rwa->last_offset = drrw->drr_offset; + if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); @@ -1842,8 +2173,17 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0) return (SET_ERROR(EINVAL)); dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); + + /* + * Note: If the receive fails, we want the resume stream to start + * with the same record that we last successfully received (as opposed + * to the next record), so that we can verify that we are + * resuming from the correct location. + */ + save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx); dmu_tx_commit(tx); dmu_buf_rele(bonus, FTAG); + return (0); } @@ -1902,43 +2242,48 @@ receive_write_byref(struct receive_writer_arg *rwa, dmu_write(rwa->os, drrwbr->drr_object, drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); dmu_buf_rele(dbp, FTAG); + + /* See comment in restore_write. */ + save_resume_state(rwa, drrwbr->drr_object, drrwbr->drr_offset, tx); dmu_tx_commit(tx); return (0); } static int receive_write_embedded(struct receive_writer_arg *rwa, - struct drr_write_embedded *drrwnp, void *data) + struct drr_write_embedded *drrwe, void *data) { dmu_tx_t *tx; int err; - if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) + if (drrwe->drr_offset + drrwe->drr_length < drrwe->drr_offset) return (EINVAL); - if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE) + if (drrwe->drr_psize > BPE_PAYLOAD_SIZE) return (EINVAL); - if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES) + if (drrwe->drr_etype >= NUM_BP_EMBEDDED_TYPES) return (EINVAL); - if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) + if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); tx = dmu_tx_create(rwa->os); - dmu_tx_hold_write(tx, drrwnp->drr_object, - drrwnp->drr_offset, drrwnp->drr_length); + dmu_tx_hold_write(tx, drrwe->drr_object, + drrwe->drr_offset, drrwe->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { dmu_tx_abort(tx); return (err); } - dmu_write_embedded(rwa->os, drrwnp->drr_object, - drrwnp->drr_offset, data, drrwnp->drr_etype, - drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, + dmu_write_embedded(rwa->os, drrwe->drr_object, + drrwe->drr_offset, data, drrwe->drr_etype, + drrwe->drr_compression, drrwe->drr_lsize, drrwe->drr_psize, rwa->byteswap ^ ZFS_HOST_BYTEORDER, tx); + /* See comment in restore_write. */ + save_resume_state(rwa, drrwe->drr_object, drrwe->drr_offset, tx); dmu_tx_commit(tx); return (0); } @@ -2012,10 +2357,16 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) static void dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) { - char name[MAXNAMELEN]; - dsl_dataset_name(drc->drc_ds, name); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); - (void) dsl_destroy_head(name); + if (drc->drc_resumable) { + /* wait for our resume state to be written to disk */ + txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0); + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + } else { + char name[ZFS_MAX_DATASET_NAME_LEN]; + dsl_dataset_name(drc->drc_ds, name); + dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + (void) dsl_destroy_head(name); + } } static void @@ -2044,12 +2395,17 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) if (len != 0) { ASSERT3U(len, <=, SPA_MAXBLOCKSIZE); - ra->rrd->payload = buf; - ra->rrd->payload_size = len; - err = receive_read(ra, len, ra->rrd->payload); + err = receive_read(ra, len, buf); if (err != 0) return (err); - receive_cksum(ra, len, ra->rrd->payload); + receive_cksum(ra, len, buf); + + /* note: rrd is NULL when reading the begin record's payload */ + if (ra->rrd != NULL) { + ra->rrd->payload = buf; + ra->rrd->payload_size = len; + ra->rrd->bytes_read = ra->bytes_read; + } } ra->prev_cksum = ra->cksum; @@ -2057,6 +2413,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP); err = receive_read(ra, sizeof (ra->next_rrd->header), &ra->next_rrd->header); + ra->next_rrd->bytes_read = ra->bytes_read; if (err != 0) { kmem_free(ra->next_rrd, sizeof (*ra->next_rrd)); ra->next_rrd = NULL; @@ -2096,6 +2453,70 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) return (0); } +static void +objlist_create(struct objlist *list) +{ + list_create(&list->list, sizeof (struct receive_objnode), + offsetof(struct receive_objnode, node)); + list->last_lookup = 0; +} + +static void +objlist_destroy(struct objlist *list) +{ + struct receive_objnode *n; + + for (n = list_remove_head(&list->list); + n != NULL; n = list_remove_head(&list->list)) { + kmem_free(n, sizeof (*n)); + } + list_destroy(&list->list); +} + +/* + * This function looks through the objlist to see if the specified object number + * is contained in the objlist. In the process, it will remove all object + * numbers in the list that are smaller than the specified object number. Thus, + * any lookup of an object number smaller than a previously looked up object + * number will always return false; therefore, all lookups should be done in + * ascending order. + */ +static boolean_t +objlist_exists(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = list_head(&list->list); + ASSERT3U(object, >=, list->last_lookup); + list->last_lookup = object; + while (node != NULL && node->object < object) { + VERIFY3P(node, ==, list_remove_head(&list->list)); + kmem_free(node, sizeof (*node)); + node = list_head(&list->list); + } + return (node != NULL && node->object == object); +} + +/* + * The objlist is a list of object numbers stored in ascending order. However, + * the insertion of new object numbers does not seek out the correct location to + * store a new object number; instead, it appends it to the list for simplicity. + * Thus, any users must take care to only insert new object numbers in ascending + * order. + */ +static void +objlist_insert(struct objlist *list, uint64_t object) +{ + struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP); + node->object = object; +#ifdef ZFS_DEBUG + { + struct receive_objnode *last_object = list_tail(&list->list); + uint64_t last_objnum = (last_object != NULL ? last_object->object : 0); + ASSERT3U(node->object, >, last_objnum); + } +#endif + list_insert_tail(&list->list, node); +} + /* * Issue the prefetch reads for any necessary indirect blocks. * @@ -2118,13 +2539,7 @@ static void receive_read_prefetch(struct receive_arg *ra, uint64_t object, uint64_t offset, uint64_t length) { - struct receive_ign_obj_node *node = list_head(&ra->ignore_obj_list); - while (node != NULL && node->object < object) { - VERIFY3P(node, ==, list_remove_head(&ra->ignore_obj_list)); - kmem_free(node, sizeof (*node)); - node = list_head(&ra->ignore_obj_list); - } - if (node == NULL || node->object > object) { + if (!objlist_exists(&ra->ignore_objlist, object)) { dmu_prefetch(ra->os, object, 1, offset, length, ZIO_PRIORITY_SYNC_READ); } @@ -2157,20 +2572,7 @@ receive_read_record(struct receive_arg *ra) */ if (err == ENOENT || (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { - struct receive_ign_obj_node *node = - kmem_zalloc(sizeof (*node), - KM_SLEEP); - node->object = drro->drr_object; -#ifdef ZFS_DEBUG - { - struct receive_ign_obj_node *last_object = - list_tail(&ra->ignore_obj_list); - uint64_t last_objnum = (last_object != NULL ? - last_object->object : 0); - ASSERT3U(node->object, >, last_objnum); - } -#endif - list_insert_tail(&ra->ignore_obj_list, node); + objlist_insert(&ra->ignore_objlist, drro->drr_object); err = 0; } return (err); @@ -2236,7 +2638,7 @@ receive_read_record(struct receive_arg *ra) { struct drr_end *drre = &ra->rrd->header.drr_u.drr_end; if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum)) - return (SET_ERROR(EINVAL)); + return (SET_ERROR(ECKSUM)); return (0); } case DRR_SPILL: @@ -2263,6 +2665,10 @@ receive_process_record(struct receive_writer_arg *rwa, { int err; + /* Processing in order, therefore bytes_read should be increasing. */ + ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read); + rwa->bytes_read = rrd->bytes_read; + switch (rrd->header.drr_type) { case DRR_OBJECT: { @@ -2357,6 +2763,32 @@ receive_writer_thread(void *arg) mutex_exit(&rwa->mutex); } +static int +resume_check(struct receive_arg *ra, nvlist_t *begin_nvl) +{ + uint64_t val; + objset_t *mos = dmu_objset_pool(ra->os)->dp_meta_objset; + uint64_t dsobj = dmu_objset_id(ra->os); + uint64_t resume_obj, resume_off; + + if (nvlist_lookup_uint64(begin_nvl, + "resume_object", &resume_obj) != 0 || + nvlist_lookup_uint64(begin_nvl, + "resume_offset", &resume_off) != 0) { + return (SET_ERROR(EINVAL)); + } + VERIFY0(zap_lookup(mos, dsobj, + DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val)); + if (resume_obj != val) + return (SET_ERROR(EINVAL)); + VERIFY0(zap_lookup(mos, dsobj, + DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val)); + if (resume_off != val) + return (SET_ERROR(EINVAL)); + + return (0); +} + /* * Read in the stream's records, one by one, and apply them to the pool. There * are two threads involved; the thread that calls this function will spin up a @@ -2377,7 +2809,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, struct receive_arg *ra; struct receive_writer_arg *rwa; int featureflags; - struct receive_ign_obj_node *n; + uint32_t payloadlen; + void *payload; + nvlist_t *begin_nvl = NULL; ra = kmem_zalloc(sizeof (*ra), KM_SLEEP); rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP); @@ -2386,8 +2820,14 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ra->cksum = drc->drc_cksum; ra->vp = vp; ra->voff = *voffp; - list_create(&ra->ignore_obj_list, sizeof (struct receive_ign_obj_node), - offsetof(struct receive_ign_obj_node, node)); + + if (dsl_dataset_is_zapified(drc->drc_ds)) { + (void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset, + drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES, + sizeof (ra->bytes_read), 1, &ra->bytes_read); + } + + objlist_create(&ra->ignore_objlist); /* these were verified in dmu_recv_begin */ ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, @@ -2438,9 +2878,29 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, drc->drc_guid_to_ds_map = rwa->guid_to_ds_map; } - err = receive_read_payload_and_next_header(ra, 0, NULL); - if (err) + payloadlen = drc->drc_drr_begin->drr_payloadlen; + payload = NULL; + if (payloadlen != 0) + payload = kmem_alloc(payloadlen, KM_SLEEP); + + err = receive_read_payload_and_next_header(ra, payloadlen, payload); + if (err != 0) { + if (payloadlen != 0) + kmem_free(payload, payloadlen); goto out; + } + if (payloadlen != 0) { + err = nvlist_unpack(payload, payloadlen, &begin_nvl, KM_SLEEP); + kmem_free(payload, payloadlen); + if (err != 0) + goto out; + } + + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { + err = resume_check(ra, begin_nvl); + if (err != 0) + goto out; + } (void) bqueue_init(&rwa->q, zfs_recv_queue_length, offsetof(struct receive_record_arg, node)); @@ -2448,6 +2908,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL); rwa->os = ra->os; rwa->byteswap = drc->drc_byteswap; + rwa->resumable = drc->drc_resumable; (void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc, TS_RUN, minclsyspri); @@ -2461,7 +2922,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, * We can leave this loop in 3 ways: First, if rwa->err is * non-zero. In that case, the writer thread will free the rrd we just * pushed. Second, if we're interrupted; in that case, either it's the - * first loop and ra->rrd was never allocated, or it's later, and ra.rrd + * first loop and ra->rrd was never allocated, or it's later and ra->rrd * has been handed off to the writer thread who will free it. Finally, * if receive_read_record fails or we're at the end of the stream, then * we free ra->rrd and exit. @@ -2506,24 +2967,21 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, err = rwa->err; out: + nvlist_free(begin_nvl); if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) zfs_onexit_fd_rele(cleanup_fd); if (err != 0) { /* - * destroy what we created, so we don't leave it in the - * inconsistent restoring state. + * Clean up references. If receive is not resumable, + * destroy what we created, so we don't leave it in + * the inconsistent state. */ dmu_recv_cleanup_ds(drc); } *voffp = ra->voff; - - for (n = list_remove_head(&ra->ignore_obj_list); n != NULL; - n = list_remove_head(&ra->ignore_obj_list)) { - kmem_free(n, sizeof (*n)); - } - list_destroy(&ra->ignore_obj_list); + objlist_destroy(&ra->ignore_objlist); kmem_free(ra, sizeof (*ra)); kmem_free(rwa, sizeof (*rwa)); return (err); @@ -2674,6 +3132,20 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; + if (dsl_dataset_has_resume_receive_state(ds)) { + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_FROMGUID, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OBJECT, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OFFSET, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_BYTES, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TOGUID, tx); + (void) zap_remove(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TONAME, tx); + } } drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE); @@ -2722,16 +3194,13 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc) int error; #ifdef _KERNEL - char *name; - /* * We will be destroying the ds; make sure its origin is unmounted if * necessary. */ - name = kmem_alloc(MAXNAMELEN, KM_SLEEP); + char name[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(drc->drc_ds, name); zfs_destroy_unmount_origin(name); - kmem_free(name, MAXNAMELEN); #endif error = dsl_sync_task(drc->drc_tofs, diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 44ba74181c46..0df12fac8c36 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -47,6 +47,7 @@ typedef struct prefetch_data { int pd_flags; boolean_t pd_cancel; boolean_t pd_exited; + zbookmark_phys_t pd_resume; } prefetch_data_t; typedef struct traverse_data { @@ -323,30 +324,29 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, uint32_t flags = ARC_FLAG_WAIT; int32_t i; int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; - dnode_phys_t *cdnp; + dnode_phys_t *child_dnp; err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err != 0) goto post; - cdnp = buf->b_data; + child_dnp = buf->b_data; - for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) { - prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset, - zb->zb_blkid * epb + i); + for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) { + prefetch_dnode_metadata(td, &child_dnp[i], + zb->zb_objset, zb->zb_blkid * epb + i); } /* recursively visitbp() blocks below this */ - for (i = 0; i < epb; i += cdnp[i].dn_extra_slots + 1) { - err = traverse_dnode(td, &cdnp[i], zb->zb_objset, - zb->zb_blkid * epb + i); + for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) { + err = traverse_dnode(td, &child_dnp[i], + zb->zb_objset, zb->zb_blkid * epb + i); if (err != 0) break; } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; - dnode_phys_t *mdnp, *gdnp, *udnp; err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); @@ -354,11 +354,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, goto post; osp = buf->b_data; - mdnp = &osp->os_meta_dnode; - gdnp = &osp->os_groupused_dnode; - udnp = &osp->os_userused_dnode; - - prefetch_dnode_metadata(td, mdnp, zb->zb_objset, + prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); /* * See the block comment above for the goal of this variable. @@ -370,21 +366,21 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, td->td_realloc_possible = B_FALSE; if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { - prefetch_dnode_metadata(td, gdnp, zb->zb_objset, - DMU_GROUPUSED_OBJECT); - prefetch_dnode_metadata(td, udnp, zb->zb_objset, - DMU_USERUSED_OBJECT); + prefetch_dnode_metadata(td, &osp->os_groupused_dnode, + zb->zb_objset, DMU_GROUPUSED_OBJECT); + prefetch_dnode_metadata(td, &osp->os_userused_dnode, + zb->zb_objset, DMU_USERUSED_OBJECT); } - err = traverse_dnode(td, mdnp, zb->zb_objset, + err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset, DMU_META_DNODE_OBJECT); if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { - err = traverse_dnode(td, gdnp, zb->zb_objset, - DMU_GROUPUSED_OBJECT); + err = traverse_dnode(td, &osp->os_groupused_dnode, + zb->zb_objset, DMU_GROUPUSED_OBJECT); } if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) { - err = traverse_dnode(td, udnp, zb->zb_objset, - DMU_USERUSED_OBJECT); + err = traverse_dnode(td, &osp->os_userused_dnode, + zb->zb_objset, DMU_USERUSED_OBJECT); } } @@ -416,9 +412,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, * Set the bookmark to the first level-0 block that we need * to visit. This way, the resuming code does not need to * deal with resuming from indirect blocks. + * + * Note, if zb_level <= 0, dnp may be NULL, so we don't want + * to dereference it. */ - td->td_resume->zb_blkid = zb->zb_blkid << - (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)); + td->td_resume->zb_blkid = zb->zb_blkid; + if (zb->zb_level > 0) { + td->td_resume->zb_blkid <<= zb->zb_level * + (dnp->dn_indblkshift - SPA_BLKPTRSHIFT); + } td->td_paused = B_TRUE; } @@ -450,6 +452,10 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, int j, err = 0; zbookmark_phys_t czb; + if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL && + object < td->td_resume->zb_object) + return (0); + if (td->td_flags & TRAVERSE_PRE) { SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL, ZB_DNODE_BLKID); @@ -527,6 +533,7 @@ traverse_prefetch_thread(void *arg) td.td_func = traverse_prefetcher; td.td_arg = td_main->td_pfd; td.td_pfd = NULL; + td.td_resume = &td_main->td_pfd->pd_resume; SET_BOOKMARK(&czb, td.td_objset, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); @@ -556,12 +563,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, ASSERT(ds == NULL || objset == ds->ds_object); ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST)); - /* - * The data prefetching mechanism (the prefetch thread) is incompatible - * with resuming from a bookmark. - */ - ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA)); - td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP); pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP); czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP); @@ -586,6 +587,8 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, } pd->pd_flags = flags; + if (resume != NULL) + pd->pd_resume = *resume; mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL); @@ -638,11 +641,19 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, * in syncing context). */ int -traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags, - blkptr_cb_t func, void *arg) +traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start, + zbookmark_phys_t *resume, + int flags, blkptr_cb_t func, void *arg) { return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object, - &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg)); + &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg)); +} + +int +traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, + int flags, blkptr_cb_t func, void *arg) +{ + return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg)); } int @@ -675,7 +686,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags, /* visit each dataset */ for (obj = 1; err == 0; - err = dmu_object_next(mos, &obj, FALSE, txg_start)) { + err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) { dmu_object_info_t doi; err = dmu_object_info(mos, obj, &doi); diff --git a/module/zfs/dsl_bookmark.c b/module/zfs/dsl_bookmark.c index 447a3a2dc3a2..5a7f034ce649 100644 --- a/module/zfs/dsl_bookmark.c +++ b/module/zfs/dsl_bookmark.c @@ -34,10 +34,10 @@ static int dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, dsl_dataset_t **dsp, void *tag, char **shortnamep) { - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; char *hashp; - if (strlen(fullname) >= MAXNAMELEN) + if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); hashp = strchr(fullname, '#'); if (hashp == NULL) diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 9c275b234ca8..5b7de74dc90d 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -25,6 +25,7 @@ * Copyright (c) 2014 RackTop Systems. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. + * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved. */ #include @@ -52,6 +53,9 @@ #include #include #include +#include +#include +#include /* * The SPA supports block sizes up to 16MB. However, very large blocks @@ -75,6 +79,8 @@ int zfs_max_recordsize = 1 * 1024 * 1024; extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); +extern int spa_asize_inflation; + /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been @@ -664,22 +670,38 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name) dsl_dir_name(ds->ds_dir, name); VERIFY0(dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { - (void) strcat(name, "@"); + VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN), + <, ZFS_MAX_DATASET_NAME_LEN); /* * We use a "recursive" mutex so that we * can call dprintf_ds() with ds_lock held. */ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); - (void) strcat(name, ds->ds_snapname); + VERIFY3U(strlcat(name, ds->ds_snapname, + ZFS_MAX_DATASET_NAME_LEN), <, + ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&ds->ds_lock); } else { - (void) strcat(name, ds->ds_snapname); + VERIFY3U(strlcat(name, ds->ds_snapname, + ZFS_MAX_DATASET_NAME_LEN), <, + ZFS_MAX_DATASET_NAME_LEN); } } } } +int +dsl_dataset_namelen(dsl_dataset_t *ds) +{ + int len; + VERIFY0(dsl_dataset_get_snapname(ds)); + mutex_enter(&ds->ds_lock); + len = dsl_dir_namelen(ds->ds_dir) + 1 + strlen(ds->ds_snapname); + mutex_exit(&ds->ds_lock); + return (len); +} + void dsl_dataset_rele(dsl_dataset_t *ds, void *tag) { @@ -704,6 +726,7 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) { boolean_t gotit = FALSE; + ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); mutex_enter(&ds->ds_lock); if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { ds->ds_owner = tag; @@ -714,6 +737,16 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) return (gotit); } +boolean_t +dsl_dataset_has_owner(dsl_dataset_t *ds) +{ + boolean_t rv; + mutex_enter(&ds->ds_lock); + rv = (ds->ds_owner != NULL); + mutex_exit(&ds->ds_lock); + return (rv); +} + static void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) { @@ -1238,10 +1271,10 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) int error = 0; dsl_dataset_t *ds; char *name, *atp; - char dsname[MAXNAMELEN]; + char dsname[ZFS_MAX_DATASET_NAME_LEN]; name = nvpair_name(pair); - if (strlen(name) >= MAXNAMELEN) + if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN) error = SET_ERROR(ENAMETOOLONG); if (error == 0) { atp = strchr(name, '@'); @@ -1414,7 +1447,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { dsl_dataset_t *ds; char *name, *atp; - char dsname[MAXNAMELEN]; + char dsname[ZFS_MAX_DATASET_NAME_LEN]; name = nvpair_name(pair); atp = strchr(name, '@'); @@ -1461,7 +1494,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) suspended = fnvlist_alloc(); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { - char fsname[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *snapname = nvpair_name(pair); char *atp; void *cookie; @@ -1615,6 +1648,21 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; + if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) { + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1, + &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1, + &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1, + &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx)); + ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0; + ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0; + ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0; + } + dmu_objset_sync(ds->ds_objset, zio, tx); for (f = 0; f < SPA_FEATURES; f++) { @@ -1655,7 +1703,7 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; - char buf[ZFS_MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, za.za_first_integer, FTAG, &clone)); dsl_dir_name(clone->ds_dir, buf); @@ -1670,6 +1718,78 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) nvlist_free(propval); } +static void +get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv) +{ + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (dsl_dataset_has_resume_receive_state(ds)) { + char *str; + void *packed; + uint8_t *compressed; + uint64_t val; + nvlist_t *token_nv = fnvlist_alloc(); + size_t packed_size, compressed_size; + zio_cksum_t cksum; + char *propval; + char buf[MAXNAMELEN]; + int i; + + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "fromguid", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "object", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "offset", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "bytes", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) { + fnvlist_add_uint64(token_nv, "toguid", val); + } + if (zap_lookup(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) { + fnvlist_add_string(token_nv, "toname", buf); + } + if (zap_contains(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_EMBEDOK) == 0) { + fnvlist_add_boolean(token_nv, "embedok"); + } + packed = fnvlist_pack(token_nv, &packed_size); + fnvlist_free(token_nv); + compressed = kmem_alloc(packed_size, KM_SLEEP); + + compressed_size = gzip_compress(packed, compressed, + packed_size, packed_size, 6); + + fletcher_4_native(compressed, compressed_size, &cksum); + + str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP); + for (i = 0; i < compressed_size; i++) { + (void) sprintf(str + i * 2, "%02x", compressed[i]); + } + str[compressed_size * 2] = '\0'; + propval = kmem_asprintf("%u-%llx-%llx-%s", + ZFS_SEND_RESUME_TOKEN_VERSION, + (longlong_t)cksum.zc_word[0], + (longlong_t)packed_size, str); + dsl_prop_nvlist_add_string(nv, + ZFS_PROP_RECEIVE_RESUME_TOKEN, propval); + kmem_free(packed, packed_size); + kmem_free(str, compressed_size * 2 + 1); + kmem_free(compressed, packed_size); + strfree(propval); + } +} + void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { @@ -1693,7 +1813,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) get_clones_stat(ds, nv); } else { if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds->ds_prev, buf); dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); } @@ -1743,6 +1863,32 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } + if (!dsl_dataset_is_snapshot(ds)) { + /* 6 extra bytes for /%recv */ + char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; + dsl_dataset_t *recv_ds; + + /* + * A failed "newfs" (e.g. full) resumable receive leaves + * the stats set on this dataset. Check here for the prop. + */ + get_receive_resume_stats(ds, nv); + + /* + * A failed incremental resumable receive leaves the + * stats set on our child named "%recv". Check the child + * for the prop. + */ + dsl_dataset_name(ds, recvname); + if (strlcat(recvname, "/", sizeof (recvname)) < + sizeof (recvname) && + strlcat(recvname, recv_clone_name, sizeof (recvname)) < + sizeof (recvname) && + dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) { + get_receive_resume_stats(recv_ds, nv); + dsl_dataset_rele(recv_ds, FTAG); + } + } } void @@ -1863,7 +2009,7 @@ dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, /* dataset name + 1 for the "@" + the new snapshot name must fit */ if (dsl_dir_namelen(hds->ds_dir) + 1 + - strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) + strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN) error = SET_ERROR(ENAMETOOLONG); return (error); @@ -1970,7 +2116,8 @@ dsl_dataset_rename_snapshot(const char *fsname, * only one long hold on the dataset. We're not allowed to change anything here * so we don't permanently release the long hold or regular hold here. We want * to do this only when syncing to avoid the dataset unexpectedly going away - * when we release the long hold. + * when we release the long hold. Allow a long hold to exist for volumes, this + * may occur when asynchronously registering the minor with the kernel. */ static int dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) @@ -1985,7 +2132,7 @@ dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) dsl_dataset_long_rele(ds, owner); } - held = dsl_dataset_long_held(ds); + held = (dsl_dataset_long_held(ds) && (ds->ds_owner != zvol_tag)); if (owner != NULL) dsl_dataset_long_hold(ds, owner); @@ -2095,7 +2242,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds, *clone; uint64_t cloneobj; - char namebuf[ZFS_MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); @@ -2648,7 +2795,7 @@ promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) * Promote a clone. * * If it fails due to a conflicting snapshot name, "conflsnap" will be filled - * in with the name. (It must be at least MAXNAMELEN bytes long.) + * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.) */ int dsl_dataset_promote(const char *name, char *conflsnap) @@ -2685,6 +2832,11 @@ int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) { + /* + * "slack" factor for received datasets with refquota set on them. + * See the bottom of this function for details on its use. + */ + uint64_t refquota_slack = DMU_MAX_ACCESS * spa_asize_inflation; int64_t unused_refres_delta; /* they should both be heads */ @@ -2727,10 +2879,22 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) return (SET_ERROR(ENOSPC)); - /* clone can't be over the head's refquota */ + /* + * The clone can't be too much over the head's refquota. + * + * To ensure that the entire refquota can be used, we allow one + * transaction to exceed the the refquota. Therefore, this check + * needs to also allow for the space referenced to be more than the + * refquota. The maximum amount of space that one transaction can use + * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this + * overage ensures that we are able to receive a filesystem that + * exceeds the refquota on the source system. + * + * So that overage is the refquota_slack we use below. + */ if (origin_head->ds_quota != 0 && dsl_dataset_phys(clone)->ds_referenced_bytes > - origin_head->ds_quota) + origin_head->ds_quota + refquota_slack) return (SET_ERROR(EDQUOT)); return (0); @@ -2745,8 +2909,13 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, int64_t unused_refres_delta; ASSERT(clone->ds_reserved == 0); + /* + * NOTE: On DEBUG kernels there could be a race between this and + * the check function if spa_asize_inflation is adjusted... + */ ASSERT(origin_head->ds_quota == 0 || - dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota); + dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota + + DMU_MAX_ACCESS * spa_asize_inflation); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); /* @@ -3391,6 +3560,23 @@ dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); } +boolean_t +dsl_dataset_is_zapified(dsl_dataset_t *ds) +{ + dmu_object_info_t doi; + + dmu_object_info_from_db(ds->ds_dbuf, &doi); + return (doi.doi_type == DMU_OTN_ZAP_METADATA); +} + +boolean_t +dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds) +{ + return (dsl_dataset_is_zapified(ds) && + zap_contains(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0); +} + #if defined(_KERNEL) && defined(HAVE_SPL) #if defined(_LP64) module_param(zfs_max_recordsize, int, 0644); diff --git a/module/zfs/dsl_deleg.c b/module/zfs/dsl_deleg.c index 952422be2381..eb39cff57f2a 100644 --- a/module/zfs/dsl_deleg.c +++ b/module/zfs/dsl_deleg.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ /* @@ -330,7 +330,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); baseza = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); - source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP); + source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (dd = startdd; dd != NULL; dd = dd->dd_parent) { @@ -370,7 +370,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp) nvlist_free(sp_nvp); } - kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); + kmem_free(source, ZFS_MAX_DATASET_NAME_LEN); kmem_free(baseza, sizeof (zap_attribute_t)); kmem_free(basezc, sizeof (zap_cursor_t)); kmem_free(za, sizeof (zap_attribute_t)); diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index d7c34c9a403e..716081ba3ac3 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -978,9 +978,17 @@ dsl_destroy_inconsistent(const char *dsname, void *arg) objset_t *os; if (dmu_objset_hold(dsname, FTAG, &os) == 0) { - boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os)); + boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os)); + + /* + * If the dataset is inconsistent because a resumable receive + * has failed, then do not destroy it. + */ + if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os))) + need_destroy = B_FALSE; + dmu_objset_rele(os, FTAG); - if (inconsistent) + if (need_destroy) (void) dsl_destroy_head(dsname); } return (0); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 8983e0793f23..ae67b362ee8a 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -299,13 +299,14 @@ dsl_dir_async_rele(dsl_dir_t *dd, void *tag) dmu_buf_rele(dd->dd_dbuf, tag); } -/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ +/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ void dsl_dir_name(dsl_dir_t *dd, char *buf) { if (dd->dd_parent) { dsl_dir_name(dd->dd_parent, buf); - (void) strcat(buf, "/"); + VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <, + ZFS_MAX_DATASET_NAME_LEN); } else { buf[0] = '\0'; } @@ -315,10 +316,12 @@ dsl_dir_name(dsl_dir_t *dd, char *buf) * dprintf_dd() with dd_lock held */ mutex_enter(&dd->dd_lock); - (void) strcat(buf, dd->dd_myname); + VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), + <, ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&dd->dd_lock); } else { - (void) strcat(buf, dd->dd_myname); + VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), + <, ZFS_MAX_DATASET_NAME_LEN); } } @@ -367,12 +370,12 @@ getcomponent(const char *path, char *component, const char **nextp) if (p != NULL && (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) return (SET_ERROR(EINVAL)); - if (strlen(path) >= MAXNAMELEN) + if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strcpy(component, path); p = NULL; } else if (p[0] == '/') { - if (p - path >= MAXNAMELEN) + if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); component[p - path] = '\0'; @@ -384,7 +387,7 @@ getcomponent(const char *path, char *component, const char **nextp) */ if (strchr(path, '/')) return (SET_ERROR(EINVAL)); - if (p - path >= MAXNAMELEN) + if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strncpy(component, path, p - path); component[p - path] = '\0'; @@ -412,7 +415,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dir_t *dd; uint64_t ddobj; - buf = kmem_alloc(MAXNAMELEN, KM_SLEEP); + buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); err = getcomponent(name, buf, &next); if (err != 0) goto error; @@ -479,7 +482,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, *tailp = next; *ddp = dd; error: - kmem_free(buf, MAXNAMELEN); + kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN); return (err); } @@ -974,7 +977,7 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; - char buf[MAXNAMELEN]; + char buf[ZFS_MAX_DATASET_NAME_LEN]; VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); @@ -1691,11 +1694,11 @@ static int dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { int *deltap = arg; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds, namebuf); - if (strlen(namebuf) + *deltap >= MAXNAMELEN) + if (strlen(namebuf) + *deltap >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); return (0); } diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 36147327550d..66e899a57118 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright 2015, Joyent, Inc. */ @@ -1095,7 +1095,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err = 0; - char setpoint[MAXNAMELEN]; + char setpoint[ZFS_MAX_DATASET_NAME_LEN]; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 72163521efa6..6c5f1f0b5b3f 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1115,7 +1115,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) * rootbp's birth time is < cur_min_txg. Then we will * add the next snapshots/clones to the work queue. */ - char *dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanning dataset %llu (%s) is unnecessary because " "cur_min_txg (%llu) >= max_txg (%llu)", @@ -1146,7 +1146,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); - dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP); + dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " "pausing=%u", @@ -1154,7 +1154,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg, (int)scn->scn_pausing); - kmem_free(dsname, ZFS_MAXNAMELEN); + kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); if (scn->scn_pausing) goto out; diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index 1b234ed480f9..a6d1aa937ef1 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. */ @@ -181,7 +181,7 @@ dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds, } typedef struct zfs_hold_cleanup_arg { - char zhca_spaname[MAXNAMELEN]; + char zhca_spaname[ZFS_MAX_DATASET_NAME_LEN]; uint64_t zhca_spa_load_guid; nvlist_t *zhca_holds; } zfs_hold_cleanup_arg_t; @@ -580,7 +580,7 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist, error = dsl_dataset_hold_obj_string(tmpdp, nvpair_name(pair), FTAG, &ds); if (error == 0) { - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds, name); dsl_pool_config_exit(tmpdp, FTAG); dsl_dataset_rele(ds, FTAG); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index d1aefe58542c..26181af84fb9 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -361,8 +361,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) break; } - strval = kmem_alloc( - MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, + strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); @@ -375,8 +374,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, prop, strval, intval, src); if (strval != NULL) - kmem_free(strval, - MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); + kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); break; @@ -2018,6 +2016,16 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, return (0); } +/* ARGSUSED */ +int +verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) +{ + if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) + return (SET_ERROR(ENAMETOOLONG)); + + return (0); +} + static int spa_load_verify(spa_t *spa) { @@ -2032,6 +2040,14 @@ spa_load_verify(spa_t *spa) if (policy.zrp_request & ZPOOL_NEVER_REWIND) return (0); + dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); + error = dmu_objset_find_dp(spa->spa_dsl_pool, + spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, + DS_FIND_CHILDREN); + dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); + if (error != 0) + return (error); + rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 01aa4641e63f..cf6fc224a230 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ #include @@ -493,7 +493,7 @@ spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { va_list adx; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *nvl = fnvlist_alloc(); ASSERT(tx != NULL); @@ -512,7 +512,7 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, dmu_tx_t *tx, const char *fmt, ...) { va_list adx; - char namebuf[MAXNAMELEN]; + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *nvl = fnvlist_alloc(); ASSERT(tx != NULL); diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index e47cfc878d08..d279d1828407 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -749,12 +749,13 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid, return (ENOENT); cookie = spl_fstrans_mark(); - snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); while (error == 0) { dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - error = dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN, - snapname, &id, &pos, &case_conflict); + error = dmu_snapshot_list_next(zsb->z_os, + ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, + &case_conflict); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto out; @@ -767,7 +768,7 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid, snprintf(full_path, path_len - 1, "%s/.zfs/snapshot/%s", zsb->z_mntopts->z_mntpoint, snapname); out: - kmem_free(snapname, MAXNAMELEN); + kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); spl_fstrans_unmark(cookie); return (error); @@ -854,14 +855,14 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm, ZFS_ENTER(zsb); - to = kmem_alloc(MAXNAMELEN, KM_SLEEP); - from = kmem_alloc(MAXNAMELEN, KM_SLEEP); - real = kmem_alloc(MAXNAMELEN, KM_SLEEP); - fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); if (zsb->z_case == ZFS_CASE_INSENSITIVE) { error = dmu_snapshot_realname(zsb->z_os, snm, real, - MAXNAMELEN, NULL); + ZFS_MAX_DATASET_NAME_LEN, NULL); if (error == 0) { snm = real; } else if (error != ENOTSUP) { @@ -871,9 +872,11 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm, dmu_objset_name(zsb->z_os, fsname); - error = zfsctl_snapshot_name(ITOZSB(sdip), snm, MAXNAMELEN, from); + error = zfsctl_snapshot_name(ITOZSB(sdip), snm, + ZFS_MAX_DATASET_NAME_LEN, from); if (error == 0) - error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, MAXNAMELEN, to); + error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, + ZFS_MAX_DATASET_NAME_LEN, to); if (error == 0) error = zfs_secpolicy_rename_perms(from, to, cr); if (error != 0) @@ -903,10 +906,10 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm, rw_exit(&zfs_snapshot_lock); out: - kmem_free(from, MAXNAMELEN); - kmem_free(to, MAXNAMELEN); - kmem_free(real, MAXNAMELEN); - kmem_free(fsname, MAXNAMELEN); + kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); ZFS_EXIT(zsb); @@ -929,12 +932,12 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) ZFS_ENTER(zsb); - snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); - real = kmem_alloc(MAXNAMELEN, KM_SLEEP); + snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); + real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); if (zsb->z_case == ZFS_CASE_INSENSITIVE) { error = dmu_snapshot_realname(zsb->z_os, name, real, - MAXNAMELEN, NULL); + ZFS_MAX_DATASET_NAME_LEN, NULL); if (error == 0) { name = real; } else if (error != ENOTSUP) { @@ -942,7 +945,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) } } - error = zfsctl_snapshot_name(ITOZSB(dip), name, MAXNAMELEN, snapname); + error = zfsctl_snapshot_name(ITOZSB(dip), name, + ZFS_MAX_DATASET_NAME_LEN, snapname); if (error == 0) error = zfs_secpolicy_destroy_perms(snapname, cr); if (error != 0) @@ -952,8 +956,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) if ((error == 0) || (error == ENOENT)) error = dsl_destroy_snapshot(snapname, B_FALSE); out: - kmem_free(snapname, MAXNAMELEN); - kmem_free(real, MAXNAMELEN); + kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); + kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); ZFS_EXIT(zsb); @@ -975,7 +979,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, if (!zfs_admin_snapshot) return (EACCES); - dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); + dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { error = SET_ERROR(EILSEQ); @@ -997,7 +1001,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, 0, cr, NULL, NULL); } out: - kmem_free(dsname, MAXNAMELEN); + kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); return (error); } @@ -1075,11 +1079,11 @@ zfsctl_snapshot_mount(struct path *path, int flags) zsb = ITOZSB(ip); ZFS_ENTER(zsb); - full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); + full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); error = zfsctl_snapshot_name(zsb, dname(dentry), - MAXNAMELEN, full_name); + ZFS_MAX_DATASET_NAME_LEN, full_name); if (error) goto error; @@ -1153,7 +1157,7 @@ zfsctl_snapshot_mount(struct path *path, int flags) } path_put(&spath); error: - kmem_free(full_name, MAXNAMELEN); + kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); kmem_free(full_path, MAXPATHLEN); ZFS_EXIT(zsb); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 30338ac148e0..09f83a5cf683 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska + * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Portions Copyright 2012 Pawel Jakub Dawidek * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. @@ -603,7 +604,7 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, case ZFS_PROP_SNAPSHOT_LIMIT: if (!INGLOBALZONE(curproc)) { uint64_t zoned; - char setpoint[MAXNAMELEN]; + char setpoint[ZFS_MAX_DATASET_NAME_LEN]; /* * Unprivileged users are allowed to modify the * limit on things *under* (ie. contained by) @@ -845,7 +846,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; int error; if ((error = zfs_secpolicy_write_perms(from, @@ -898,7 +899,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone); if (error == 0) { - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_t *origin = NULL; dsl_dir_t *dd; dd = clone->ds_dir; @@ -944,6 +945,13 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) ZFS_DELEG_PERM_CREATE, cr)); } +/* ARGSUSED */ +static int +zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_recv(zc, innvl, cr)); +} + int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { @@ -1068,7 +1076,7 @@ zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) static int zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; int error; char *origin; @@ -1211,7 +1219,7 @@ zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { - char fsname[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; error = dmu_fsname(nvpair_name(pair), fsname); if (error != 0) return (error); @@ -1232,7 +1240,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { - char fsname[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; error = dmu_fsname(nvpair_name(pair), fsname); if (error != 0) return (error); @@ -2252,7 +2260,8 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) * A dataset name of maximum length cannot have any snapshots, * so exit immediately. */ - if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { + if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= + ZFS_MAX_DATASET_NAME_LEN) { dmu_objset_rele(os, FTAG); return (SET_ERROR(ESRCH)); } @@ -3040,7 +3049,7 @@ zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, boolean_t fuids_ok, sa_ok; uint64_t zplver = ZPL_VERSION; objset_t *os = NULL; - char parentname[MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; char *cp; spa_t *spa; uint64_t spa_vers; @@ -3406,7 +3415,7 @@ zfs_destroy_unmount_origin(const char *fsname) return; ds = dmu_objset_ds(os); if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) { - char originname[MAXNAMELEN]; + char originname[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds->ds_prev, originname); dmu_objset_rele(os, FTAG); (void) zfs_unmount_snap(originname); @@ -3990,77 +3999,93 @@ props_reduce(nvlist_t *props, nvlist_t *origprops) } } +/* + * Extract properties that cannot be set PRIOR to the receipt of a dataset. + * For example, refquota cannot be set until after the receipt of a dataset, + * because in replication streams, an older/earlier snapshot may exceed the + * refquota. We want to receive the older/earlier snapshot, but setting + * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent + * the older/earlier snapshot from being received (with EDQUOT). + * + * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario. + * + * libzfs will need to be judicious handling errors encountered by props + * extracted by this function. + */ +static nvlist_t * +extract_delay_props(nvlist_t *props) +{ + nvlist_t *delayprops; + nvpair_t *nvp, *tmp; + static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 }; + int i; + + VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL; + nvp = nvlist_next_nvpair(props, nvp)) { + /* + * strcmp() is safe because zfs_prop_to_name() always returns + * a bounded string. + */ + for (i = 0; delayable[i] != 0; i++) { + if (strcmp(zfs_prop_to_name(delayable[i]), + nvpair_name(nvp)) == 0) { + break; + } + } + if (delayable[i] != 0) { + tmp = nvlist_prev_nvpair(props, nvp); + VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0); + VERIFY(nvlist_remove_nvpair(props, nvp) == 0); + nvp = tmp; + } + } + + if (nvlist_empty(delayprops)) { + nvlist_free(delayprops); + delayprops = NULL; + } + return (delayprops); +} + #ifdef DEBUG static boolean_t zfs_ioc_recv_inject_err; #endif /* - * inputs: - * zc_name name of containing filesystem - * zc_nvlist_src{_size} nvlist of properties to apply - * zc_value name of snapshot to create - * zc_string name of clone origin (if DRR_FLAG_CLONE) - * zc_cookie file descriptor to recv from - * zc_begin_record the BEGIN record of the stream (not byteswapped) - * zc_guid force flag - * zc_cleanup_fd cleanup-on-exit file descriptor - * zc_action_handle handle for this guid/ds mapping (or zero on first call) - * - * outputs: - * zc_cookie number of bytes read - * zc_nvlist_dst{_size} error for each unapplied received property - * zc_obj zprop_errflags_t - * zc_action_handle handle for this guid/ds mapping + * On failure the 'errors' nvlist may be allocated and will contain a + * descriptions of the failures. It's the callers responsibilty to free. */ static int -zfs_ioc_recv(zfs_cmd_t *zc) +zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, + nvlist_t *props, boolean_t force, boolean_t resumable, int input_fd, + dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes, + uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors) { - file_t *fp; dmu_recv_cookie_t drc; - boolean_t force = (boolean_t)zc->zc_guid; - int fd; int error = 0; int props_error = 0; - nvlist_t *errors; offset_t off; - nvlist_t *props = NULL; /* sent properties */ + nvlist_t *delayprops = NULL; /* sent properties applied post-receive */ nvlist_t *origprops = NULL; /* existing properties */ - char *origin = NULL; - char *tosnap; - char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; + file_t *input_fp; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || - strchr(zc->zc_value, '@') == NULL || - strchr(zc->zc_value, '%')) - return (SET_ERROR(EINVAL)); - - (void) strcpy(tofs, zc->zc_value); - tosnap = strchr(tofs, '@'); - *tosnap++ = '\0'; - - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &props)) != 0) - return (error); - - fd = zc->zc_cookie; - fp = getf(fd); - if (fp == NULL) { - nvlist_free(props); + *errors = NULL; + input_fp = getf(input_fd); + if (input_fp == NULL) return (SET_ERROR(EBADF)); - } - - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - - if (zc->zc_string[0]) - origin = zc->zc_string; error = dmu_recv_begin(tofs, tosnap, - &zc->zc_begin_record, force, origin, &drc); + begin_record, force, resumable, origin, &drc); if (error != 0) goto out; + *read_bytes = 0; + *errflags = 0; + *errors = fnvlist_alloc(); + /* * Set properties before we receive the stream so that they are applied * to the new data. Note that we must call dmu_recv_stream() if @@ -4090,14 +4115,14 @@ zfs_ioc_recv(zfs_cmd_t *zc) if (!first_recvd_props) props_reduce(props, origprops); if (zfs_check_clearable(tofs, origprops, &errlist) != 0) - (void) nvlist_merge(errors, errlist, 0); + (void) nvlist_merge(*errors, errlist, 0); nvlist_free(errlist); if (clear_received_props(tofs, origprops, first_recvd_props ? NULL : props) != 0) - zc->zc_obj |= ZPROP_ERR_NOCLEAR; + *errflags |= ZPROP_ERR_NOCLEAR; } else { - zc->zc_obj |= ZPROP_ERR_NOCLEAR; + *errflags |= ZPROP_ERR_NOCLEAR; } } @@ -4105,24 +4130,15 @@ zfs_ioc_recv(zfs_cmd_t *zc) props_error = dsl_prop_set_hasrecvd(tofs); if (props_error == 0) { + delayprops = extract_delay_props(props); (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, errors); + props, *errors); } } - if (zc->zc_nvlist_dst_size != 0 && - (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || - put_nvlist(zc, errors) != 0)) { - /* - * Caller made zc->zc_nvlist_dst less than the minimum expected - * size or supplied an invalid address. - */ - props_error = SET_ERROR(EINVAL); - } - - off = fp->f_offset; - error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, - &zc->zc_action_handle); + off = input_fp->f_offset; + error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd, + action_handle); if (error == 0) { zfs_sb_t *zsb = NULL; @@ -4144,11 +4160,32 @@ zfs_ioc_recv(zfs_cmd_t *zc) } else { error = dmu_recv_end(&drc, NULL); } + + /* Set delayed properties now, after we're done receiving. */ + if (delayprops != NULL && error == 0) { + (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, + delayprops, *errors); + } } - zc->zc_cookie = off - fp->f_offset; - if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) - fp->f_offset = off; + if (delayprops != NULL) { + /* + * Merge delayed props back in with initial props, in case + * we're DEBUG and zfs_ioc_recv_inject_err is set (which means + * we have to make sure clear_received_props() includes + * the delayed properties). + * + * Since zfs_ioc_recv_inject_err is only in DEBUG kernels, + * using ASSERT() will be just like a VERIFY. + */ + ASSERT(nvlist_merge(props, delayprops, 0) == 0); + nvlist_free(delayprops); + } + + + *read_bytes = off - input_fp->f_offset; + if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0) + input_fp->f_offset = off; #ifdef DEBUG if (zfs_ioc_recv_inject_err) { @@ -4167,14 +4204,14 @@ zfs_ioc_recv(zfs_cmd_t *zc) * Since we may have left a $recvd value on the * system, we can't clear the $hasrecvd flag. */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; + *errflags |= ZPROP_ERR_NORESTORE; } else if (first_recvd_props) { dsl_prop_unset_hasrecvd(tofs); } if (origprops == NULL && !drc.drc_newfs) { /* We failed to stash the original properties. */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; + *errflags |= ZPROP_ERR_NORESTORE; } /* @@ -4191,14 +4228,12 @@ zfs_ioc_recv(zfs_cmd_t *zc) * We stashed the original properties but failed to * restore them. */ - zc->zc_obj |= ZPROP_ERR_NORESTORE; + *errflags |= ZPROP_ERR_NORESTORE; } } out: - nvlist_free(props); + releasef(input_fd); nvlist_free(origprops); - nvlist_free(errors); - releasef(fd); if (error == 0) error = props_error; @@ -4206,6 +4241,176 @@ zfs_ioc_recv(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of containing filesystem (unused) + * zc_nvlist_src{_size} nvlist of properties to apply + * zc_value name of snapshot to create + * zc_string name of clone origin (if DRR_FLAG_CLONE) + * zc_cookie file descriptor to recv from + * zc_begin_record the BEGIN record of the stream (not byteswapped) + * zc_guid force flag + * zc_cleanup_fd cleanup-on-exit file descriptor + * zc_action_handle handle for this guid/ds mapping (or zero on first call) + * + * outputs: + * zc_cookie number of bytes read + * zc_obj zprop_errflags_t + * zc_action_handle handle for this guid/ds mapping + * zc_nvlist_dst{_size} error for each unapplied received property + */ +static int +zfs_ioc_recv(zfs_cmd_t *zc) +{ + dmu_replay_record_t begin_record; + nvlist_t *errors = NULL; + nvlist_t *props = NULL; + char *origin = NULL; + char *tosnap; + char tofs[ZFS_MAX_DATASET_NAME_LEN]; + int error = 0; + + if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || + strchr(zc->zc_value, '@') == NULL || + strchr(zc->zc_value, '%')) + return (SET_ERROR(EINVAL)); + + (void) strcpy(tofs, zc->zc_value); + tosnap = strchr(tofs, '@'); + *tosnap++ = '\0'; + + if (zc->zc_nvlist_src != 0 && + (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &props)) != 0) + return (error); + + if (zc->zc_string[0]) + origin = zc->zc_string; + + begin_record.drr_type = DRR_BEGIN; + begin_record.drr_payloadlen = 0; + begin_record.drr_u.drr_begin = zc->zc_begin_record; + + error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, zc->zc_guid, + B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd, + &zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors); + nvlist_free(props); + + /* + * Now that all props, initial and delayed, are set, report the prop + * errors to the caller. + */ + if (zc->zc_nvlist_dst_size != 0 && errors != NULL && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { + /* + * Caller made zc->zc_nvlist_dst less than the minimum expected + * size or supplied an invalid address. + */ + error = SET_ERROR(EINVAL); + } + + nvlist_free(errors); + + return (error); +} + +/* + * innvl: { + * "snapname" -> full name of the snapshot to create + * (optional) "props" -> properties to set (nvlist) + * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE) + * "begin_record" -> non-byteswapped dmu_replay_record_t + * "input_fd" -> file descriptor to read stream from (int32) + * (optional) "force" -> force flag (value ignored) + * (optional) "resumable" -> resumable flag (value ignored) + * (optional) "cleanup_fd" -> cleanup-on-exit file descriptor + * (optional) "action_handle" -> handle for this guid/ds mapping + * } + * + * outnvl: { + * "read_bytes" -> number of bytes read + * "error_flags" -> zprop_errflags_t + * "action_handle" -> handle for this guid/ds mapping + * "errors" -> error for each unapplied received property (nvlist) + * } + */ +static int +zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + dmu_replay_record_t *begin_record; + uint_t begin_record_size; + nvlist_t *errors = NULL; + nvlist_t *props = NULL; + char *snapname = NULL; + char *origin = NULL; + char *tosnap; + char tofs[ZFS_MAX_DATASET_NAME_LEN]; + boolean_t force; + boolean_t resumable; + uint64_t action_handle = 0; + uint64_t read_bytes = 0; + uint64_t errflags = 0; + int input_fd = -1; + int cleanup_fd = -1; + int error; + + error = nvlist_lookup_string(innvl, "snapname", &snapname); + if (error != 0) + return (SET_ERROR(EINVAL)); + + if (dataset_namecheck(snapname, NULL, NULL) != 0 || + strchr(snapname, '@') == NULL || + strchr(snapname, '%')) + return (SET_ERROR(EINVAL)); + + (void) strcpy(tofs, snapname); + tosnap = strchr(tofs, '@'); + *tosnap++ = '\0'; + + error = nvlist_lookup_string(innvl, "origin", &origin); + if (error && error != ENOENT) + return (error); + + error = nvlist_lookup_byte_array(innvl, "begin_record", + (uchar_t **) &begin_record, &begin_record_size); + if (error != 0 || begin_record_size != sizeof (*begin_record)) + return (SET_ERROR(EINVAL)); + + error = nvlist_lookup_int32(innvl, "input_fd", &input_fd); + if (error != 0) + return (SET_ERROR(EINVAL)); + + force = nvlist_exists(innvl, "force"); + resumable = nvlist_exists(innvl, "resumable"); + + error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd); + if (error && error != ENOENT) + return (error); + + error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle); + if (error && error != ENOENT) + return (error); + + error = nvlist_lookup_nvlist(innvl, "props", &props); + if (error && error != ENOENT) + return (error); + + error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, force, + resumable, input_fd, begin_record, cleanup_fd, &read_bytes, + &errflags, &action_handle, &errors); + + fnvlist_add_uint64(outnvl, "read_bytes", read_bytes); + fnvlist_add_uint64(outnvl, "error_flags", errflags); + fnvlist_add_uint64(outnvl, "action_handle", action_handle); + fnvlist_add_nvlist(outnvl, "errors", errors); + + nvlist_free(errors); + nvlist_free(props); + + return (error); +} + /* * inputs: * zc_name name of snapshot to send @@ -5182,6 +5387,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) * indicates that blocks > 128KB are permitted * (optional) "embedok" -> (value ignored) * presence indicates DRR_WRITE_EMBEDDED records are permitted + * (optional) "resume_object" and "resume_offset" -> (uint64) + * if present, resume send stream from specified object and offset. * } * * outnvl is unused @@ -5197,6 +5404,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) file_t *fp; boolean_t largeblockok; boolean_t embedok; + uint64_t resumeobj = 0; + uint64_t resumeoff = 0; error = nvlist_lookup_int32(innvl, "fd", &fd); if (error != 0) @@ -5207,12 +5416,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); + (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); + (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); + if ((fp = getf(fd)) == NULL) return (SET_ERROR(EBADF)); off = fp->f_offset; - error = dmu_send(snapname, fromname, embedok, largeblockok, - fd, fp->f_vnode, &off); + error = dmu_send(snapname, fromname, embedok, largeblockok, fd, + resumeobj, resumeoff, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5470,6 +5682,10 @@ zfs_ioctl_init(void) POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW, + zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index a72841c15e6c..ef04b203dc60 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1020,7 +1020,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) statp->f_fsid.val[0] = (uint32_t)fsid; statp->f_fsid.val[1] = (uint32_t)(fsid >> 32); statp->f_type = ZFS_SUPER_MAGIC; - statp->f_namelen = ZFS_MAXNAMELEN; + statp->f_namelen = MAXNAMELEN - 1; /* * We have all of 40 characters to stuff a string here. diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 988ffec292f3..863ccb930c37 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -2080,7 +2080,7 @@ typedef struct zil_replay_arg { static int zil_replay_error(zilog_t *zilog, lr_t *lr, int error) { - char name[MAXNAMELEN]; + char name[ZFS_MAX_DATASET_NAME_LEN]; zilog->zl_replaying_seq--; /* didn't actually replay this one */ diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 089e3a1bcc97..8c75698e5881 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -50,7 +50,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) int zfs_flags = 0; zfs_sb_t *zsb = dentry->d_sb->s_fs_info; - if (dlen(dentry) > ZFS_MAXNAMELEN) + if (dlen(dentry) > ZFS_MAX_DATASET_NAME_LEN) return (ERR_PTR(-ENAMETOOLONG)); crhold(cr); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 9c89493edfcf..73277901ff16 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -61,7 +61,7 @@ unsigned long zvol_max_discard_blocks = 16384; static kmutex_t zvol_state_lock; static list_t zvol_state_list; -static char *zvol_tag = "zvol_tag"; +void *zvol_tag = "zvol_tag"; /* * The in-core state of each volume. diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 003c513dd711..b2f35ee240a0 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -146,14 +146,13 @@ tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', tests = [] # DISABLED: -# zfs_receive_003_pos - needs investigation -# zfs_receive_010_pos - needs investigation -# zfs_receive_011_pos - needs investigation -# zfs_receive_012_pos - needs investigation +# zfs_receive_004_neg - Fails for OpenZFS on illumos +# zfs_receive_011_pos - Requires port of OpenZFS 6562 [tests/functional/cli_root/zfs_receive] -tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_005_neg', - 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', - 'zfs_receive_009_neg'] +tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', + 'zfs_receive_005_neg', 'zfs_receive_006_pos', + 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', + 'zfs_receive_010_pos', 'zfs_receive_012_pos'] # DISABLED: # zfs_rename_002_pos - needs investigation @@ -175,11 +174,10 @@ tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] [tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_003_neg', 'zfs_rollback_004_neg'] -# DISABLED: -# zfs_send_007_pos - needs investigation [tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', - 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos'] + 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', + 'zfs_send_007_pos'] # DISABLED: # mountpoint_003_pos - needs investigation @@ -207,10 +205,11 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', # DISABLED: # zfs_snapshot_008_neg - nested pools +# zfs_snapshot_009_pos - Fails for OpenZFS on illumos [tests/functional/cli_root/zfs_snapshot] tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_003_neg', 'zfs_snapshot_004_neg', 'zfs_snapshot_005_neg', - 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_009_pos'] + 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg'] # DISABLED: # zfs_unmount_005_pos - needs investigation @@ -565,12 +564,17 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos', #[tests/functional/rootpool] #tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_neg'] -# DISABLED: Hangs on I/O for unclear reason. -#[tests/functional/rsend] -#tests = ['rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', -# 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', 'rsend_008_pos', -# 'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos', -# 'rsend_013_pos'] +# DISABLED: +# rsend_008_pos - Fails for OpenZFS on illumos +# rsend_009_pos - Fails for OpenZFS on illumos +# rsend_020_pos - ASSERTs in dump_record() +[tests/functional/rsend] +tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', + 'rsend_005_pos', 'rsend_006_pos', 'rsend_007_pos', + 'rsend_010_pos', 'rsend_011_pos', 'rsend_012_pos', + 'rsend_013_pos', 'rsend_014_pos', + 'rsend_019_pos', + 'rsend_021_pos', 'rsend_022_pos', 'rsend_024_pos'] [tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', @@ -586,17 +590,17 @@ tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos', 'slog_009_neg', 'slog_010_neg', 'slog_011_neg'] # DISABLED: +# clone_001_pos - nested pools # rollback_003_pos - Hangs in unmount and spins. -# snapshot_013_pos - Hangs on I/O for unclear reason. -# snapshot_016_pos - .zfs mv/rmdir/mkdir disabled by default. -#[tests/functional/snapshot] -#tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos', -# 'snapshot_001_pos', 'snapshot_002_pos', -# 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos', -# 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos', -# 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos', -# 'snapshot_012_pos', 'snapshot_014_pos', -# 'snapshot_015_pos', 'snapshot_017_pos'] +# snapshot_016_pos - Problem with automount +[tests/functional/snapshot] +tests = ['rollback_001_pos', 'rollback_002_pos', + 'snapshot_001_pos', 'snapshot_002_pos', + 'snapshot_003_pos', 'snapshot_004_pos', 'snapshot_005_pos', + 'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos', + 'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos', + 'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos', + 'snapshot_015_pos', 'snapshot_017_pos'] [tests/functional/snapused] tests = ['snapused_001_pos', 'snapused_002_pos', 'snapused_003_pos', 'snapused_004_pos', 'snapused_005_pos'] diff --git a/tests/zfs-tests/cmd/mktree/mktree.c b/tests/zfs-tests/cmd/mktree/mktree.c index 8f9b38578c11..95d31a6527d1 100644 --- a/tests/zfs-tests/cmd/mktree/mktree.c +++ b/tests/zfs-tests/cmd/mktree/mktree.c @@ -172,7 +172,7 @@ crtfile(char *pname) exit(errno); } - if (fsetxattr(fd, "xattr", pbuf, 1024, 0) < 0) { + if (fsetxattr(fd, "user.xattr", pbuf, 1024, 0) < 0) { (void) fprintf(stderr, "fsetxattr(fd, \"xattr\", pbuf, " "1024, 0) failed.\n[%d]: %s.\n", errno, strerror(errno)); exit(errno); diff --git a/tests/zfs-tests/include/commands.cfg.in b/tests/zfs-tests/include/commands.cfg.in index 823ee9679e7a..bea8df62931c 100644 --- a/tests/zfs-tests/include/commands.cfg.in +++ b/tests/zfs-tests/include/commands.cfg.in @@ -85,6 +85,7 @@ export SHARE="@SHARE@" export SHUF="@SHUF@" export SLEEP="@SLEEP@" export SORT="@SORT@" +export STAT="@STAT@" export STRINGS="@STRINGS@" export SU="@SU@" export SUM="@SUM@" @@ -97,8 +98,8 @@ export TAIL="@TAIL@" export TAR="@TAR@" export TOUCH="@TOUCH@" export TR="@TR@" -export TRUE="@TRUE@" export TRUNCATE="@TRUNCATE@" +export TRUE="@TRUE@" export UDEVADM="@UDEVADM@" export UFSDUMP="@UFSDUMP@" export UFSRESTORE="@UFSRESTORE@" diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am index 533acc695bd4..f5857f4a4839 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am @@ -10,4 +10,7 @@ dist_pkgdata_SCRIPTS = \ zfs_receive_006_pos.ksh \ zfs_receive_007_neg.ksh \ zfs_receive_008_pos.ksh \ - zfs_receive_009_neg.ksh + zfs_receive_009_neg.ksh \ + zfs_receive_010_pos.ksh \ + zfs_receive_011_pos.ksh \ + zfs_receive_012_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh index 13ae4f024810..308903d9700e 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos.ksh @@ -61,7 +61,7 @@ test_pool () first_object=$(ls -i $mntpnt | awk '{print $1}') log_must $ZFS snapshot $POOL/fs@a while true; do - log_must $FIND $mntpnt -delete + log_must $FIND $mntpnt/* -delete sync log_must $MKFILES "$mntpnt/" 4000 FILE=$(ls -i $mntpnt | awk \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh index 60ccc0dd6006..441f7a7ae036 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh @@ -26,7 +26,7 @@ # # -# Copyright (c) 2012 by Delphix. All rights reserved. +# Copyright (c) 2012, 2015 by Delphix. All rights reserved. # . $STF_SUITE/include/libtest.shlib @@ -92,6 +92,8 @@ function cleanup [[ -d $ALTER_ROOT ]] && \ log_must $RM -rf $ALTER_ROOT + [[ -e $VDEV_FILE ]] && \ + log_must $RM $VDEV_FILE } log_onexit cleanup @@ -159,4 +161,13 @@ while (( i < ${#pools[*]} )); do ((i = i + 1)) done +VDEV_FILE=$(mktemp /tmp/tmp.XXXXXX) + +log_must $MKFILE -n 128M $VDEV_FILE +log_must $ZPOOL create testpool $VDEV_FILE +log_must $ZFS create testpool/testfs +ID=$($ZPOOL get -Ho value guid testpool) +log_must $ZPOOL export testpool +log_mustnot $ZPOOL import $(echo $ID) $($PRINTF "%*s\n" 250 "" | $TR ' ' 'c') + log_pass "Successfully imported and renamed a ZPOOL" diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 91779cc7862e..4cdf6b7310b7 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -74,9 +74,11 @@ function setup_test_model if is_global_zone ; then log_must $ZFS create -V 16M $pool/vol log_must $ZFS create -V 16M $pool/$FS/vol + block_device_wait log_must $ZFS snapshot $pool/$FS/vol@vsnap log_must $ZFS clone $pool/$FS/vol@vsnap $pool/$FS/vclone + block_device_wait fi log_must snapshot_tree $pool/$FS/fs1/fs2@fsnap @@ -199,10 +201,10 @@ function cmp_ds_prop typeset dtst1=$1 typeset dtst2=$2 - for item in "type" "origin" "volblocksize" "aclinherit" "aclmode" \ + for item in "type" "origin" "volblocksize" "aclinherit" "acltype" \ "atime" "canmount" "checksum" "compression" "copies" "devices" \ "dnodesize" "exec" "quota" "readonly" "recordsize" "reservation" \ - "setuid" "sharenfs" "snapdir" "version" "volsize" "xattr" "zoned" \ + "setuid" "snapdir" "version" "volsize" "xattr" "zoned" \ "mountpoint"; do $ZFS get -H -o property,value,source $item $dtst1 >> \ @@ -393,7 +395,7 @@ function mk_files for ((i=0; i<$nfiles; i=i+1)); do $DD if=/dev/urandom \ of=/$fs/file-$maxsize-$((i+$file_id_offset)) \ - bs=$(($RANDOM * $RANDOM % $maxsize)) \ + bs=$((($RANDOM * $RANDOM % ($maxsize - 1)) + 1)) \ count=1 >/dev/null 2>&1 || log_fail \ "Failed to create /$fs/file-$maxsize-$((i+$file_id_offset))" done @@ -438,7 +440,7 @@ function mess_file # write the same value that's already there. # log_must eval "$DD if=/dev/urandom of=$file conv=notrunc " \ - "bs=1 count=2 oseek=$offset >/dev/null 2>&1" + "bs=1 count=2 seek=$offset >/dev/null 2>&1" else log_must $TRUNCATE -s $offset $file fi @@ -523,20 +525,20 @@ function test_fs_setup mk_files 100 1048576 0 $sendfs & mk_files 10 10485760 0 $sendfs & mk_files 1 104857600 0 $sendfs & - log_must $WAIT + wait log_must $ZFS snapshot $sendfs@a rm_files 200 256 0 $sendfs & rm_files 200 131072 0 $sendfs & rm_files 20 1048576 0 $sendfs & rm_files 2 10485760 0 $sendfs & - log_must $WAIT + wait mk_files 400 256 0 $sendfs & mk_files 400 131072 0 $sendfs & mk_files 40 1048576 0 $sendfs & mk_files 4 10485760 0 $sendfs & - log_must $WAIT + wait log_must $ZFS snapshot $sendfs@b log_must eval "$ZFS send -v $sendfs@a >/$sendpool/initial.zsend" diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh index 91cdd6e34da0..a7698da69145 100755 --- a/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh @@ -110,9 +110,6 @@ function cleanup log_must $ZFS inherit $prop $POOL2 done - #if is_shared $POOL; then - # log_must $ZFS set sharenfs=off $POOL - #fi log_must setup_test_model $POOL if [[ -d $TESTDIR ]]; then @@ -131,7 +128,7 @@ for fs in "$POOL" "$POOL/pclone" "$POOL/$FS" "$POOL/$FS/fs1" \ "$POOL/$FS/fs1/fs2" "$POOL/$FS/fs1/fclone" ; do rand_set_prop $fs aclinherit "discard" "noallow" "secure" "passthrough" rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256" - rand_set_prop $fs aclmode "discard" "groupmask" "passthrough" + rand_set_prop $fs acltype "off" "noacl" "posixacl" rand_set_prop $fs atime "on" "off" rand_set_prop $fs checksum "on" "off" "fletcher2" "fletcher4" "sha256" rand_set_prop $fs compression "on" "off" "lzjb" "gzip" \ @@ -161,7 +158,8 @@ done # Verify inherited property can be received -rand_set_prop $POOL sharenfs "on" "off" "rw" +rand_set_prop $POOL redundant_metadata "all" "most" +rand_set_prop $POOL sync "standard" "always" "disabled" # # Duplicate POOL2 for testing diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_014_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_014_pos.ksh old mode 100644 new mode 100755 index b6cbb1c63c87..68576816c59a --- a/tests/zfs-tests/tests/functional/rsend/rsend_014_pos.ksh +++ b/tests/zfs-tests/tests/functional/rsend/rsend_014_pos.ksh @@ -46,6 +46,7 @@ log_must cleanup_pool $POOL2 log_must eval "$ZFS send -R $POOL/$FS@final > $BACKDIR/fs-final-R" log_must eval "$ZFS receive -d $POOL2 < $BACKDIR/fs-final-R" +block_device_wait log_must eval "$ZPOOL export $POOL" log_must eval "$ZPOOL import $POOL" diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_019_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_019_pos.ksh old mode 100644 new mode 100755 diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_020_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_020_pos.ksh old mode 100644 new mode 100755 diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_021_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_021_pos.ksh old mode 100644 new mode 100755 diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_022_pos.ksh old mode 100644 new mode 100755 diff --git a/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh b/tests/zfs-tests/tests/functional/rsend/rsend_024_pos.ksh old mode 100644 new mode 100755 diff --git a/zfs-script-config.sh.in b/zfs-script-config.sh.in index e22cfd595a31..0a85c5fef6d4 100644 --- a/zfs-script-config.sh.in +++ b/zfs-script-config.sh.in @@ -50,7 +50,7 @@ export FILE_WRITE=${TESTSDIR}/zfs-tests/cmd/file_write/file_write export LARGEST_FILE=${TESTSDIR}/zfs-tests/cmd/largest_file/largest_file export MKBUSY=${TESTSDIR}/zfs-tests/cmd/mkbusy/mkbusy export MKFILE=${TESTSDIR}/zfs-tests/cmd/mkfile/mkfile -export MKFILES=${TESTSDIR}/zfs-tests/cmd/mkfile/mkfiles +export MKFILES=${TESTSDIR}/zfs-tests/cmd/mkfiles/mkfiles export MKTREE=${TESTSDIR}/zfs-tests/cmd/mktree/mktree export MMAP_EXEC=${TESTSDIR}/zfs-tests/cmd/mmap_exec/mmap_exec export MMAPWRITE=${TESTSDIR}/zfs-tests/cmd/mmapwrite/mmapwrite