3740 Poor ZFS send / receive performance due to snapshot
     hold / release processing
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Christopher Siden <christopher.siden@delphix.com>

References:
  https://www.illumos.org/issues/3740
  illumos/illumos-gate@a7a845e4bf

Ported-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1775

Porting notes:

1. 13fe019870 introduced a merge conflict
   in dsl_dataset_user_release_tmp where some variables were moved
   outside of the preprocessor directive.

2. dea9dfefdd747534b3846845629d2200f0616dad made the previous merge
   conflict worse by switching KM_SLEEP to KM_PUSHPAGE. This is notable
   because this commit refactors the code, adding a new KM_SLEEP
   allocation. It is not clear to me whether this should be converted
   to KM_PUSHPAGE.

3. We had a merge conflict in libzfs_sendrecv.c because of copyright
   notices.

4. Several small C99 compatibility fixed were made.
This commit is contained in:
Steven Hartland
2013-05-25 02:06:23 +00:00
committed by Brian Behlendorf
parent 7bc7f25040
commit 95fd54a1c5
13 changed files with 672 additions and 505 deletions
+45 -48
View File
@@ -26,6 +26,7 @@
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <ctype.h>
@@ -3153,18 +3154,14 @@ static int
zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
{
struct destroydata *dd = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, dd->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
if (lzc_exists(name))
verify(nvlist_add_boolean(dd->nvl, name) == 0);
zfs_close(szhp);
}
if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
(void) zvol_remove_link(zhp->zfs_hdl, name);
@@ -3193,7 +3190,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0);
(void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd);
if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) {
if (nvlist_empty(dd.nvl)) {
ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
zhp->zfs_name, snapname);
@@ -3219,7 +3216,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
if (ret == 0)
return (0);
if (nvlist_next_nvpair(errlist, NULL) == NULL) {
if (nvlist_empty(errlist)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@@ -4421,18 +4418,14 @@ static int
zfs_hold_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
if (lzc_exists(name))
fnvlist_add_string(ha->nvl, name, ha->tag);
zfs_close(szhp);
}
if (ha->recursive)
rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
@@ -4442,14 +4435,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *arg)
int
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
boolean_t recursive, int cleanup_fd)
{
int ret;
struct holdarg ha;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
nvpair_t *elem;
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
@@ -4457,26 +4446,44 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
ha.recursive = recursive;
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
if (nvlist_empty(ha.nvl)) {
char errbuf[1024];
fnvlist_free(ha.nvl);
ret = ENOENT;
if (!enoent_ok) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot hold snapshot '%s@%s'"),
zhp->zfs_name, snapname);
(void) zfs_standard_error(hdl, ret, errbuf);
}
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot hold snapshot '%s@%s'"),
zhp->zfs_name, snapname);
(void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf);
return (ret);
}
ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl);
fnvlist_free(ha.nvl);
if (ret == 0)
return (0);
return (ret);
}
if (nvlist_next_nvpair(errors, NULL) == NULL) {
int
zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
{
int ret;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
nvpair_t *elem;
errors = NULL;
ret = lzc_hold(holds, cleanup_fd, &errors);
if (ret == 0) {
/* There may be errors even in the success case. */
fnvlist_free(errors);
return (0);
}
if (nvlist_empty(errors)) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot hold"));
@@ -4516,10 +4523,6 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
case EEXIST:
(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
break;
case ENOENT:
if (enoent_ok)
return (ENOENT);
/* FALLTHROUGH */
default:
(void) zfs_standard_error(hdl,
fnvpair_value_int32(elem), errbuf);
@@ -4530,30 +4533,21 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
return (ret);
}
struct releasearg {
nvlist_t *nvl;
const char *snapname;
const char *tag;
boolean_t recursive;
};
static int
zfs_release_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
if (lzc_exists(name)) {
nvlist_t *holds = fnvlist_alloc();
fnvlist_add_boolean(holds, ha->tag);
fnvlist_add_nvlist(ha->nvl, name, holds);
zfs_close(szhp);
fnvlist_free(holds);
}
if (ha->recursive)
@@ -4568,7 +4562,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
{
int ret;
struct holdarg ha;
nvlist_t *errors;
nvlist_t *errors = NULL;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
@@ -4579,7 +4573,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
ha.recursive = recursive;
(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
if (nvlist_empty(ha.nvl)) {
fnvlist_free(ha.nvl);
ret = ENOENT;
(void) snprintf(errbuf, sizeof (errbuf),
@@ -4593,10 +4587,13 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
ret = lzc_release(ha.nvl, &errors);
fnvlist_free(ha.nvl);
if (ret == 0)
if (ret == 0) {
/* There may be errors even in the success case. */
fnvlist_free(errors);
return (0);
}
if (nvlist_next_nvpair(errors, NULL) == NULL) {
if (nvlist_empty(errors)) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot release"));
+74 -74
View File
@@ -22,9 +22,10 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <assert.h>
@@ -799,6 +800,7 @@ typedef struct send_dump_data {
int outfd;
boolean_t err;
nvlist_t *fss;
nvlist_t *snapholds;
avl_tree_t *fsavl;
snapfilter_cb_t *filter_cb;
void *filter_cb_arg;
@@ -948,41 +950,19 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
return (0);
}
static int
hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
static void
gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
{
zfs_handle_t *pzhp;
int error = 0;
char *thissnap;
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
if (sdd->dryrun)
return (0);
/*
* zfs_send() only opens a cleanup_fd for sends that need it,
* zfs_send() only sets snapholds for sends that need them,
* e.g. replication and doall.
*/
if (sdd->cleanup_fd == -1)
return (0);
if (sdd->snapholds == NULL)
return;
thissnap = strchr(zhp->zfs_name, '@') + 1;
*(thissnap - 1) = '\0';
pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
*(thissnap - 1) = '@';
/*
* It's OK if the parent no longer exists. The send code will
* handle that error.
*/
if (pzhp) {
error = zfs_hold(pzhp, thissnap, sdd->holdtag,
B_FALSE, B_TRUE, sdd->cleanup_fd);
zfs_close(pzhp);
}
return (error);
fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
}
static void *
@@ -1038,28 +1018,23 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
send_dump_data_t *sdd = arg;
progress_arg_t pa = { 0 };
pthread_t tid;
char *thissnap;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
err = 0;
thissnap = strchr(zhp->zfs_name, '@') + 1;
isfromsnap = (sdd->fromsnap != NULL &&
strcmp(sdd->fromsnap, thissnap) == 0);
if (!sdd->seenfrom && isfromsnap) {
err = hold_for_send(zhp, sdd);
if (err == 0) {
sdd->seenfrom = B_TRUE;
(void) strcpy(sdd->prevsnap, thissnap);
sdd->prevsnap_obj = zfs_prop_get_int(zhp,
ZFS_PROP_OBJSETID);
} else if (err == ENOENT) {
err = 0;
}
gather_holds(zhp, sdd);
sdd->seenfrom = B_TRUE;
(void) strcpy(sdd->prevsnap, thissnap);
sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zfs_close(zhp);
return (err);
return (0);
}
if (sdd->seento || !sdd->seenfrom) {
@@ -1110,14 +1085,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
return (0);
}
err = hold_for_send(zhp, sdd);
if (err) {
if (err == ENOENT)
err = 0;
zfs_close(zhp);
return (err);
}
gather_holds(zhp, sdd);
fromorigin = sdd->prevsnap[0] == '\0' &&
(sdd->fromorigin || sdd->replicate);
@@ -1385,7 +1353,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
avl_tree_t *fsavl = NULL;
static uint64_t holdseq;
int spa_version;
pthread_t tid;
pthread_t tid = 0;
int pipefd[2];
dedup_arg_t dda = { 0 };
int featureflags = 0;
@@ -1458,11 +1426,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
*debugnvp = hdrnv;
else
nvlist_free(hdrnv);
if (err) {
fsavl_destroy(fsavl);
nvlist_free(fss);
if (err)
goto stderr_out;
}
}
if (!flags->dryrun) {
@@ -1486,8 +1451,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
}
free(packbuf);
if (err == -1) {
fsavl_destroy(fsavl);
nvlist_free(fss);
err = errno;
goto stderr_out;
}
@@ -1498,8 +1461,6 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
drr.drr_u.drr_end.drr_checksum = zc;
err = write(outfd, &drr, sizeof (drr));
if (err == -1) {
fsavl_destroy(fsavl);
nvlist_free(fss);
err = errno;
goto stderr_out;
}
@@ -1511,7 +1472,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
if (flags->dedup)
if (tid != 0)
sdd.outfd = pipefd[0];
else
sdd.outfd = outfd;
@@ -1548,36 +1509,71 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
err = errno;
goto stderr_out;
}
sdd.snapholds = fnvlist_alloc();
} else {
sdd.cleanup_fd = -1;
sdd.snapholds = NULL;
}
if (flags->verbose) {
if (flags->verbose || sdd.snapholds != NULL) {
/*
* Do a verbose no-op dry run to get all the verbose output
* before generating any data. Then do a non-verbose real
* run to generate the streams.
* or to gather snapshot hold's before generating any data,
* then do a non-verbose real run to generate the streams.
*/
sdd.dryrun = B_TRUE;
err = dump_filesystems(zhp, &sdd);
sdd.dryrun = flags->dryrun;
sdd.verbose = B_FALSE;
if (flags->parsable) {
(void) fprintf(stderr, "size\t%llu\n",
(longlong_t)sdd.size);
} else {
char buf[16];
zfs_nicenum(sdd.size, buf, sizeof (buf));
(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
"total estimated size is %s\n"), buf);
if (err != 0)
goto stderr_out;
if (flags->verbose) {
if (flags->parsable) {
(void) fprintf(stderr, "size\t%llu\n",
(longlong_t)sdd.size);
} else {
char buf[16];
zfs_nicenum(sdd.size, buf, sizeof (buf));
(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
"total estimated size is %s\n"), buf);
}
}
/* Ensure no snaps found is treated as an error. */
if (!sdd.seento) {
err = ENOENT;
goto err_out;
}
/* Skip the second run if dryrun was requested. */
if (flags->dryrun)
goto err_out;
if (sdd.snapholds != NULL) {
err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
if (err != 0)
goto stderr_out;
fnvlist_free(sdd.snapholds);
sdd.snapholds = NULL;
}
sdd.dryrun = B_FALSE;
sdd.verbose = B_FALSE;
}
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
if (flags->dedup) {
(void) close(pipefd[0]);
/* Ensure no snaps found is treated as an error. */
if (err == 0 && !sdd.seento)
err = ENOENT;
if (tid != 0) {
if (err != 0)
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);
}
if (sdd.cleanup_fd != -1) {
@@ -1605,9 +1601,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
stderr_out:
err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
err_out:
fsavl_destroy(fsavl);
nvlist_free(fss);
fnvlist_free(sdd.snapholds);
if (sdd.cleanup_fd != -1)
VERIFY(0 == close(sdd.cleanup_fd));
if (flags->dedup) {
if (tid != 0) {
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);