mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 11:40:25 +03:00
Rebase master to b121
This commit is contained in:
parent
9babb37438
commit
45d1cae3b8
@ -1 +1 @@
|
||||
http://dlc.sun.com/osol/on/downloads/b117/on-src.tar.bz2
|
||||
http://dlc.sun.com/osol/on/downloads/b121/on-src.tar.bz2
|
||||
|
@ -818,6 +818,8 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
|
||||
(u_longlong_t)ds->ds_snapnames_zapobj);
|
||||
(void) printf("\t\tnum_children = %llu\n",
|
||||
(u_longlong_t)ds->ds_num_children);
|
||||
(void) printf("\t\tuserrefs_obj = %llu\n",
|
||||
(u_longlong_t)ds->ds_userrefs_obj);
|
||||
(void) printf("\t\tcreation_time = %s", ctime(&crtime));
|
||||
(void) printf("\t\tcreation_txg = %llu\n",
|
||||
(u_longlong_t)ds->ds_creation_txg);
|
||||
@ -1049,6 +1051,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
|
||||
dump_zap, /* DSL scrub queue */
|
||||
dump_zap, /* ZFS user/group used */
|
||||
dump_zap, /* ZFS user/group quota */
|
||||
dump_zap, /* snapshot refcount tags */
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -80,6 +80,8 @@ static int zfs_do_receive(int argc, char **argv);
|
||||
static int zfs_do_promote(int argc, char **argv);
|
||||
static int zfs_do_userspace(int argc, char **argv);
|
||||
static int zfs_do_python(int argc, char **argv);
|
||||
static int zfs_do_hold(int argc, char **argv);
|
||||
static int zfs_do_release(int argc, char **argv);
|
||||
|
||||
/*
|
||||
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
|
||||
@ -121,7 +123,10 @@ typedef enum {
|
||||
HELP_ALLOW,
|
||||
HELP_UNALLOW,
|
||||
HELP_USERSPACE,
|
||||
HELP_GROUPSPACE
|
||||
HELP_GROUPSPACE,
|
||||
HELP_HOLD,
|
||||
HELP_HOLDS,
|
||||
HELP_RELEASE
|
||||
} zfs_help_t;
|
||||
|
||||
typedef struct zfs_command {
|
||||
@ -169,6 +174,10 @@ static zfs_command_t command_table[] = {
|
||||
{ "allow", zfs_do_python, HELP_ALLOW },
|
||||
{ NULL },
|
||||
{ "unallow", zfs_do_python, HELP_UNALLOW },
|
||||
{ NULL },
|
||||
{ "hold", zfs_do_hold, HELP_HOLD },
|
||||
{ "holds", zfs_do_python, HELP_HOLDS },
|
||||
{ "release", zfs_do_release, HELP_RELEASE },
|
||||
};
|
||||
|
||||
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
|
||||
@ -189,7 +198,8 @@ get_usage(zfs_help_t idx)
|
||||
"-V <size> <volume>\n"));
|
||||
case HELP_DESTROY:
|
||||
return (gettext("\tdestroy [-rRf] "
|
||||
"<filesystem|volume|snapshot>\n"));
|
||||
"<filesystem|volume|snapshot>\n"
|
||||
"\tdestroy -d [-r] <filesystem|volume|snapshot>\n"));
|
||||
case HELP_GET:
|
||||
return (gettext("\tget [-rHp] [-d max] "
|
||||
"[-o field[,...]] [-s source[,...]]\n"
|
||||
@ -236,7 +246,7 @@ get_usage(zfs_help_t idx)
|
||||
return (gettext("\tunmount [-f] "
|
||||
"<-a | filesystem|mountpoint>\n"));
|
||||
case HELP_UNSHARE:
|
||||
return (gettext("\tunshare [-f] "
|
||||
return (gettext("\tunshare "
|
||||
"<-a | filesystem|mountpoint>\n"));
|
||||
case HELP_ALLOW:
|
||||
return (gettext("\tallow <filesystem|volume>\n"
|
||||
@ -266,6 +276,12 @@ get_usage(zfs_help_t idx)
|
||||
return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
|
||||
"[-sS field] ... [-t type[,...]]\n"
|
||||
"\t <filesystem|snapshot>\n"));
|
||||
case HELP_HOLD:
|
||||
return (gettext("\thold [-r] <tag> <snapshot> ...\n"));
|
||||
case HELP_HOLDS:
|
||||
return (gettext("\tholds [-r] <snapshot> ...\n"));
|
||||
case HELP_RELEASE:
|
||||
return (gettext("\trelease [-r] <tag> <snapshot> ...\n"));
|
||||
}
|
||||
|
||||
abort();
|
||||
@ -769,11 +785,13 @@ badusage:
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs destroy [-rf] <fs, snap, vol>
|
||||
* zfs destroy [-rRf] <fs, snap, vol>
|
||||
* zfs destroy -d [-r] <fs, snap, vol>
|
||||
*
|
||||
* -r Recursively destroy all children
|
||||
* -R Recursively destroy all dependents, including clones
|
||||
* -f Force unmounting of any dependents
|
||||
* -d If we can't destroy now, mark for deferred destruction
|
||||
*
|
||||
* Destroys the given dataset. By default, it will unmount any filesystems,
|
||||
* and refuse to destroy a dataset that has any dependents. A dependent can
|
||||
@ -789,6 +807,7 @@ typedef struct destroy_cbdata {
|
||||
boolean_t cb_closezhp;
|
||||
zfs_handle_t *cb_target;
|
||||
char *cb_snapname;
|
||||
boolean_t cb_defer_destroy;
|
||||
} destroy_cbdata_t;
|
||||
|
||||
/*
|
||||
@ -869,7 +888,7 @@ destroy_callback(zfs_handle_t *zhp, void *data)
|
||||
* Bail out on the first error.
|
||||
*/
|
||||
if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 ||
|
||||
zfs_destroy(zhp) != 0) {
|
||||
zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) {
|
||||
zfs_close(zhp);
|
||||
return (-1);
|
||||
}
|
||||
@ -923,8 +942,11 @@ zfs_do_destroy(int argc, char **argv)
|
||||
char *cp;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "frR")) != -1) {
|
||||
while ((c = getopt(argc, argv, "dfrR")) != -1) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
cb.cb_defer_destroy = B_TRUE;
|
||||
break;
|
||||
case 'f':
|
||||
cb.cb_force = 1;
|
||||
break;
|
||||
@ -956,6 +978,9 @@ zfs_do_destroy(int argc, char **argv)
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
if (cb.cb_defer_destroy && cb.cb_doclones)
|
||||
usage(B_FALSE);
|
||||
|
||||
/*
|
||||
* If we are doing recursive destroy of a snapshot, then the
|
||||
* named snapshot may not exist. Go straight to libzfs.
|
||||
@ -977,7 +1002,7 @@ zfs_do_destroy(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
ret = zfs_destroy_snaps(zhp, cp);
|
||||
ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy);
|
||||
zfs_close(zhp);
|
||||
if (ret) {
|
||||
(void) fprintf(stderr,
|
||||
@ -986,7 +1011,6 @@ zfs_do_destroy(int argc, char **argv)
|
||||
return (ret != 0);
|
||||
}
|
||||
|
||||
|
||||
/* Open the given dataset */
|
||||
if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
|
||||
return (1);
|
||||
@ -1014,15 +1038,15 @@ zfs_do_destroy(int argc, char **argv)
|
||||
* Check for any dependents and/or clones.
|
||||
*/
|
||||
cb.cb_first = B_TRUE;
|
||||
if (!cb.cb_doclones &&
|
||||
if (!cb.cb_doclones && !cb.cb_defer_destroy &&
|
||||
zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
|
||||
&cb) != 0) {
|
||||
zfs_close(zhp);
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (cb.cb_error ||
|
||||
zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
|
||||
if (cb.cb_error || (!cb.cb_defer_destroy &&
|
||||
(zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) {
|
||||
zfs_close(zhp);
|
||||
return (1);
|
||||
}
|
||||
@ -1035,7 +1059,6 @@ zfs_do_destroy(int argc, char **argv)
|
||||
if (destroy_callback(zhp, &cb) != 0)
|
||||
return (1);
|
||||
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1613,7 +1636,7 @@ zfs_do_upgrade(int argc, char **argv)
|
||||
(void) printf(gettext(" 1 Initial ZFS filesystem version\n"));
|
||||
(void) printf(gettext(" 2 Enhanced directory entries\n"));
|
||||
(void) printf(gettext(" 3 Case insensitive and File system "
|
||||
"unique identifer (FUID)\n"));
|
||||
"unique identifier (FUID)\n"));
|
||||
(void) printf(gettext(" 4 userquota, groupquota "
|
||||
"properties\n"));
|
||||
(void) printf(gettext("\nFor more information on a particular "
|
||||
@ -2651,6 +2674,108 @@ zfs_do_receive(int argc, char **argv)
|
||||
return (err != 0);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
|
||||
{
|
||||
int errors = 0;
|
||||
int i;
|
||||
const char *tag;
|
||||
boolean_t recursive = B_FALSE;
|
||||
int c;
|
||||
int (*func)(zfs_handle_t *, const char *, const char *, boolean_t);
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "r")) != -1) {
|
||||
switch (c) {
|
||||
case 'r':
|
||||
recursive = B_TRUE;
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
argc -= optind;
|
||||
argv += optind;
|
||||
|
||||
/* check number of arguments */
|
||||
if (argc < 2)
|
||||
usage(B_FALSE);
|
||||
|
||||
tag = argv[0];
|
||||
--argc;
|
||||
++argv;
|
||||
|
||||
if (holding) {
|
||||
if (tag[0] == '.') {
|
||||
/* tags starting with '.' are reserved for libzfs */
|
||||
(void) fprintf(stderr,
|
||||
gettext("tag may not start with '.'\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
func = zfs_hold;
|
||||
} else {
|
||||
func = zfs_release;
|
||||
}
|
||||
|
||||
for (i = 0; i < argc; ++i) {
|
||||
zfs_handle_t *zhp;
|
||||
char parent[ZFS_MAXNAMELEN];
|
||||
const char *delim;
|
||||
char *path = argv[i];
|
||||
|
||||
delim = strchr(path, '@');
|
||||
if (delim == NULL) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("'%s' is not a snapshot\n"), path);
|
||||
++errors;
|
||||
continue;
|
||||
}
|
||||
(void) strncpy(parent, path, delim - path);
|
||||
parent[delim - path] = '\0';
|
||||
|
||||
zhp = zfs_open(g_zfs, parent,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
|
||||
if (zhp == NULL) {
|
||||
++errors;
|
||||
continue;
|
||||
}
|
||||
if (func(zhp, delim+1, tag, recursive) != 0)
|
||||
++errors;
|
||||
zfs_close(zhp);
|
||||
}
|
||||
|
||||
return (errors != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs hold [-r] <tag> <snap> ...
|
||||
*
|
||||
* -r Recursively hold
|
||||
*
|
||||
* Apply a user-hold with the given tag to the list of snapshots.
|
||||
*/
|
||||
static int
|
||||
zfs_do_hold(int argc, char **argv)
|
||||
{
|
||||
return (zfs_do_hold_rele_impl(argc, argv, B_TRUE));
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs release [-r] <tag> <snap> ...
|
||||
*
|
||||
* -r Recursively release
|
||||
*
|
||||
* Release a user-hold with the given tag from the list of snapshots.
|
||||
*/
|
||||
static int
|
||||
zfs_do_release(int argc, char **argv)
|
||||
{
|
||||
return (zfs_do_hold_rele_impl(argc, argv, B_FALSE));
|
||||
}
|
||||
|
||||
typedef struct get_all_cbdata {
|
||||
zfs_handle_t **cb_handles;
|
||||
size_t cb_alloc;
|
||||
|
@ -1469,7 +1469,7 @@ show_import(nvlist_t *config)
|
||||
*/
|
||||
static int
|
||||
do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
||||
int force, nvlist_t *props, boolean_t allowfaulted)
|
||||
int force, nvlist_t *props, boolean_t do_verbatim)
|
||||
{
|
||||
zpool_handle_t *zhp;
|
||||
char *name;
|
||||
@ -1522,14 +1522,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
||||
}
|
||||
}
|
||||
|
||||
if (zpool_import_props(g_zfs, config, newname, props,
|
||||
allowfaulted) != 0)
|
||||
if (zpool_import_props(g_zfs, config, newname, props, do_verbatim) != 0)
|
||||
return (1);
|
||||
|
||||
if (newname != NULL)
|
||||
name = (char *)newname;
|
||||
|
||||
verify((zhp = zpool_open_canfail(g_zfs, name)) != NULL);
|
||||
if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
|
||||
return (1);
|
||||
|
||||
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
|
||||
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
|
||||
@ -1566,7 +1566,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
||||
* -F Import even in the presence of faulted vdevs. This is an
|
||||
* intentionally undocumented option for testing purposes, and
|
||||
* treats the pool configuration as complete, leaving any bad
|
||||
* vdevs in the FAULTED state.
|
||||
* vdevs in the FAULTED state. In other words, it does verbatim
|
||||
* import.
|
||||
*
|
||||
* -a Import all pools found.
|
||||
*
|
||||
@ -1595,7 +1596,7 @@ zpool_do_import(int argc, char **argv)
|
||||
nvlist_t *found_config;
|
||||
nvlist_t *props = NULL;
|
||||
boolean_t first;
|
||||
boolean_t allow_faulted = B_FALSE;
|
||||
boolean_t do_verbatim = B_FALSE;
|
||||
uint64_t pool_state;
|
||||
char *cachefile = NULL;
|
||||
|
||||
@ -1628,7 +1629,7 @@ zpool_do_import(int argc, char **argv)
|
||||
do_force = B_TRUE;
|
||||
break;
|
||||
case 'F':
|
||||
allow_faulted = B_TRUE;
|
||||
do_verbatim = B_TRUE;
|
||||
break;
|
||||
case 'o':
|
||||
if ((propval = strchr(optarg, '=')) != NULL) {
|
||||
@ -1778,7 +1779,7 @@ zpool_do_import(int argc, char **argv)
|
||||
|
||||
if (do_all)
|
||||
err |= do_import(config, NULL, mntopts,
|
||||
do_force, props, allow_faulted);
|
||||
do_force, props, do_verbatim);
|
||||
else
|
||||
show_import(config);
|
||||
} else if (searchname != NULL) {
|
||||
@ -1826,7 +1827,7 @@ zpool_do_import(int argc, char **argv)
|
||||
err = B_TRUE;
|
||||
} else {
|
||||
err |= do_import(found_config, argc == 1 ? NULL :
|
||||
argv[1], mntopts, do_force, props, allow_faulted);
|
||||
argv[1], mntopts, do_force, props, do_verbatim);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3117,6 +3118,17 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
"replace'.\n"));
|
||||
break;
|
||||
|
||||
case ZPOOL_STATUS_REMOVED_DEV:
|
||||
(void) printf(gettext("status: One or more devices has "
|
||||
"been removed by the administrator.\n\tSufficient "
|
||||
"replicas exist for the pool to continue functioning in "
|
||||
"a\n\tdegraded state.\n"));
|
||||
(void) printf(gettext("action: Online the device using "
|
||||
"'zpool online' or replace the device with\n\t'zpool "
|
||||
"replace'.\n"));
|
||||
break;
|
||||
|
||||
|
||||
case ZPOOL_STATUS_RESILVERING:
|
||||
(void) printf(gettext("status: One or more devices is "
|
||||
"currently being resilvered. The pool will\n\tcontinue "
|
||||
@ -3539,6 +3551,8 @@ zpool_do_upgrade(int argc, char **argv)
|
||||
(void) printf(gettext(" 14 passthrough-x aclinherit\n"));
|
||||
(void) printf(gettext(" 15 user/group space accounting\n"));
|
||||
(void) printf(gettext(" 16 stmf property support\n"));
|
||||
(void) printf(gettext(" 17 Triple-parity RAID-Z\n"));
|
||||
(void) printf(gettext(" 18 snapshot user holds\n"));
|
||||
(void) printf(gettext("For more information on a particular "
|
||||
"version, including supported releases, see:\n\n"));
|
||||
(void) printf("http://www.opensolaris.org/os/community/zfs/"
|
||||
@ -3624,6 +3638,8 @@ char *hist_event_table[LOG_END] = {
|
||||
"refquota set",
|
||||
"refreservation set",
|
||||
"pool scrub done",
|
||||
"user hold",
|
||||
"user release",
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -67,6 +67,7 @@
|
||||
#include <libdiskmgt.h>
|
||||
#include <libintl.h>
|
||||
#include <libnvpair.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
@ -1093,19 +1094,34 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
|
||||
}
|
||||
|
||||
static const char *
|
||||
is_grouping(const char *type, int *mindev)
|
||||
is_grouping(const char *type, int *mindev, int *maxdev)
|
||||
{
|
||||
if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
|
||||
if (strncmp(type, "raidz", 5) == 0) {
|
||||
const char *p = type + 5;
|
||||
char *end;
|
||||
long nparity;
|
||||
|
||||
if (*p == '\0') {
|
||||
nparity = 1;
|
||||
} else if (*p == '0') {
|
||||
return (NULL); /* no zero prefixes allowed */
|
||||
} else {
|
||||
errno = 0;
|
||||
nparity = strtol(p, &end, 10);
|
||||
if (errno != 0 || nparity < 1 || nparity >= 255 ||
|
||||
*end != '\0')
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (mindev != NULL)
|
||||
*mindev = 2;
|
||||
*mindev = nparity + 1;
|
||||
if (maxdev != NULL)
|
||||
*maxdev = 255;
|
||||
return (VDEV_TYPE_RAIDZ);
|
||||
}
|
||||
|
||||
if (strcmp(type, "raidz2") == 0) {
|
||||
if (mindev != NULL)
|
||||
*mindev = 3;
|
||||
return (VDEV_TYPE_RAIDZ);
|
||||
}
|
||||
if (maxdev != NULL)
|
||||
*maxdev = INT_MAX;
|
||||
|
||||
if (strcmp(type, "mirror") == 0) {
|
||||
if (mindev != NULL)
|
||||
@ -1144,7 +1160,7 @@ nvlist_t *
|
||||
construct_spec(int argc, char **argv)
|
||||
{
|
||||
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
|
||||
int t, toplevels, mindev, nspares, nlogs, nl2cache;
|
||||
int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
|
||||
const char *type;
|
||||
uint64_t is_log;
|
||||
boolean_t seen_logs;
|
||||
@ -1166,7 +1182,7 @@ construct_spec(int argc, char **argv)
|
||||
* If it's a mirror or raidz, the subsequent arguments are
|
||||
* its leaves -- until we encounter the next mirror or raidz.
|
||||
*/
|
||||
if ((type = is_grouping(argv[0], &mindev)) != NULL) {
|
||||
if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
|
||||
nvlist_t **child = NULL;
|
||||
int c, children = 0;
|
||||
|
||||
@ -1223,7 +1239,7 @@ construct_spec(int argc, char **argv)
|
||||
}
|
||||
|
||||
for (c = 1; c < argc; c++) {
|
||||
if (is_grouping(argv[c], NULL) != NULL)
|
||||
if (is_grouping(argv[c], NULL, NULL) != NULL)
|
||||
break;
|
||||
children++;
|
||||
child = realloc(child,
|
||||
@ -1243,6 +1259,13 @@ construct_spec(int argc, char **argv)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (children > maxdev) {
|
||||
(void) fprintf(stderr, gettext("invalid vdev "
|
||||
"specification: %s supports no more than "
|
||||
"%d devices\n"), argv[0], maxdev);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
argc -= c;
|
||||
argv += c;
|
||||
|
||||
|
@ -479,7 +479,7 @@ process_options(int argc, char **argv)
|
||||
zopt_raidz = MAX(1, value);
|
||||
break;
|
||||
case 'R':
|
||||
zopt_raidz_parity = MIN(MAX(value, 1), 2);
|
||||
zopt_raidz_parity = MIN(MAX(value, 1), 3);
|
||||
break;
|
||||
case 'd':
|
||||
zopt_datasets = MAX(1, value);
|
||||
@ -1387,7 +1387,7 @@ ztest_destroy_cb(char *name, void *arg)
|
||||
/*
|
||||
* Destroy the dataset.
|
||||
*/
|
||||
error = dmu_objset_destroy(name);
|
||||
error = dmu_objset_destroy(name, B_FALSE);
|
||||
if (error) {
|
||||
(void) dmu_objset_open(name, DMU_OST_OTHER,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &os);
|
||||
@ -1560,7 +1560,7 @@ ztest_dmu_objset_create_destroy(ztest_args_t *za)
|
||||
zil_close(zilog);
|
||||
dmu_objset_close(os);
|
||||
|
||||
error = dmu_objset_destroy(name);
|
||||
error = dmu_objset_destroy(name, B_FALSE);
|
||||
if (error)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
|
||||
|
||||
@ -1583,7 +1583,7 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
|
||||
(void) snprintf(snapname, 100, "%s@%llu", osname,
|
||||
(u_longlong_t)za->za_instance);
|
||||
|
||||
error = dmu_objset_destroy(snapname);
|
||||
error = dmu_objset_destroy(snapname, B_FALSE);
|
||||
if (error != 0 && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy() = %d", error);
|
||||
error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1,
|
||||
@ -1614,19 +1614,19 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t curval)
|
||||
(void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval);
|
||||
(void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval);
|
||||
|
||||
error = dmu_objset_destroy(clone2name);
|
||||
error = dmu_objset_destroy(clone2name, B_FALSE);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
|
||||
error = dmu_objset_destroy(snap3name);
|
||||
error = dmu_objset_destroy(snap3name, B_FALSE);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
|
||||
error = dmu_objset_destroy(snap2name);
|
||||
error = dmu_objset_destroy(snap2name, B_FALSE);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
|
||||
error = dmu_objset_destroy(clone1name);
|
||||
error = dmu_objset_destroy(clone1name, B_FALSE);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
|
||||
error = dmu_objset_destroy(snap1name);
|
||||
error = dmu_objset_destroy(snap1name, B_FALSE);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
|
||||
}
|
||||
|
@ -117,6 +117,8 @@ enum {
|
||||
EZFS_NOTSUP, /* ops not supported on this dataset */
|
||||
EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */
|
||||
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
|
||||
EZFS_REFTAG_RELE, /* snapshot release: tag not found */
|
||||
EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */
|
||||
EZFS_UNKNOWN
|
||||
};
|
||||
|
||||
@ -286,6 +288,7 @@ typedef enum {
|
||||
ZPOOL_STATUS_VERSION_OLDER, /* older on-disk version */
|
||||
ZPOOL_STATUS_RESILVERING, /* device being resilvered */
|
||||
ZPOOL_STATUS_OFFLINE_DEV, /* device online */
|
||||
ZPOOL_STATUS_REMOVED_DEV, /* removed device */
|
||||
|
||||
/*
|
||||
* Finally, the following indicates a healthy pool.
|
||||
@ -454,8 +457,8 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
|
||||
extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
|
||||
nvlist_t *);
|
||||
extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
|
||||
extern int zfs_destroy(zfs_handle_t *);
|
||||
extern int zfs_destroy_snaps(zfs_handle_t *, char *);
|
||||
extern int zfs_destroy(zfs_handle_t *, boolean_t);
|
||||
extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
|
||||
extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
|
||||
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
|
||||
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
|
||||
@ -463,6 +466,8 @@ extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
|
||||
extern int zfs_send(zfs_handle_t *, const char *, const char *,
|
||||
boolean_t, boolean_t, boolean_t, boolean_t, int);
|
||||
extern int zfs_promote(zfs_handle_t *);
|
||||
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t);
|
||||
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
|
||||
|
||||
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
|
||||
uid_t rid, uint64_t space);
|
||||
|
@ -508,6 +508,14 @@ change_one(zfs_handle_t *zhp, void *data)
|
||||
&idx);
|
||||
uu_list_insert(clp->cl_list, cn, idx);
|
||||
} else {
|
||||
/*
|
||||
* Add this child to beginning of the list. Children
|
||||
* below this one in the hierarchy will get added above
|
||||
* this one in the list. This produces a list in
|
||||
* reverse dataset name order.
|
||||
* This is necessary when the original mountpoint
|
||||
* is legacy or none.
|
||||
*/
|
||||
ASSERT(!clp->cl_alldependents);
|
||||
verify(uu_list_insert_before(clp->cl_list,
|
||||
uu_list_first(clp->cl_list), cn) == 0);
|
||||
@ -574,6 +582,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
|
||||
zfs_handle_t *temp;
|
||||
char property[ZFS_MAXPROPLEN];
|
||||
uu_compare_fn_t *compare = NULL;
|
||||
boolean_t legacy = B_FALSE;
|
||||
|
||||
if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
|
||||
return (NULL);
|
||||
@ -586,9 +595,20 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
|
||||
if (prop == ZFS_PROP_NAME || prop == ZFS_PROP_ZONED ||
|
||||
prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS ||
|
||||
prop == ZFS_PROP_SHARESMB) {
|
||||
|
||||
if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
|
||||
property, sizeof (property),
|
||||
NULL, NULL, 0, B_FALSE) == 0 &&
|
||||
(strcmp(property, "legacy") == 0 ||
|
||||
strcmp(property, "none") == 0)) {
|
||||
|
||||
legacy = B_TRUE;
|
||||
}
|
||||
if (!legacy) {
|
||||
compare = compare_mountpoints;
|
||||
clp->cl_sorted = B_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
clp->cl_pool = uu_list_pool_create("changelist_pool",
|
||||
sizeof (prop_changenode_t),
|
||||
@ -695,6 +715,12 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
|
||||
(void) uu_list_find(clp->cl_list, cn, NULL, &idx);
|
||||
uu_list_insert(clp->cl_list, cn, idx);
|
||||
} else {
|
||||
/*
|
||||
* Add the target dataset to the end of the list.
|
||||
* The list is not really unsorted. The list will be
|
||||
* in reverse dataset name order. This is necessary
|
||||
* when the original mountpoint is legacy or none.
|
||||
*/
|
||||
verify(uu_list_insert_after(clp->cl_list,
|
||||
uu_list_last(clp->cl_list), cn) == 0);
|
||||
}
|
||||
@ -703,11 +729,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
|
||||
* If the mountpoint property was previously 'legacy', or 'none',
|
||||
* record it as the behavior of changelist_postfix() will be different.
|
||||
*/
|
||||
if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) &&
|
||||
(zfs_prop_get(zhp, prop, property, sizeof (property),
|
||||
NULL, NULL, 0, B_FALSE) == 0 &&
|
||||
(strcmp(property, "legacy") == 0 ||
|
||||
strcmp(property, "none") == 0))) {
|
||||
if ((clp->cl_prop == ZFS_PROP_MOUNTPOINT) && legacy) {
|
||||
/*
|
||||
* do not automatically mount ex-legacy datasets if
|
||||
* we specifically set canmount to noauto
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include <ucred.h>
|
||||
#include <idmap.h>
|
||||
#include <aclutils.h>
|
||||
#include <directory.h>
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zap.h>
|
||||
@ -1674,21 +1675,13 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
|
||||
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
|
||||
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) {
|
||||
zcmd_free_nvlists(&zc);
|
||||
zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
|
||||
"unable to get %s property"),
|
||||
zfs_prop_to_name(prop));
|
||||
return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION,
|
||||
dgettext(TEXT_DOMAIN, "internal error")));
|
||||
return (-1);
|
||||
}
|
||||
if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 ||
|
||||
nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop),
|
||||
val) != 0) {
|
||||
zcmd_free_nvlists(&zc);
|
||||
zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
|
||||
"unable to get %s property"),
|
||||
zfs_prop_to_name(prop));
|
||||
return (zfs_error(zhp->zfs_hdl, EZFS_NOMEM,
|
||||
dgettext(TEXT_DOMAIN, "internal error")));
|
||||
return (-1);
|
||||
}
|
||||
if (zplprops)
|
||||
nvlist_free(zplprops);
|
||||
@ -2074,6 +2067,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
|
||||
{
|
||||
zfs_userquota_prop_t type;
|
||||
char *cp, *end;
|
||||
char *numericsid = NULL;
|
||||
boolean_t isuser;
|
||||
|
||||
domain[0] = '\0';
|
||||
@ -2096,33 +2090,41 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
|
||||
if (strchr(cp, '@')) {
|
||||
/*
|
||||
* It's a SID name (eg "user@domain") that needs to be
|
||||
* turned into S-1-domainID-RID. There should be a
|
||||
* better way to do this, but for now just translate it
|
||||
* to the (possibly ephemeral) uid and then back to the
|
||||
* SID. This is like getsidname(noresolve=TRUE).
|
||||
* turned into S-1-domainID-RID.
|
||||
*/
|
||||
uid_t id;
|
||||
idmap_rid_t rid;
|
||||
char *mapdomain;
|
||||
|
||||
directory_error_t e;
|
||||
if (zoned && getzoneid() == GLOBAL_ZONEID)
|
||||
return (ENOENT);
|
||||
if (sid_to_id(cp, isuser, &id) != 0)
|
||||
if (isuser) {
|
||||
e = directory_sid_from_user_name(NULL,
|
||||
cp, &numericsid);
|
||||
} else {
|
||||
e = directory_sid_from_group_name(NULL,
|
||||
cp, &numericsid);
|
||||
}
|
||||
if (e != NULL) {
|
||||
directory_error_free(e);
|
||||
return (ENOENT);
|
||||
if (idmap_id_to_numeric_domain_rid(id, isuser,
|
||||
&mapdomain, &rid) != 0)
|
||||
}
|
||||
if (numericsid == NULL)
|
||||
return (ENOENT);
|
||||
(void) strlcpy(domain, mapdomain, domainlen);
|
||||
*ridp = rid;
|
||||
} else if (strncmp(cp, "S-1-", 4) == 0) {
|
||||
cp = numericsid;
|
||||
/* will be further decoded below */
|
||||
}
|
||||
|
||||
if (strncmp(cp, "S-1-", 4) == 0) {
|
||||
/* It's a numeric SID (eg "S-1-234-567-89") */
|
||||
(void) strcpy(domain, cp);
|
||||
(void) strlcpy(domain, cp, domainlen);
|
||||
cp = strrchr(domain, '-');
|
||||
*cp = '\0';
|
||||
cp++;
|
||||
|
||||
errno = 0;
|
||||
*ridp = strtoull(cp, &end, 10);
|
||||
if (numericsid) {
|
||||
free(numericsid);
|
||||
numericsid = NULL;
|
||||
}
|
||||
if (errno != 0 || *end != '\0')
|
||||
return (EINVAL);
|
||||
} else if (!isdigit(*cp)) {
|
||||
@ -2158,13 +2160,14 @@ userquota_propname_decode(const char *propname, boolean_t zoned,
|
||||
if (idmap_id_to_numeric_domain_rid(id, isuser,
|
||||
&mapdomain, &rid) != 0)
|
||||
return (ENOENT);
|
||||
(void) strcpy(domain, mapdomain);
|
||||
(void) strlcpy(domain, mapdomain, domainlen);
|
||||
*ridp = rid;
|
||||
} else {
|
||||
*ridp = id;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT3P(numericsid, ==, NULL);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -2763,7 +2766,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
|
||||
* isn't mounted, and that there are no active dependents.
|
||||
*/
|
||||
int
|
||||
zfs_destroy(zfs_handle_t *zhp)
|
||||
zfs_destroy(zfs_handle_t *zhp, boolean_t defer)
|
||||
{
|
||||
zfs_cmd_t zc = { 0 };
|
||||
|
||||
@ -2787,6 +2790,7 @@ zfs_destroy(zfs_handle_t *zhp)
|
||||
zc.zc_objset_type = DMU_OST_ZFS;
|
||||
}
|
||||
|
||||
zc.zc_defer_destroy = defer;
|
||||
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0) {
|
||||
return (zfs_standard_error_fmt(zhp->zfs_hdl, errno,
|
||||
dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
|
||||
@ -2843,7 +2847,7 @@ zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
|
||||
* Destroys all snapshots with the given name in zhp & descendants.
|
||||
*/
|
||||
int
|
||||
zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
|
||||
zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
|
||||
{
|
||||
zfs_cmd_t zc = { 0 };
|
||||
int ret;
|
||||
@ -2860,6 +2864,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
|
||||
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
|
||||
zc.zc_defer_destroy = defer;
|
||||
|
||||
ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc);
|
||||
if (ret != 0) {
|
||||
@ -3275,7 +3280,7 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
|
||||
|
||||
logstr = zhp->zfs_hdl->libzfs_log_str;
|
||||
zhp->zfs_hdl->libzfs_log_str = NULL;
|
||||
cbp->cb_error |= zfs_destroy(zhp);
|
||||
cbp->cb_error |= zfs_destroy(zhp, B_FALSE);
|
||||
zhp->zfs_hdl->libzfs_log_str = logstr;
|
||||
}
|
||||
} else {
|
||||
@ -3289,7 +3294,7 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
|
||||
zfs_close(zhp);
|
||||
return (0);
|
||||
}
|
||||
if (zfs_destroy(zhp) != 0)
|
||||
if (zfs_destroy(zhp, B_FALSE) != 0)
|
||||
cbp->cb_error = B_TRUE;
|
||||
else
|
||||
changelist_remove(clp, zhp->zfs_name);
|
||||
@ -4089,3 +4094,79 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
|
||||
boolean_t recursive)
|
||||
{
|
||||
zfs_cmd_t zc = { 0 };
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
|
||||
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
|
||||
(void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string));
|
||||
zc.zc_cookie = recursive;
|
||||
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
|
||||
char errbuf[ZFS_MAXNAMELEN+32];
|
||||
|
||||
/*
|
||||
* if it was recursive, the one that actually failed will be in
|
||||
* zc.zc_name.
|
||||
*/
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot hold '%s@%s'"), zc.zc_name, snapname);
|
||||
switch (errno) {
|
||||
case ENOTSUP:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pool must be upgraded"));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
|
||||
case EINVAL:
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case EEXIST:
|
||||
return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
|
||||
default:
|
||||
return (zfs_standard_error_fmt(hdl, errno, errbuf));
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
|
||||
boolean_t recursive)
|
||||
{
|
||||
zfs_cmd_t zc = { 0 };
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
|
||||
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
|
||||
(void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string));
|
||||
zc.zc_cookie = recursive;
|
||||
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
|
||||
char errbuf[ZFS_MAXNAMELEN+32];
|
||||
|
||||
/*
|
||||
* if it was recursive, the one that actually failed will be in
|
||||
* zc.zc_name.
|
||||
*/
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot release '%s@%s'"), zc.zc_name, snapname);
|
||||
switch (errno) {
|
||||
case ESRCH:
|
||||
return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
|
||||
case ENOTSUP:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pool must be upgraded"));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
|
||||
case EINVAL:
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
default:
|
||||
return (zfs_standard_error_fmt(hdl, errno, errbuf));
|
||||
}
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -113,6 +113,9 @@ fsavl_destroy(avl_tree_t *avl)
|
||||
free(avl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given an nvlist, produce an avl tree of snapshots, ordered by guid
|
||||
*/
|
||||
static avl_tree_t *
|
||||
fsavl_create(nvlist_t *fss)
|
||||
{
|
||||
@ -243,7 +246,9 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
|
||||
continue;
|
||||
|
||||
verify(nvpair_value_nvlist(elem, &propnv) == 0);
|
||||
if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) {
|
||||
if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
|
||||
prop == ZFS_PROP_REFQUOTA ||
|
||||
prop == ZFS_PROP_REFRESERVATION) {
|
||||
/* these guys are modifiable, but have no source */
|
||||
uint64_t value;
|
||||
verify(nvlist_lookup_uint64(propnv,
|
||||
@ -274,6 +279,11 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* recursively generate nvlists describing datasets. See comment
|
||||
* for the data structure send_data_t above for description of contents
|
||||
* of the nvlist.
|
||||
*/
|
||||
static int
|
||||
send_iterate_fs(zfs_handle_t *zhp, void *arg)
|
||||
{
|
||||
@ -689,9 +699,20 @@ again:
|
||||
}
|
||||
|
||||
/*
|
||||
* Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL.
|
||||
* If 'doall', dump all intermediate snaps.
|
||||
* If 'replicate', dump special header and do recursively.
|
||||
* Generate a send stream for the dataset identified by the argument zhp.
|
||||
*
|
||||
* The content of the send stream is the snapshot identified by
|
||||
* 'tosnap'. Incremental streams are requested in two ways:
|
||||
* - from the snapshot identified by "fromsnap" (if non-null) or
|
||||
* - from the origin of the dataset identified by zhp, which must
|
||||
* be a clone. In this case, "fromsnap" is null and "fromorigin"
|
||||
* is TRUE.
|
||||
*
|
||||
* The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
|
||||
* uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION)
|
||||
* if "replicate" is set. If "doall" is set, dump all the intermediate
|
||||
* snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall"
|
||||
* case too.
|
||||
*/
|
||||
int
|
||||
zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
@ -900,11 +921,12 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
zc.zc_objset_type = DMU_OST_ZFS;
|
||||
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
|
||||
|
||||
if (tryname) {
|
||||
(void) strcpy(newname, tryname);
|
||||
|
||||
zc.zc_objset_type = DMU_OST_ZFS;
|
||||
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
|
||||
(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
|
||||
|
||||
if (flags.verbose) {
|
||||
@ -959,12 +981,18 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
|
||||
int err = 0;
|
||||
prop_changelist_t *clp;
|
||||
zfs_handle_t *zhp;
|
||||
boolean_t defer = B_FALSE;
|
||||
int spa_version;
|
||||
|
||||
zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
|
||||
if (zhp == NULL)
|
||||
return (-1);
|
||||
clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
|
||||
flags.force ? MS_FORCE : 0);
|
||||
if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
|
||||
zfs_spa_version(zhp, &spa_version) == 0 &&
|
||||
spa_version >= SPA_VERSION_USERREFS)
|
||||
defer = B_TRUE;
|
||||
zfs_close(zhp);
|
||||
if (clp == NULL)
|
||||
return (-1);
|
||||
@ -973,12 +1001,12 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
|
||||
return (err);
|
||||
|
||||
zc.zc_objset_type = DMU_OST_ZFS;
|
||||
zc.zc_defer_destroy = defer;
|
||||
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
|
||||
|
||||
if (flags.verbose)
|
||||
(void) printf("attempting destroy %s\n", zc.zc_name);
|
||||
err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
|
||||
|
||||
if (err == 0) {
|
||||
if (flags.verbose)
|
||||
(void) printf("success\n");
|
||||
@ -988,7 +1016,12 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
|
||||
(void) changelist_postfix(clp);
|
||||
changelist_free(clp);
|
||||
|
||||
if (err != 0)
|
||||
/*
|
||||
* Deferred destroy should always succeed. Since we can't tell
|
||||
* if it destroyed the dataset or just marked it for deferred
|
||||
* destroy, always do the rename just in case.
|
||||
*/
|
||||
if (err != 0 || defer)
|
||||
err = recv_rename(hdl, name, NULL, baselen, newname, flags);
|
||||
|
||||
return (err);
|
||||
@ -1775,11 +1808,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
|
||||
/* We can't do online recv in this case */
|
||||
clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
|
||||
if (clp == NULL) {
|
||||
zfs_close(zhp);
|
||||
zcmd_free_nvlists(&zc);
|
||||
return (-1);
|
||||
}
|
||||
if (changelist_prefix(clp) != 0) {
|
||||
changelist_free(clp);
|
||||
zfs_close(zhp);
|
||||
zcmd_free_nvlists(&zc);
|
||||
return (-1);
|
||||
}
|
||||
@ -1936,7 +1971,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
|
||||
* (if created, or if we tore them down to do an incremental
|
||||
* restore), and the /dev links for the new snapshot (if
|
||||
* created). Also mount any children of the target filesystem
|
||||
* if we did an incremental receive.
|
||||
* if we did a replication receive (indicated by stream_avl
|
||||
* being non-NULL).
|
||||
*/
|
||||
cp = strchr(zc.zc_value, '@');
|
||||
if (cp && (ioctl_err == 0 || !newfs)) {
|
||||
@ -1952,7 +1988,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
|
||||
if (err == 0 && ioctl_err == 0)
|
||||
err = zvol_create_link(hdl,
|
||||
zc.zc_value);
|
||||
} else if (newfs) {
|
||||
} else if (newfs || stream_avl) {
|
||||
/*
|
||||
* Track the first/top of hierarchy fs,
|
||||
* for mounting and sharing later.
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -104,6 +104,13 @@ vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
|
||||
return (state == VDEV_STATE_OFFLINE);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
vdev_removed(uint64_t state, uint64_t aux, uint64_t errs)
|
||||
{
|
||||
return (state == VDEV_STATE_REMOVED);
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect if any leaf devices have seen errors or could not be opened.
|
||||
*/
|
||||
@ -275,6 +282,12 @@ check_status(nvlist_t *config, boolean_t isimport)
|
||||
if (find_vdev_problem(nvroot, vdev_offlined))
|
||||
return (ZPOOL_STATUS_OFFLINE_DEV);
|
||||
|
||||
/*
|
||||
* Removed device
|
||||
*/
|
||||
if (find_vdev_problem(nvroot, vdev_removed))
|
||||
return (ZPOOL_STATUS_REMOVED_DEV);
|
||||
|
||||
/*
|
||||
* Currently resilvering
|
||||
*/
|
||||
|
@ -213,6 +213,11 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
case EZFS_UNPLAYED_LOGS:
|
||||
return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
|
||||
"logs"));
|
||||
case EZFS_REFTAG_RELE:
|
||||
return (dgettext(TEXT_DOMAIN, "no such tag on this dataset"));
|
||||
case EZFS_REFTAG_HOLD:
|
||||
return (dgettext(TEXT_DOMAIN, "tag already exists on this "
|
||||
"dataset"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
|
@ -18,6 +18,7 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
@ -26,6 +27,8 @@
|
||||
#ifndef _SYS_FS_ZFS_H
|
||||
#define _SYS_FS_ZFS_H
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -111,6 +114,8 @@ typedef enum {
|
||||
ZFS_PROP_USEDREFRESERV,
|
||||
ZFS_PROP_USERACCOUNTING, /* not exposed to the user */
|
||||
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
|
||||
ZFS_PROP_DEFER_DESTROY,
|
||||
ZFS_PROP_USERREFS,
|
||||
ZFS_NUM_PROPS
|
||||
} zfs_prop_t;
|
||||
|
||||
@ -280,14 +285,16 @@ typedef enum zfs_cache_type {
|
||||
#define SPA_VERSION_14 14ULL
|
||||
#define SPA_VERSION_15 15ULL
|
||||
#define SPA_VERSION_16 16ULL
|
||||
#define SPA_VERSION_17 17ULL
|
||||
#define SPA_VERSION_18 18ULL
|
||||
/*
|
||||
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
|
||||
* format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
|
||||
* and do the appropriate changes. Also bump the version number in
|
||||
* usr/src/grub/capability.
|
||||
*/
|
||||
#define SPA_VERSION SPA_VERSION_16
|
||||
#define SPA_VERSION_STRING "16"
|
||||
#define SPA_VERSION SPA_VERSION_18
|
||||
#define SPA_VERSION_STRING "18"
|
||||
|
||||
/*
|
||||
* Symbolic names for the changes that caused a SPA_VERSION switch.
|
||||
@ -303,7 +310,7 @@ typedef enum zfs_cache_type {
|
||||
#define SPA_VERSION_INITIAL SPA_VERSION_1
|
||||
#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2
|
||||
#define SPA_VERSION_SPARES SPA_VERSION_3
|
||||
#define SPA_VERSION_RAID6 SPA_VERSION_3
|
||||
#define SPA_VERSION_RAIDZ2 SPA_VERSION_3
|
||||
#define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3
|
||||
#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3
|
||||
#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3
|
||||
@ -325,6 +332,8 @@ typedef enum zfs_cache_type {
|
||||
#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14
|
||||
#define SPA_VERSION_USERSPACE SPA_VERSION_15
|
||||
#define SPA_VERSION_STMF_PROP SPA_VERSION_16
|
||||
#define SPA_VERSION_RAIDZ3 SPA_VERSION_17
|
||||
#define SPA_VERSION_USERREFS SPA_VERSION_18
|
||||
|
||||
/*
|
||||
* ZPL version - rev'd whenever an incompatible on-disk format change
|
||||
@ -601,7 +610,10 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_SMB_ACL,
|
||||
ZFS_IOC_USERSPACE_ONE,
|
||||
ZFS_IOC_USERSPACE_MANY,
|
||||
ZFS_IOC_USERSPACE_UPGRADE
|
||||
ZFS_IOC_USERSPACE_UPGRADE,
|
||||
ZFS_IOC_HOLD,
|
||||
ZFS_IOC_RELEASE,
|
||||
ZFS_IOC_GET_HOLDS
|
||||
} zfs_ioc_t;
|
||||
|
||||
/*
|
||||
@ -715,6 +727,8 @@ typedef enum history_internal_events {
|
||||
LOG_DS_REFQUOTA,
|
||||
LOG_DS_REFRESERV,
|
||||
LOG_POOL_SCRUB_DONE,
|
||||
LOG_DS_USER_HOLD,
|
||||
LOG_DS_USER_RELEASE,
|
||||
LOG_END
|
||||
} history_internal_events_t;
|
||||
|
||||
|
@ -61,6 +61,8 @@ typedef enum {
|
||||
ZFS_DELEG_NOTE_GROUPQUOTA,
|
||||
ZFS_DELEG_NOTE_USERUSED,
|
||||
ZFS_DELEG_NOTE_GROUPUSED,
|
||||
ZFS_DELEG_NOTE_HOLD,
|
||||
ZFS_DELEG_NOTE_RELEASE,
|
||||
ZFS_DELEG_NOTE_NONE
|
||||
} zfs_deleg_note_t;
|
||||
|
||||
|
@ -67,6 +67,8 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
|
||||
{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
|
||||
{ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
|
||||
{ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
|
||||
{ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD },
|
||||
{ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE },
|
||||
{NULL, ZFS_DELEG_NOTE_NONE }
|
||||
};
|
||||
|
||||
|
@ -59,7 +59,7 @@ valid_char(char c)
|
||||
* Snapshot names must be made up of alphanumeric characters plus the following
|
||||
* characters:
|
||||
*
|
||||
* [-_.:]
|
||||
* [-_.: ]
|
||||
*/
|
||||
int
|
||||
snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
|
||||
|
@ -235,6 +235,9 @@ zfs_prop_init(void)
|
||||
/* readonly index (boolean) properties */
|
||||
register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
|
||||
register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
|
||||
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
|
||||
boolean_table);
|
||||
|
||||
/* set once index properties */
|
||||
register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
|
||||
@ -286,6 +289,8 @@ zfs_prop_init(void)
|
||||
register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
|
||||
PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
|
||||
register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
|
||||
ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
|
||||
|
||||
/* default number properties */
|
||||
register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
|
||||
|
@ -87,6 +87,7 @@ const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
||||
{ zap_byteswap, TRUE, "scrub work queue" },
|
||||
{ zap_byteswap, TRUE, "ZFS user/group used" },
|
||||
{ zap_byteswap, TRUE, "ZFS user/group quota" },
|
||||
{ zap_byteswap, TRUE, "snapshot refcount tags"},
|
||||
};
|
||||
|
||||
int
|
||||
@ -195,7 +196,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||
|
||||
ASSERT(length <= DMU_MAX_ACCESS);
|
||||
|
||||
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
|
||||
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
|
||||
if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
|
||||
dbuf_flags |= DB_RF_NOPREFETCH;
|
||||
|
||||
@ -212,6 +213,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||
os_dsl_dataset->ds_object,
|
||||
(longlong_t)dn->dn_object, dn->dn_datablksz,
|
||||
(longlong_t)offset, (longlong_t)length);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
return (EIO);
|
||||
}
|
||||
nblks = 1;
|
||||
@ -234,9 +236,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||
}
|
||||
/* initiate async i/o */
|
||||
if (read) {
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
(void) dbuf_read(db, zio, dbuf_flags);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
}
|
||||
dbp[i] = &db->db;
|
||||
}
|
||||
@ -376,56 +376,51 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
|
||||
dnode_rele(dn, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the next "chunk" of file data to free. We traverse the file from
|
||||
* the end so that the file gets shorter over time (if we crash in the
|
||||
* middle, this will leave us in a better state). We find allocated file
|
||||
* data by simply searching the allocated level 1 indirects.
|
||||
*/
|
||||
static int
|
||||
get_next_chunk(dnode_t *dn, uint64_t *offset, uint64_t limit)
|
||||
get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit)
|
||||
{
|
||||
uint64_t len = *offset - limit;
|
||||
uint64_t chunk_len = dn->dn_datablksz * DMU_MAX_DELETEBLKCNT;
|
||||
uint64_t subchunk =
|
||||
uint64_t len = *start - limit;
|
||||
uint64_t blkcnt = 0;
|
||||
uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1));
|
||||
uint64_t iblkrange =
|
||||
dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT);
|
||||
|
||||
ASSERT(limit <= *offset);
|
||||
ASSERT(limit <= *start);
|
||||
|
||||
if (len <= chunk_len) {
|
||||
*offset = limit;
|
||||
if (len <= iblkrange * maxblks) {
|
||||
*start = limit;
|
||||
return (0);
|
||||
}
|
||||
ASSERT(ISP2(iblkrange));
|
||||
|
||||
ASSERT(ISP2(subchunk));
|
||||
|
||||
while (*offset > limit) {
|
||||
uint64_t initial_offset = P2ROUNDUP(*offset, subchunk);
|
||||
uint64_t delta;
|
||||
while (*start > limit && blkcnt < maxblks) {
|
||||
int err;
|
||||
|
||||
/* skip over allocated data */
|
||||
/* find next allocated L1 indirect */
|
||||
err = dnode_next_offset(dn,
|
||||
DNODE_FIND_HOLE|DNODE_FIND_BACKWARDS, offset, 1, 1, 0);
|
||||
if (err == ESRCH)
|
||||
*offset = limit;
|
||||
else if (err)
|
||||
return (err);
|
||||
DNODE_FIND_BACKWARDS, start, 2, 1, 0);
|
||||
|
||||
ASSERT3U(*offset, <=, initial_offset);
|
||||
*offset = P2ALIGN(*offset, subchunk);
|
||||
delta = initial_offset - *offset;
|
||||
if (delta >= chunk_len) {
|
||||
*offset += delta - chunk_len;
|
||||
/* if there are no more, then we are done */
|
||||
if (err == ESRCH) {
|
||||
*start = limit;
|
||||
return (0);
|
||||
}
|
||||
chunk_len -= delta;
|
||||
|
||||
/* skip over unallocated data */
|
||||
err = dnode_next_offset(dn,
|
||||
DNODE_FIND_BACKWARDS, offset, 1, 1, 0);
|
||||
if (err == ESRCH)
|
||||
*offset = limit;
|
||||
else if (err)
|
||||
} else if (err) {
|
||||
return (err);
|
||||
}
|
||||
blkcnt += 1;
|
||||
|
||||
if (*offset < limit)
|
||||
*offset = limit;
|
||||
ASSERT3U(*offset, <, initial_offset);
|
||||
/* reset offset to end of "next" block back */
|
||||
*start = P2ALIGN(*start, iblkrange);
|
||||
if (*start <= limit)
|
||||
*start = limit;
|
||||
else
|
||||
*start -= 1;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
@ -548,7 +543,7 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_buf_t **dbp;
|
||||
int numbufs, i, err;
|
||||
int numbufs, err;
|
||||
|
||||
err = dnode_hold(os->os, object, FTAG, &dn);
|
||||
if (err)
|
||||
@ -559,7 +554,7 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
* block. If we ever do the tail block optimization, we will need to
|
||||
* handle that here as well.
|
||||
*/
|
||||
if (dn->dn_datablkshift == 0) {
|
||||
if (dn->dn_maxblkid == 0) {
|
||||
int newsz = offset > dn->dn_datablksz ? 0 :
|
||||
MIN(size, dn->dn_datablksz - offset);
|
||||
bzero((char *)buf + newsz, size - newsz);
|
||||
@ -568,6 +563,7 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
|
||||
while (size > 0) {
|
||||
uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
|
||||
int i;
|
||||
|
||||
/*
|
||||
* NB: we could do this block-at-a-time, but it's nice
|
||||
@ -803,9 +799,6 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_fill_done(db, tx);
|
||||
|
||||
if (err)
|
||||
break;
|
||||
|
||||
offset += tocpy;
|
||||
size -= tocpy;
|
||||
}
|
||||
|
@ -679,7 +679,7 @@ dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
}
|
||||
|
||||
int
|
||||
dmu_objset_destroy(const char *name)
|
||||
dmu_objset_destroy(const char *name, boolean_t defer)
|
||||
{
|
||||
objset_t *os;
|
||||
int error;
|
||||
@ -696,7 +696,7 @@ dmu_objset_destroy(const char *name)
|
||||
dsl_dataset_t *ds = os->os->os_dsl_dataset;
|
||||
zil_destroy(dmu_objset_zil(os), B_FALSE);
|
||||
|
||||
error = dsl_dataset_destroy(ds, os);
|
||||
error = dsl_dataset_destroy(ds, os, defer);
|
||||
/*
|
||||
* dsl_dataset_destroy() closes the ds.
|
||||
*/
|
||||
@ -1130,7 +1130,7 @@ dmu_objset_userspace_upgrade(objset_t *os)
|
||||
*/
|
||||
|
||||
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
|
||||
dmu_tx_t *tx = dmu_tx_create(os);
|
||||
dmu_tx_t *tx;
|
||||
dmu_buf_t *db;
|
||||
int objerr;
|
||||
|
||||
@ -1140,6 +1140,7 @@ dmu_objset_userspace_upgrade(objset_t *os)
|
||||
objerr = dmu_bonus_hold(os, obj, FTAG, &db);
|
||||
if (objerr)
|
||||
continue;
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_bonus(tx, obj);
|
||||
objerr = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (objerr) {
|
||||
|
@ -393,6 +393,7 @@ recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
dsl_dataset_t *ds = arg1;
|
||||
struct recvbeginsyncarg *rbsa = arg2;
|
||||
int err;
|
||||
struct dsl_ds_destroyarg dsda = {0};
|
||||
|
||||
/* must be a head ds */
|
||||
if (ds->ds_phys->ds_next_snap_obj != 0)
|
||||
@ -402,7 +403,8 @@ recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
if (dsl_dir_is_clone(ds->ds_dir))
|
||||
return (EINVAL);
|
||||
|
||||
err = dsl_dataset_destroy_check(ds, rbsa->tag, tx);
|
||||
dsda.ds = ds;
|
||||
err = dsl_dataset_destroy_check(&dsda, rbsa->tag, tx);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
@ -427,13 +429,16 @@ recv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
|
||||
uint64_t dsobj;
|
||||
struct dsl_ds_destroyarg dsda = {0};
|
||||
|
||||
/*
|
||||
* NB: caller must provide an extra hold on the dsl_dir_t, so it
|
||||
* won't go away when dsl_dataset_destroy_sync() closes the
|
||||
* dataset.
|
||||
*/
|
||||
dsl_dataset_destroy_sync(ds, rbsa->tag, cr, tx);
|
||||
dsda.ds = ds;
|
||||
dsl_dataset_destroy_sync(&dsda, rbsa->tag, cr, tx);
|
||||
ASSERT3P(dsda.rm_origin, ==, NULL);
|
||||
|
||||
dsobj = dsl_dataset_create_sync_dd(dd, rbsa->origin, flags, tx);
|
||||
|
||||
@ -483,7 +488,7 @@ recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
recv_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
recv_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ohds = arg1;
|
||||
struct recvbeginsyncarg *rbsa = arg2;
|
||||
@ -513,27 +518,13 @@ recv_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dp->dp_spa, tx, cr, "dataset = %lld", dsobj);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
recv_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = arg1;
|
||||
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
|
||||
|
||||
spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
|
||||
ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld",
|
||||
ds->ds_object);
|
||||
}
|
||||
|
||||
/*
|
||||
* NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
|
||||
* succeeds; otherwise we will leak the holds on the datasets.
|
||||
*/
|
||||
int
|
||||
dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
|
||||
boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *drc)
|
||||
boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc)
|
||||
{
|
||||
int err = 0;
|
||||
boolean_t byteswap;
|
||||
@ -582,36 +573,8 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
|
||||
/*
|
||||
* Process the begin in syncing context.
|
||||
*/
|
||||
if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE) && !online) {
|
||||
/* offline incremental receive */
|
||||
err = dsl_dataset_own(tofs, 0, dmu_recv_tag, &ds);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
/*
|
||||
* Only do the rollback if the most recent snapshot
|
||||
* matches the incremental source
|
||||
*/
|
||||
if (force) {
|
||||
if (ds->ds_prev == NULL ||
|
||||
ds->ds_prev->ds_phys->ds_guid !=
|
||||
rbsa.fromguid) {
|
||||
dsl_dataset_disown(ds, dmu_recv_tag);
|
||||
return (ENODEV);
|
||||
}
|
||||
(void) dsl_dataset_rollback(ds, DMU_OST_NONE);
|
||||
}
|
||||
rbsa.force = B_FALSE;
|
||||
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
|
||||
recv_incremental_check,
|
||||
recv_offline_incremental_sync, ds, &rbsa, 1);
|
||||
if (err) {
|
||||
dsl_dataset_disown(ds, dmu_recv_tag);
|
||||
return (err);
|
||||
}
|
||||
drc->drc_logical_ds = drc->drc_real_ds = ds;
|
||||
} else if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) {
|
||||
/* online incremental receive */
|
||||
if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) {
|
||||
/* incremental receive */
|
||||
|
||||
/* tmp clone name is: "tofs/%tosnap" */
|
||||
(void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
|
||||
@ -622,11 +585,18 @@ dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
/* must not have an incremental recv already in progress */
|
||||
if (!mutex_tryenter(&ds->ds_recvlock)) {
|
||||
dsl_dataset_rele(ds, dmu_recv_tag);
|
||||
return (EBUSY);
|
||||
}
|
||||
|
||||
rbsa.force = force;
|
||||
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
|
||||
recv_incremental_check,
|
||||
recv_online_incremental_sync, ds, &rbsa, 5);
|
||||
recv_incremental_sync, ds, &rbsa, 5);
|
||||
if (err) {
|
||||
mutex_exit(&ds->ds_recvlock);
|
||||
dsl_dataset_rele(ds, dmu_recv_tag);
|
||||
return (err);
|
||||
}
|
||||
@ -931,26 +901,6 @@ restore_free(struct restorearg *ra, objset_t *os,
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc)
|
||||
{
|
||||
if (drc->drc_newfs || drc->drc_real_ds != drc->drc_logical_ds) {
|
||||
/*
|
||||
* online incremental or new fs: destroy the fs (which
|
||||
* may be a clone) that we created
|
||||
*/
|
||||
(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
|
||||
if (drc->drc_real_ds != drc->drc_logical_ds)
|
||||
dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
|
||||
} else {
|
||||
/*
|
||||
* offline incremental: rollback to most recent snapshot.
|
||||
*/
|
||||
(void) dsl_dataset_rollback(drc->drc_real_ds, DMU_OST_NONE);
|
||||
dsl_dataset_disown(drc->drc_real_ds, dmu_recv_tag);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* NB: callers *must* call dmu_recv_end() if this succeeds.
|
||||
*/
|
||||
@ -1078,11 +1028,17 @@ out:
|
||||
|
||||
if (ra.err != 0) {
|
||||
/*
|
||||
* rollback or destroy what we created, so we don't
|
||||
* leave it in the restoring state.
|
||||
* destroy what we created, so we don't leave it in the
|
||||
* inconsistent restoring state.
|
||||
*/
|
||||
txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
|
||||
dmu_recv_abort_cleanup(drc);
|
||||
|
||||
(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
|
||||
B_FALSE);
|
||||
if (drc->drc_real_ds != drc->drc_logical_ds) {
|
||||
mutex_exit(&drc->drc_logical_ds->ds_recvlock);
|
||||
dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
|
||||
}
|
||||
}
|
||||
|
||||
kmem_free(ra.buf, ra.bufsize);
|
||||
@ -1149,7 +1105,9 @@ dmu_recv_end(dmu_recv_cookie_t *drc)
|
||||
dsl_dataset_rele(ds, dmu_recv_tag);
|
||||
}
|
||||
/* dsl_dataset_destroy() will disown the ds */
|
||||
(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
|
||||
(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
|
||||
B_FALSE);
|
||||
mutex_exit(&drc->drc_logical_ds->ds_recvlock);
|
||||
if (err)
|
||||
return (err);
|
||||
}
|
||||
@ -1163,7 +1121,8 @@ dmu_recv_end(dmu_recv_cookie_t *drc)
|
||||
if (err) {
|
||||
if (drc->drc_newfs) {
|
||||
ASSERT(ds == drc->drc_real_ds);
|
||||
(void) dsl_dataset_destroy(ds, dmu_recv_tag);
|
||||
(void) dsl_dataset_destroy(ds, dmu_recv_tag,
|
||||
B_FALSE);
|
||||
return (err);
|
||||
} else {
|
||||
(void) dsl_dataset_rollback(ds, DMU_OST_NONE);
|
||||
|
@ -697,8 +697,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
}
|
||||
|
||||
err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
|
||||
&txh->txh_space_towrite, &txh->txh_space_tooverwrite,
|
||||
txh->txh_dnode->dn_datablkshift);
|
||||
&txh->txh_space_towrite, &txh->txh_space_tooverwrite);
|
||||
|
||||
/*
|
||||
* If the modified blocks are scattered to the four winds,
|
||||
|
@ -1260,6 +1260,22 @@ dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
|
||||
dmu_tx_willuse_space(tx, space);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function scans a block at the indicated "level" looking for
|
||||
* a hole or data (depending on 'flags'). If level > 0, then we are
|
||||
* scanning an indirect block looking at its pointers. If level == 0,
|
||||
* then we are looking at a block of dnodes. If we don't find what we
|
||||
* are looking for in the block, we return ESRCH. Otherwise, return
|
||||
* with *offset pointing to the beginning (if searching forwards) or
|
||||
* end (if searching backwards) of the range covered by the block
|
||||
* pointer we matched on (or dnode).
|
||||
*
|
||||
* The basic search algorithm used below by dnode_next_offset() is to
|
||||
* use this function to search up the block tree (widen the search) until
|
||||
* we find something (i.e., we don't return ESRCH) and then search back
|
||||
* down the tree (narrow the search) until we reach our original search
|
||||
* level.
|
||||
*/
|
||||
static int
|
||||
dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
||||
int lvl, uint64_t blkfill, uint64_t txg)
|
||||
@ -1330,6 +1346,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
||||
error = ESRCH;
|
||||
} else {
|
||||
blkptr_t *bp = data;
|
||||
uint64_t start = *offset;
|
||||
span = (lvl - 1) * epbs + dn->dn_datablkshift;
|
||||
minfill = 0;
|
||||
maxfill = blkfill << ((lvl - 1) * epbs);
|
||||
@ -1339,18 +1356,25 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
||||
else
|
||||
minfill++;
|
||||
|
||||
for (i = (*offset >> span) & ((1ULL << epbs) - 1);
|
||||
*offset = *offset >> span;
|
||||
for (i = BF64_GET(*offset, 0, epbs);
|
||||
i >= 0 && i < epb; i += inc) {
|
||||
if (bp[i].blk_fill >= minfill &&
|
||||
bp[i].blk_fill <= maxfill &&
|
||||
(hole || bp[i].blk_birth > txg))
|
||||
break;
|
||||
if (inc < 0 && *offset < (1ULL << span))
|
||||
*offset = 0;
|
||||
else
|
||||
*offset += (1ULL << span) * inc;
|
||||
if (inc > 0 || *offset > 0)
|
||||
*offset += inc;
|
||||
}
|
||||
if (i < 0 || i == epb)
|
||||
*offset = *offset << span;
|
||||
if (inc < 0) {
|
||||
/* traversing backwards; position offset at the end */
|
||||
ASSERT3U(*offset, <=, start);
|
||||
*offset = MIN(*offset + (1ULL << span) - 1, start);
|
||||
} else if (*offset < start) {
|
||||
*offset = start;
|
||||
}
|
||||
if (i < 0 || i >= epb)
|
||||
error = ESRCH;
|
||||
}
|
||||
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/zvol.h>
|
||||
|
||||
static char *dsl_reaper = "the grim reaper";
|
||||
|
||||
@ -262,6 +263,7 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv)
|
||||
ASSERT(!list_link_active(&ds->ds_synced_link));
|
||||
|
||||
mutex_destroy(&ds->ds_lock);
|
||||
mutex_destroy(&ds->ds_recvlock);
|
||||
mutex_destroy(&ds->ds_opening_lock);
|
||||
mutex_destroy(&ds->ds_deadlist.bpl_lock);
|
||||
rw_destroy(&ds->ds_rwlock);
|
||||
@ -359,6 +361,7 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
ds->ds_phys = dbuf->db_data;
|
||||
|
||||
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
|
||||
NULL);
|
||||
@ -377,6 +380,7 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
* just opened it.
|
||||
*/
|
||||
mutex_destroy(&ds->ds_lock);
|
||||
mutex_destroy(&ds->ds_recvlock);
|
||||
mutex_destroy(&ds->ds_opening_lock);
|
||||
mutex_destroy(&ds->ds_deadlist.bpl_lock);
|
||||
rw_destroy(&ds->ds_rwlock);
|
||||
@ -406,8 +410,15 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
dsl_dataset_rele(origin, FTAG);
|
||||
}
|
||||
}
|
||||
} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
|
||||
} else {
|
||||
if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
|
||||
err = dsl_dataset_get_snapname(ds);
|
||||
if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
|
||||
err = zap_count(
|
||||
ds->ds_dir->dd_pool->dp_meta_objset,
|
||||
ds->ds_phys->ds_userrefs_obj,
|
||||
&ds->ds_userrefs);
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
|
||||
@ -448,6 +459,7 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
dsl_dataset_drop_ref(ds->ds_prev, ds);
|
||||
dsl_dir_close(ds->ds_dir, ds);
|
||||
mutex_destroy(&ds->ds_lock);
|
||||
mutex_destroy(&ds->ds_recvlock);
|
||||
mutex_destroy(&ds->ds_opening_lock);
|
||||
mutex_destroy(&ds->ds_deadlist.bpl_lock);
|
||||
rw_destroy(&ds->ds_rwlock);
|
||||
@ -845,6 +857,7 @@ struct destroyarg {
|
||||
dsl_sync_task_group_t *dstg;
|
||||
char *snapname;
|
||||
char *failed;
|
||||
boolean_t defer;
|
||||
};
|
||||
|
||||
static int
|
||||
@ -852,23 +865,30 @@ dsl_snapshot_destroy_one(char *name, void *arg)
|
||||
{
|
||||
struct destroyarg *da = arg;
|
||||
dsl_dataset_t *ds;
|
||||
char *cp;
|
||||
int err;
|
||||
char *dsname;
|
||||
size_t buflen;
|
||||
|
||||
(void) strcat(name, "@");
|
||||
(void) strcat(name, da->snapname);
|
||||
err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
|
||||
/* alloc a buffer to hold name@snapname, plus the terminating NULL */
|
||||
buflen = strlen(name) + strlen(da->snapname) + 2;
|
||||
dsname = kmem_alloc(buflen, KM_SLEEP);
|
||||
(void) snprintf(dsname, buflen, "%s@%s", name, da->snapname);
|
||||
err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
|
||||
da->dstg, &ds);
|
||||
cp = strchr(name, '@');
|
||||
*cp = '\0';
|
||||
kmem_free(dsname, buflen);
|
||||
if (err == 0) {
|
||||
struct dsl_ds_destroyarg *dsda;
|
||||
|
||||
dsl_dataset_make_exclusive(ds, da->dstg);
|
||||
if (ds->ds_user_ptr) {
|
||||
ds->ds_user_evict_func(ds, ds->ds_user_ptr);
|
||||
ds->ds_user_ptr = NULL;
|
||||
}
|
||||
dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
|
||||
dsda->ds = ds;
|
||||
dsda->defer = da->defer;
|
||||
dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
|
||||
dsl_dataset_destroy_sync, ds, da->dstg, 0);
|
||||
dsl_dataset_destroy_sync, dsda, da->dstg, 0);
|
||||
} else if (err == ENOENT) {
|
||||
err = 0;
|
||||
} else {
|
||||
@ -882,7 +902,7 @@ dsl_snapshot_destroy_one(char *name, void *arg)
|
||||
*/
|
||||
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
|
||||
int
|
||||
dsl_snapshots_destroy(char *fsname, char *snapname)
|
||||
dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
|
||||
{
|
||||
int err;
|
||||
struct destroyarg da;
|
||||
@ -895,6 +915,7 @@ dsl_snapshots_destroy(char *fsname, char *snapname)
|
||||
da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
||||
da.snapname = snapname;
|
||||
da.failed = fsname;
|
||||
da.defer = defer;
|
||||
|
||||
err = dmu_objset_find(fsname,
|
||||
dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
|
||||
@ -904,7 +925,9 @@ dsl_snapshots_destroy(char *fsname, char *snapname)
|
||||
|
||||
for (dst = list_head(&da.dstg->dstg_tasks); dst;
|
||||
dst = list_next(&da.dstg->dstg_tasks, dst)) {
|
||||
dsl_dataset_t *ds = dst->dst_arg1;
|
||||
struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
|
||||
dsl_dataset_t *ds = dsda->ds;
|
||||
|
||||
/*
|
||||
* Return the file system name that triggered the error
|
||||
*/
|
||||
@ -912,7 +935,9 @@ dsl_snapshots_destroy(char *fsname, char *snapname)
|
||||
dsl_dataset_name(ds, fsname);
|
||||
*strchr(fsname, '@') = '\0';
|
||||
}
|
||||
ASSERT3P(dsda->rm_origin, ==, NULL);
|
||||
dsl_dataset_disown(ds, da.dstg);
|
||||
kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
|
||||
}
|
||||
|
||||
dsl_sync_task_group_destroy(da.dstg);
|
||||
@ -920,18 +945,100 @@ dsl_snapshots_destroy(char *fsname, char *snapname)
|
||||
return (err);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
|
||||
{
|
||||
boolean_t might_destroy = B_FALSE;
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
|
||||
DS_IS_DEFER_DESTROY(ds))
|
||||
might_destroy = B_TRUE;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
|
||||
return (might_destroy);
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
/*
 * If the objset backing 'ds' is a ZVOL, remove the minor node registered
 * under 'name'.  Datasets of any other type are left untouched.
 *
 * Returns 0 on success (or when there was nothing to do), otherwise the
 * error from opening the objset or from zvol_remove_minor().
 */
static int
dsl_dataset_zvol_cleanup(dsl_dataset_t *ds, const char *name)
{
	objset_t *os;
	int err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);

	if (err != 0)
		return (err);

	if (dmu_objset_type(os) == DMU_OST_ZVOL)
		err = zvol_remove_minor(name);

	/* The objset was only opened to learn its type; always close it. */
	dmu_objset_close(os);

	return (err);
}
#endif
|
||||
|
||||
/*
|
||||
* If we're removing a clone, and these three conditions are true:
|
||||
* 1) the clone's origin has no other children
|
||||
* 2) the clone's origin has no user references
|
||||
* 3) the clone's origin has been marked for deferred destruction
|
||||
* Then, prepare to remove the origin as part of this sync task group.
|
||||
*/
|
||||
static int
|
||||
dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
|
||||
{
|
||||
dsl_dataset_t *ds = dsda->ds;
|
||||
dsl_dataset_t *origin = ds->ds_prev;
|
||||
|
||||
if (dsl_dataset_might_destroy_origin(origin)) {
|
||||
char *name;
|
||||
int namelen;
|
||||
int error;
|
||||
|
||||
namelen = dsl_dataset_namelen(origin) + 1;
|
||||
name = kmem_alloc(namelen, KM_SLEEP);
|
||||
dsl_dataset_name(origin, name);
|
||||
#ifdef _KERNEL
|
||||
error = zfs_unmount_snap(name, NULL);
|
||||
if (error) {
|
||||
kmem_free(name, namelen);
|
||||
return (error);
|
||||
}
|
||||
error = dsl_dataset_zvol_cleanup(origin, name);
|
||||
if (error) {
|
||||
kmem_free(name, namelen);
|
||||
return (error);
|
||||
}
|
||||
#endif
|
||||
error = dsl_dataset_own(name,
|
||||
DS_MODE_READONLY | DS_MODE_INCONSISTENT,
|
||||
tag, &origin);
|
||||
kmem_free(name, namelen);
|
||||
if (error)
|
||||
return (error);
|
||||
dsda->rm_origin = origin;
|
||||
dsl_dataset_make_exclusive(origin, tag);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* ds must be opened as OWNER. On return (whether successful or not),
|
||||
* ds will be closed and caller can no longer dereference it.
|
||||
*/
|
||||
int
|
||||
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
|
||||
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
|
||||
{
|
||||
int err;
|
||||
dsl_sync_task_group_t *dstg;
|
||||
objset_t *os;
|
||||
dsl_dir_t *dd;
|
||||
uint64_t obj;
|
||||
struct dsl_ds_destroyarg dsda = {0};
|
||||
|
||||
dsda.ds = ds;
|
||||
|
||||
if (dsl_dataset_is_snapshot(ds)) {
|
||||
/* Destroying a snapshot is simpler */
|
||||
@ -941,9 +1048,12 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
|
||||
ds->ds_user_evict_func(ds, ds->ds_user_ptr);
|
||||
ds->ds_user_ptr = NULL;
|
||||
}
|
||||
/* NOTE: defer is always B_FALSE for non-snapshots */
|
||||
dsda.defer = defer;
|
||||
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
|
||||
dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
|
||||
ds, tag, 0);
|
||||
&dsda, tag, 0);
|
||||
ASSERT3P(dsda.rm_origin, ==, NULL);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1024,13 +1134,45 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
|
||||
ds->ds_user_evict_func(ds, ds->ds_user_ptr);
|
||||
ds->ds_user_ptr = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're removing a clone, we might also need to remove its
|
||||
* origin.
|
||||
*/
|
||||
do {
|
||||
dsda.need_prep = B_FALSE;
|
||||
if (dsl_dir_is_clone(dd)) {
|
||||
err = dsl_dataset_origin_rm_prep(&dsda, tag);
|
||||
if (err) {
|
||||
dsl_dir_close(dd, FTAG);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
|
||||
dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
|
||||
dsl_dataset_destroy_sync, ds, tag, 0);
|
||||
dsl_dataset_destroy_sync, &dsda, tag, 0);
|
||||
dsl_sync_task_create(dstg, dsl_dir_destroy_check,
|
||||
dsl_dir_destroy_sync, dd, FTAG, 0);
|
||||
err = dsl_sync_task_group_wait(dstg);
|
||||
dsl_sync_task_group_destroy(dstg);
|
||||
|
||||
/*
|
||||
* We could be racing against 'zfs release' or 'zfs destroy -d'
|
||||
* on the origin snap, in which case we can get EBUSY if we
|
||||
* needed to destroy the origin snap but were not ready to
|
||||
* do so.
|
||||
*/
|
||||
if (dsda.need_prep) {
|
||||
ASSERT(err == EBUSY);
|
||||
ASSERT(dsl_dir_is_clone(dd));
|
||||
ASSERT(dsda.rm_origin == NULL);
|
||||
}
|
||||
} while (dsda.need_prep);
|
||||
|
||||
if (dsda.rm_origin != NULL)
|
||||
dsl_dataset_disown(dsda.rm_origin, tag);
|
||||
|
||||
/* if it is successful, dsl_dir_destroy_sync will close the dd */
|
||||
if (err)
|
||||
dsl_dir_close(dd, FTAG);
|
||||
@ -1211,7 +1353,8 @@ dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
/*
|
||||
* We can only roll back to emptiness if it is a ZPL objset.
|
||||
*/
|
||||
if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
|
||||
if (*ost != DMU_OST_ZFS &&
|
||||
ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL)
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
@ -1316,6 +1459,7 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
} else {
|
||||
objset_impl_t *osi;
|
||||
|
||||
ASSERT(*ost != DMU_OST_ZVOL);
|
||||
ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
|
||||
ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
|
||||
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);
|
||||
@ -1385,18 +1529,63 @@ dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
cr, "dataset = %llu", ds->ds_object);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = dsda->ds;
|
||||
dsl_dataset_t *ds_prev = ds->ds_prev;
|
||||
|
||||
if (dsl_dataset_might_destroy_origin(ds_prev)) {
|
||||
struct dsl_ds_destroyarg ndsda = {0};
|
||||
|
||||
/*
|
||||
* If we're not prepared to remove the origin, don't remove
|
||||
* the clone either.
|
||||
*/
|
||||
if (dsda->rm_origin == NULL) {
|
||||
dsda->need_prep = B_TRUE;
|
||||
return (EBUSY);
|
||||
}
|
||||
|
||||
ndsda.ds = ds_prev;
|
||||
ndsda.is_origin_rm = B_TRUE;
|
||||
return (dsl_dataset_destroy_check(&ndsda, tag, tx));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're not going to remove the origin after all,
|
||||
* undo the open context setup.
|
||||
*/
|
||||
if (dsda->rm_origin != NULL) {
|
||||
dsl_dataset_disown(dsda->rm_origin, tag);
|
||||
dsda->rm_origin = NULL;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = arg1;
|
||||
struct dsl_ds_destroyarg *dsda = arg1;
|
||||
dsl_dataset_t *ds = dsda->ds;
|
||||
|
||||
/* we have an owner hold, so no one else can destroy us */
|
||||
ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
|
||||
|
||||
/* Can't delete a branch point. */
|
||||
if (ds->ds_phys->ds_num_children > 1)
|
||||
return (EEXIST);
|
||||
/*
|
||||
* Only allow deferred destroy on pools that support it.
|
||||
* NOTE: deferred destroy is only supported on snapshots.
|
||||
*/
|
||||
if (dsda->defer) {
|
||||
if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
|
||||
SPA_VERSION_USERREFS)
|
||||
return (ENOTSUP);
|
||||
ASSERT(dsl_dataset_is_snapshot(ds));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Can't delete a head dataset if there are snapshots of it.
|
||||
@ -1414,6 +1603,31 @@ dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
|
||||
return (EAGAIN);
|
||||
|
||||
if (dsl_dataset_is_snapshot(ds)) {
|
||||
/*
|
||||
* If this snapshot has an elevated user reference count,
|
||||
* we can't destroy it yet.
|
||||
*/
|
||||
if (ds->ds_userrefs > 0 && !dsda->releasing)
|
||||
return (EBUSY);
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
/*
|
||||
* Can't delete a branch point. However, if we're destroying
|
||||
* a clone and removing its origin due to it having a user
|
||||
* hold count of 0 and having been marked for deferred destroy,
|
||||
* it's OK for the origin to have a single clone.
|
||||
*/
|
||||
if (ds->ds_phys->ds_num_children >
|
||||
(dsda->is_origin_rm ? 2 : 1)) {
|
||||
mutex_exit(&ds->ds_lock);
|
||||
return (EEXIST);
|
||||
}
|
||||
mutex_exit(&ds->ds_lock);
|
||||
} else if (dsl_dir_is_clone(ds->ds_dir)) {
|
||||
return (dsl_dataset_origin_check(dsda, arg2, tx));
|
||||
}
|
||||
|
||||
/* XXX we should do some i/o error checking... */
|
||||
return (0);
|
||||
}
|
||||
@ -1461,7 +1675,8 @@ dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
|
||||
void
|
||||
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = arg1;
|
||||
struct dsl_ds_destroyarg *dsda = arg1;
|
||||
dsl_dataset_t *ds = dsda->ds;
|
||||
zio_t *zio;
|
||||
int err;
|
||||
int after_branch_point = FALSE;
|
||||
@ -1471,11 +1686,20 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
||||
uint64_t obj;
|
||||
|
||||
ASSERT(ds->ds_owner);
|
||||
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
|
||||
ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
|
||||
ASSERT(ds->ds_prev == NULL ||
|
||||
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
|
||||
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
|
||||
|
||||
if (dsda->defer) {
|
||||
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
|
||||
if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* signal any waiters that this dataset is going away */
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ds->ds_owner = dsl_reaper;
|
||||
@ -1521,6 +1745,20 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
||||
/* This clone is toast. */
|
||||
ASSERT(ds_prev->ds_phys->ds_num_children > 1);
|
||||
ds_prev->ds_phys->ds_num_children--;
|
||||
|
||||
/*
|
||||
* If the clone's origin has no other clones, no
|
||||
* user holds, and has been marked for deferred
|
||||
* deletion, then we should have done the necessary
|
||||
* destroy setup for it.
|
||||
*/
|
||||
if (ds_prev->ds_phys->ds_num_children == 1 &&
|
||||
ds_prev->ds_userrefs == 0 &&
|
||||
DS_IS_DEFER_DESTROY(ds_prev)) {
|
||||
ASSERT3P(dsda->rm_origin, !=, NULL);
|
||||
} else {
|
||||
ASSERT3P(dsda->rm_origin, ==, NULL);
|
||||
}
|
||||
} else if (!after_branch_point) {
|
||||
ds_prev->ds_phys->ds_next_snap_obj =
|
||||
ds->ds_phys->ds_next_snap_obj;
|
||||
@ -1733,10 +1971,32 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
||||
}
|
||||
if (ds->ds_phys->ds_props_obj != 0)
|
||||
VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
|
||||
if (ds->ds_phys->ds_userrefs_obj != 0)
|
||||
VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
|
||||
dsl_dir_close(ds->ds_dir, ds);
|
||||
ds->ds_dir = NULL;
|
||||
dsl_dataset_drain_refs(ds, tag);
|
||||
VERIFY(0 == dmu_object_free(mos, obj, tx));
|
||||
|
||||
if (dsda->rm_origin) {
|
||||
/*
|
||||
* Remove the origin of the clone we just destroyed.
|
||||
*/
|
||||
dsl_dataset_t *origin = ds->ds_prev;
|
||||
struct dsl_ds_destroyarg ndsda = {0};
|
||||
|
||||
ASSERT3P(origin, ==, dsda->rm_origin);
|
||||
if (origin->ds_user_ptr) {
|
||||
origin->ds_user_evict_func(origin, origin->ds_user_ptr);
|
||||
origin->ds_user_ptr = NULL;
|
||||
}
|
||||
|
||||
dsl_dataset_rele(origin, tag);
|
||||
ds->ds_prev = NULL;
|
||||
|
||||
ndsda.ds = origin;
|
||||
dsl_dataset_destroy_sync(&ndsda, tag, cr, tx);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1951,6 +2211,9 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
|
||||
ds->ds_reserved);
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
|
||||
ds->ds_phys->ds_guid);
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ds->ds_userrefs);
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
|
||||
DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
|
||||
|
||||
if (ds->ds_phys->ds_next_snap_obj) {
|
||||
/*
|
||||
@ -3019,7 +3282,7 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
|
||||
ds->ds_quota = new_quota;
|
||||
|
||||
dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
|
||||
dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
|
||||
|
||||
spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
|
||||
tx, cr, "%lld dataset = %llu ",
|
||||
@ -3114,7 +3377,7 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
|
||||
|
||||
dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
|
||||
mutex_exit(&ds->ds_dir->dd_lock);
|
||||
dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
|
||||
dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refreservation",
|
||||
new_reservation, cr, tx);
|
||||
|
||||
spa_history_internal_log(LOG_DS_REFRESERV,
|
||||
@ -3138,3 +3401,421 @@ dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = arg1;
|
||||
char *htag = arg2;
|
||||
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
int error = 0;
|
||||
|
||||
if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
|
||||
return (ENOTSUP);
|
||||
|
||||
if (!dsl_dataset_is_snapshot(ds))
|
||||
return (EINVAL);
|
||||
|
||||
if (strlen(htag) >= ZAP_MAXNAMELEN)
|
||||
return (ENAMETOOLONG);
|
||||
|
||||
/* tags must be unique */
|
||||
mutex_enter(&ds->ds_lock);
|
||||
if (ds->ds_phys->ds_userrefs_obj) {
|
||||
error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
|
||||
8, 1, tx);
|
||||
if (error == 0)
|
||||
error = EEXIST;
|
||||
else if (error == ENOENT)
|
||||
error = 0;
|
||||
}
|
||||
mutex_exit(&ds->ds_lock);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_hold_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = arg1;
|
||||
char *htag = arg2;
|
||||
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
time_t now = gethrestime_sec();
|
||||
uint64_t zapobj;
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
if (ds->ds_phys->ds_userrefs_obj == 0) {
|
||||
/*
|
||||
* This is the first user hold for this dataset. Create
|
||||
* the userrefs zap object.
|
||||
*/
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj =
|
||||
zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
|
||||
} else {
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
}
|
||||
ds->ds_userrefs++;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
|
||||
VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
|
||||
|
||||
spa_history_internal_log(LOG_DS_USER_HOLD,
|
||||
ds->ds_dir->dd_pool->dp_spa, tx, cr, "<%s> dataset = %llu",
|
||||
htag, ds->ds_object);
|
||||
}
|
||||
|
||||
/*
 * Argument bundle shared by the user hold and user release entry points
 * and their per-dataset callbacks.
 */
struct dsl_ds_holdarg {
|
||||
dsl_sync_task_group_t *dstg;	/* group the per-snapshot tasks join; also used as the hold tag */
|
||||
char *htag;	/* user-supplied hold tag */
|
||||
char *snapname;	/* snapshot name, appended to each dataset name after '@' */
|
||||
boolean_t recursive;	/* when set, a dataset lacking the snapshot (ENOENT) is not an error */
|
||||
char failed[MAXPATHLEN];	/* name of the dataset that failed; copied back to the caller on error */
|
||||
};
|
||||
|
||||
static int
|
||||
dsl_dataset_user_hold_one(char *dsname, void *arg)
|
||||
{
|
||||
struct dsl_ds_holdarg *ha = arg;
|
||||
dsl_dataset_t *ds;
|
||||
int error;
|
||||
char *name;
|
||||
size_t buflen;
|
||||
|
||||
/* alloc a buffer to hold dsname@snapname plus terminating NULL */
|
||||
buflen = strlen(dsname) + strlen(ha->snapname) + 2;
|
||||
name = kmem_alloc(buflen, KM_SLEEP);
|
||||
(void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname);
|
||||
error = dsl_dataset_hold(name, ha->dstg, &ds);
|
||||
kmem_free(name, buflen);
|
||||
if (error == 0) {
|
||||
dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
|
||||
dsl_dataset_user_hold_sync, ds, ha->htag, 0);
|
||||
} else if (error == ENOENT && ha->recursive) {
|
||||
error = 0;
|
||||
} else {
|
||||
(void) strcpy(ha->failed, dsname);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
|
||||
boolean_t recursive)
|
||||
{
|
||||
struct dsl_ds_holdarg *ha;
|
||||
dsl_sync_task_t *dst;
|
||||
spa_t *spa;
|
||||
int error;
|
||||
|
||||
ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
|
||||
|
||||
(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
|
||||
|
||||
error = spa_open(dsname, &spa, FTAG);
|
||||
if (error) {
|
||||
kmem_free(ha, sizeof (struct dsl_ds_holdarg));
|
||||
return (error);
|
||||
}
|
||||
|
||||
ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
||||
ha->htag = htag;
|
||||
ha->snapname = snapname;
|
||||
ha->recursive = recursive;
|
||||
if (recursive) {
|
||||
error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
|
||||
ha, DS_FIND_CHILDREN);
|
||||
} else {
|
||||
error = dsl_dataset_user_hold_one(dsname, ha);
|
||||
}
|
||||
if (error == 0)
|
||||
error = dsl_sync_task_group_wait(ha->dstg);
|
||||
|
||||
for (dst = list_head(&ha->dstg->dstg_tasks); dst;
|
||||
dst = list_next(&ha->dstg->dstg_tasks, dst)) {
|
||||
dsl_dataset_t *ds = dst->dst_arg1;
|
||||
|
||||
if (dst->dst_err) {
|
||||
dsl_dataset_name(ds, ha->failed);
|
||||
*strchr(ha->failed, '@') = '\0';
|
||||
}
|
||||
dsl_dataset_rele(ds, ha->dstg);
|
||||
}
|
||||
|
||||
if (error)
|
||||
(void) strcpy(dsname, ha->failed);
|
||||
|
||||
dsl_sync_task_group_destroy(ha->dstg);
|
||||
kmem_free(ha, sizeof (struct dsl_ds_holdarg));
|
||||
spa_close(spa, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Per-snapshot argument for the user release sync task; allocated in
 * dsl_dataset_user_release_one() and freed in dsl_dataset_user_release().
 */
struct dsl_ds_releasearg {
|
||||
dsl_dataset_t *ds;	/* snapshot whose hold is being released */
|
||||
const char *htag;	/* tag of the hold to release */
|
||||
boolean_t own; /* do we own or just hold ds? */
|
||||
};
|
||||
|
||||
static int
|
||||
dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
|
||||
boolean_t *might_destroy)
|
||||
{
|
||||
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
uint64_t zapobj;
|
||||
uint64_t tmp;
|
||||
int error;
|
||||
|
||||
*might_destroy = B_FALSE;
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
if (zapobj == 0) {
|
||||
/* The tag can't possibly exist */
|
||||
mutex_exit(&ds->ds_lock);
|
||||
return (ESRCH);
|
||||
}
|
||||
|
||||
/* Make sure the tag exists */
|
||||
error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
|
||||
if (error) {
|
||||
mutex_exit(&ds->ds_lock);
|
||||
if (error == ENOENT)
|
||||
error = ESRCH;
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
|
||||
DS_IS_DEFER_DESTROY(ds))
|
||||
*might_destroy = B_TRUE;
|
||||
|
||||
mutex_exit(&ds->ds_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
struct dsl_ds_releasearg *ra = arg1;
|
||||
dsl_dataset_t *ds = ra->ds;
|
||||
boolean_t might_destroy;
|
||||
int error;
|
||||
|
||||
if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
|
||||
return (ENOTSUP);
|
||||
|
||||
error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (might_destroy) {
|
||||
struct dsl_ds_destroyarg dsda = {0};
|
||||
|
||||
if (dmu_tx_is_syncing(tx)) {
|
||||
/*
|
||||
* If we're not prepared to remove the snapshot,
|
||||
* we can't allow the release to happen right now.
|
||||
*/
|
||||
if (!ra->own)
|
||||
return (EBUSY);
|
||||
if (ds->ds_user_ptr) {
|
||||
ds->ds_user_evict_func(ds, ds->ds_user_ptr);
|
||||
ds->ds_user_ptr = NULL;
|
||||
}
|
||||
}
|
||||
dsda.ds = ds;
|
||||
dsda.releasing = B_TRUE;
|
||||
return (dsl_dataset_destroy_check(&dsda, tag, tx));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dataset_user_release_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
struct dsl_ds_releasearg *ra = arg1;
|
||||
dsl_dataset_t *ds = ra->ds;
|
||||
spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
|
||||
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
uint64_t zapobj;
|
||||
uint64_t dsobj = ds->ds_object;
|
||||
uint64_t refs;
|
||||
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ds->ds_userrefs--;
|
||||
refs = ds->ds_userrefs;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
zapobj = ds->ds_phys->ds_userrefs_obj;
|
||||
VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
|
||||
if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
|
||||
DS_IS_DEFER_DESTROY(ds)) {
|
||||
struct dsl_ds_destroyarg dsda = {0};
|
||||
|
||||
ASSERT(ra->own);
|
||||
dsda.ds = ds;
|
||||
dsda.releasing = B_TRUE;
|
||||
/* We already did the destroy_check */
|
||||
dsl_dataset_destroy_sync(&dsda, tag, cr, tx);
|
||||
}
|
||||
|
||||
spa_history_internal_log(LOG_DS_USER_RELEASE,
|
||||
spa, tx, cr, "<%s> %lld dataset = %llu",
|
||||
ra->htag, (longlong_t)refs, dsobj);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_dataset_user_release_one(char *dsname, void *arg)
|
||||
{
|
||||
struct dsl_ds_holdarg *ha = arg;
|
||||
struct dsl_ds_releasearg *ra;
|
||||
dsl_dataset_t *ds;
|
||||
int error;
|
||||
void *dtag = ha->dstg;
|
||||
char *name;
|
||||
size_t buflen;
|
||||
boolean_t own = B_FALSE;
|
||||
boolean_t might_destroy;
|
||||
|
||||
if (strlen(ha->htag) >= ZAP_MAXNAMELEN)
|
||||
return (ENAMETOOLONG);
|
||||
|
||||
/* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
|
||||
buflen = strlen(dsname) + strlen(ha->snapname) + 2;
|
||||
name = kmem_alloc(buflen, KM_SLEEP);
|
||||
(void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname);
|
||||
error = dsl_dataset_hold(name, dtag, &ds);
|
||||
kmem_free(name, buflen);
|
||||
if (error == ENOENT && ha->recursive)
|
||||
return (0);
|
||||
(void) strcpy(ha->failed, dsname);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
ASSERT(dsl_dataset_is_snapshot(ds));
|
||||
|
||||
error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
|
||||
if (error) {
|
||||
dsl_dataset_rele(ds, dtag);
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (might_destroy) {
|
||||
#ifdef _KERNEL
|
||||
error = zfs_unmount_snap(name, NULL);
|
||||
if (error) {
|
||||
dsl_dataset_rele(ds, dtag);
|
||||
return (error);
|
||||
}
|
||||
error = dsl_dataset_zvol_cleanup(ds, name);
|
||||
if (error) {
|
||||
dsl_dataset_rele(ds, dtag);
|
||||
return (error);
|
||||
}
|
||||
#endif
|
||||
if (!dsl_dataset_tryown(ds,
|
||||
DS_MODE_READONLY | DS_MODE_INCONSISTENT, dtag)) {
|
||||
dsl_dataset_rele(ds, dtag);
|
||||
return (EBUSY);
|
||||
} else {
|
||||
own = B_TRUE;
|
||||
dsl_dataset_make_exclusive(ds, dtag);
|
||||
}
|
||||
}
|
||||
|
||||
ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
|
||||
ra->ds = ds;
|
||||
ra->htag = ha->htag;
|
||||
ra->own = own;
|
||||
dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
|
||||
dsl_dataset_user_release_sync, ra, dtag, 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
|
||||
boolean_t recursive)
|
||||
{
|
||||
struct dsl_ds_holdarg *ha;
|
||||
dsl_sync_task_t *dst;
|
||||
spa_t *spa;
|
||||
int error;
|
||||
|
||||
ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
|
||||
|
||||
(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
|
||||
|
||||
error = spa_open(dsname, &spa, FTAG);
|
||||
if (error) {
|
||||
kmem_free(ha, sizeof (struct dsl_ds_holdarg));
|
||||
return (error);
|
||||
}
|
||||
|
||||
ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
||||
ha->htag = htag;
|
||||
ha->snapname = snapname;
|
||||
ha->recursive = recursive;
|
||||
if (recursive) {
|
||||
error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
|
||||
ha, DS_FIND_CHILDREN);
|
||||
} else {
|
||||
error = dsl_dataset_user_release_one(dsname, ha);
|
||||
}
|
||||
if (error == 0)
|
||||
error = dsl_sync_task_group_wait(ha->dstg);
|
||||
|
||||
for (dst = list_head(&ha->dstg->dstg_tasks); dst;
|
||||
dst = list_next(&ha->dstg->dstg_tasks, dst)) {
|
||||
struct dsl_ds_releasearg *ra = dst->dst_arg1;
|
||||
dsl_dataset_t *ds = ra->ds;
|
||||
|
||||
if (dst->dst_err)
|
||||
dsl_dataset_name(ds, ha->failed);
|
||||
|
||||
if (ra->own)
|
||||
dsl_dataset_disown(ds, ha->dstg);
|
||||
else
|
||||
dsl_dataset_rele(ds, ha->dstg);
|
||||
|
||||
kmem_free(ra, sizeof (struct dsl_ds_releasearg));
|
||||
}
|
||||
|
||||
if (error)
|
||||
(void) strcpy(dsname, ha->failed);
|
||||
|
||||
dsl_sync_task_group_destroy(ha->dstg);
|
||||
kmem_free(ha, sizeof (struct dsl_ds_holdarg));
|
||||
spa_close(spa, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
|
||||
{
|
||||
dsl_dataset_t *ds;
|
||||
int err;
|
||||
|
||||
err = dsl_dataset_hold(dsname, FTAG, &ds);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
|
||||
if (ds->ds_phys->ds_userrefs_obj != 0) {
|
||||
zap_attribute_t *za;
|
||||
zap_cursor_t zc;
|
||||
|
||||
za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
|
||||
for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
|
||||
ds->ds_phys->ds_userrefs_obj);
|
||||
zap_cursor_retrieve(&zc, za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
|
||||
za->za_first_integer));
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
kmem_free(za, sizeof (zap_attribute_t));
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -66,8 +66,6 @@
|
||||
* The ZAP OBJ is referred to as the jump object.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
@ -540,7 +538,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
|
||||
dsl_pool_t *dp;
|
||||
void *cookie;
|
||||
int error;
|
||||
char checkflag = ZFS_DELEG_LOCAL;
|
||||
char checkflag;
|
||||
objset_t *mos;
|
||||
avl_tree_t permsets;
|
||||
perm_set_t *setnode;
|
||||
@ -563,6 +561,16 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
if (dsl_dataset_is_snapshot(ds)) {
|
||||
/*
|
||||
* Snapshots are treated as descendents only,
|
||||
* local permissions do not apply.
|
||||
*/
|
||||
checkflag = ZFS_DELEG_DESCENDENT;
|
||||
} else {
|
||||
checkflag = ZFS_DELEG_LOCAL;
|
||||
}
|
||||
|
||||
avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
|
||||
offsetof(perm_set_t, p_node));
|
||||
|
||||
|
@ -96,7 +96,6 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
#endif
|
||||
if (dd == NULL) {
|
||||
dsl_dir_t *winner;
|
||||
int err;
|
||||
|
||||
dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
|
||||
dd->dd_object = ddobj;
|
||||
|
@ -442,7 +442,7 @@ dsl_props_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
void
|
||||
dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
objset_t *mos = dd->dd_pool->dp_meta_objset;
|
||||
|
@ -1024,6 +1024,8 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
|
||||
int
|
||||
dsl_pool_scrub_clean(dsl_pool_t *dp)
|
||||
{
|
||||
spa_t *spa = dp->dp_spa;
|
||||
|
||||
/*
|
||||
* Purge all vdev caches. We do this here rather than in sync
|
||||
* context because this requires a writer lock on the spa_config
|
||||
@ -1031,11 +1033,11 @@ dsl_pool_scrub_clean(dsl_pool_t *dp)
|
||||
* spa_scrub_reopen flag indicates that vdev_open() should not
|
||||
* attempt to start another scrub.
|
||||
*/
|
||||
spa_config_enter(dp->dp_spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
dp->dp_spa->spa_scrub_reopen = B_TRUE;
|
||||
vdev_reopen(dp->dp_spa->spa_root_vdev);
|
||||
dp->dp_spa->spa_scrub_reopen = B_FALSE;
|
||||
spa_config_exit(dp->dp_spa, SCL_ALL, FTAG);
|
||||
spa_vdev_state_enter(spa);
|
||||
spa->spa_scrub_reopen = B_TRUE;
|
||||
vdev_reopen(spa->spa_root_vdev);
|
||||
spa->spa_scrub_reopen = B_FALSE;
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
|
||||
return (dsl_pool_scrub_setup(dp, SCRUB_FUNC_CLEAN));
|
||||
}
|
||||
|
@ -117,6 +117,7 @@ typedef enum dmu_object_type {
|
||||
DMU_OT_SCRUB_QUEUE, /* ZAP */
|
||||
DMU_OT_USERGROUP_USED, /* ZAP */
|
||||
DMU_OT_USERGROUP_QUOTA, /* ZAP */
|
||||
DMU_OT_USERREFS, /* ZAP */
|
||||
DMU_OT_NUMTYPES
|
||||
} dmu_object_type_t;
|
||||
|
||||
@ -174,8 +175,8 @@ int dmu_objset_evict_dbufs(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname);
|
||||
int dmu_objset_destroy(const char *name, boolean_t defer);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
|
||||
boolean_t recursive);
|
||||
@ -646,10 +647,9 @@ typedef struct dmu_recv_cookie {
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
|
||||
boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
|
||||
boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
|
||||
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc);
|
||||
void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
|
||||
|
||||
/* CRC64 table */
|
||||
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -211,10 +211,11 @@ extern "C" {
|
||||
* ds_lock
|
||||
* protects:
|
||||
* ds_user_ptr
|
||||
* ds_user_evice_func
|
||||
* ds_user_evict_func
|
||||
* ds_open_refcount
|
||||
* ds_snapname
|
||||
* ds_phys accounting
|
||||
* ds_phys userrefs zapobj
|
||||
* ds_reserved
|
||||
* held from:
|
||||
* dsl_dataset_*
|
||||
|
@ -117,7 +117,7 @@ void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_objset_destroy(const char *name, boolean_t defer);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
|
||||
boolean_t recursive);
|
||||
|
@ -62,6 +62,14 @@ typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
|
||||
*/
|
||||
#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
|
||||
|
||||
/*
|
||||
* DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called
|
||||
* on a dataset. This allows the dataset to be destroyed using 'zfs release'.
|
||||
*/
|
||||
#define DS_FLAG_DEFER_DESTROY (1ULL<<3)
|
||||
#define DS_IS_DEFER_DESTROY(ds) \
|
||||
((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY)
|
||||
|
||||
/*
|
||||
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
|
||||
* name lookups should be performed case-insensitively.
|
||||
@ -93,7 +101,8 @@ typedef struct dsl_dataset_phys {
|
||||
blkptr_t ds_bp;
|
||||
uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */
|
||||
uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */
|
||||
uint64_t ds_pad[6]; /* pad out to 320 bytes for good measure */
|
||||
uint64_t ds_userrefs_obj; /* DMU_OT_USERREFS */
|
||||
uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */
|
||||
} dsl_dataset_phys_t;
|
||||
|
||||
typedef struct dsl_dataset {
|
||||
@ -111,6 +120,9 @@ typedef struct dsl_dataset {
|
||||
/* has internal locking: */
|
||||
bplist_t ds_deadlist;
|
||||
|
||||
/* to protect against multiple concurrent incremental recv */
|
||||
kmutex_t ds_recvlock;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_datasets list */
|
||||
txg_node_t ds_dirty_link;
|
||||
list_node_t ds_synced_link;
|
||||
@ -122,6 +134,7 @@ typedef struct dsl_dataset {
|
||||
kmutex_t ds_lock;
|
||||
void *ds_user_ptr;
|
||||
dsl_dataset_evict_func_t *ds_user_evict_func;
|
||||
uint64_t ds_userrefs;
|
||||
|
||||
/*
|
||||
* ds_owner is protected by the ds_rwlock and the ds_lock
|
||||
@ -143,6 +156,15 @@ typedef struct dsl_dataset {
|
||||
char ds_snapname[MAXNAMELEN];
|
||||
} dsl_dataset_t;
|
||||
|
||||
/*
 * Argument bundle for a dataset destroy request: the dataset to destroy
 * plus the flags that qualify the request (see the per-field comments).
 * NOTE(review): presumably passed to dsl_dataset_destroy_check() and
 * dsl_dataset_destroy_sync() -- confirm against the callers.
 */
struct dsl_ds_destroyarg {
|
||||
dsl_dataset_t *ds; /* ds to destroy */
|
||||
dsl_dataset_t *rm_origin; /* also remove our origin? */
|
||||
boolean_t is_origin_rm; /* set if removing origin snap */
|
||||
boolean_t defer; /* destroy -d requested? */
|
||||
boolean_t releasing; /* destroying due to release? */
|
||||
boolean_t need_prep; /* do we need to retry due to EBUSY? */
|
||||
};
|
||||
|
||||
#define dsl_dataset_is_snapshot(ds) \
|
||||
((ds)->ds_phys->ds_num_children != 0)
|
||||
|
||||
@ -167,8 +189,8 @@ uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
|
||||
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
|
||||
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
uint64_t flags, dmu_tx_t *tx);
|
||||
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
|
||||
int dsl_snapshots_destroy(char *fsname, char *snapname);
|
||||
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
|
||||
int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
|
||||
dsl_checkfunc_t dsl_dataset_destroy_check;
|
||||
dsl_syncfunc_t dsl_dataset_destroy_sync;
|
||||
dsl_checkfunc_t dsl_dataset_snapshot_check;
|
||||
@ -178,6 +200,11 @@ int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
|
||||
int dsl_dataset_promote(const char *name);
|
||||
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
|
||||
boolean_t force);
|
||||
int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
|
||||
boolean_t recursive);
|
||||
int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
|
||||
boolean_t recursive);
|
||||
int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
|
||||
|
||||
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
|
||||
void *p, dsl_dataset_evict_func_t func);
|
||||
|
@ -53,6 +53,8 @@ extern "C" {
|
||||
#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
|
||||
#define ZFS_DELEG_PERM_USERUSED "userused"
|
||||
#define ZFS_DELEG_PERM_GROUPUSED "groupused"
|
||||
#define ZFS_DELEG_PERM_HOLD "hold"
|
||||
#define ZFS_DELEG_PERM_RELEASE "release"
|
||||
|
||||
/*
|
||||
* Note: the names of properties that are marked delegatable are also
|
||||
|
@ -69,7 +69,7 @@ dsl_syncfunc_t dsl_props_set_sync;
|
||||
int dsl_prop_set(const char *ddname, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
int dsl_props_set(const char *dsname, nvlist_t *nvl);
|
||||
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
|
||||
|
@ -500,8 +500,9 @@ extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
|
||||
char *his_buf);
|
||||
extern int spa_history_log(spa_t *spa, const char *his_buf,
|
||||
history_log_type_t what);
|
||||
void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
|
||||
extern void spa_history_internal_log(history_internal_events_t event,
|
||||
spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
|
||||
extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
|
||||
|
||||
/* error handling */
|
||||
struct zbookmark;
|
||||
|
@ -105,6 +105,7 @@ struct spa {
|
||||
int spa_inject_ref; /* injection references */
|
||||
uint8_t spa_sync_on; /* sync threads are running */
|
||||
spa_load_state_t spa_load_state; /* current load operation */
|
||||
boolean_t spa_load_verbatim; /* load the given config? */
|
||||
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
||||
dsl_pool_t *spa_dsl_pool;
|
||||
metaslab_class_t *spa_normal_class; /* normal data class */
|
||||
|
@ -47,6 +47,7 @@ typedef enum vdev_dtl_type {
|
||||
extern boolean_t zfs_nocacheflush;
|
||||
|
||||
extern int vdev_open(vdev_t *);
|
||||
extern void vdev_open_children(vdev_t *vd);
|
||||
extern int vdev_validate(vdev_t *);
|
||||
extern void vdev_close(vdev_t *);
|
||||
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
|
||||
|
@ -127,6 +127,8 @@ struct vdev {
|
||||
space_map_t vdev_dtl[DTL_TYPES]; /* in-core dirty time logs */
|
||||
vdev_stat_t vdev_stat; /* virtual device statistics */
|
||||
boolean_t vdev_expanding; /* expand the vdev? */
|
||||
int vdev_open_error; /* error on last open */
|
||||
kthread_t *vdev_open_thread; /* thread opening children */
|
||||
|
||||
/*
|
||||
* Top-level vdev state.
|
||||
|
@ -182,8 +182,7 @@ int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
boolean_t *normalization_conflictp);
|
||||
|
||||
int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
|
||||
int add, uint64_t *towrite, uint64_t *tooverwrite,
|
||||
uint64_t dn_datablkshift);
|
||||
int add, uint64_t *towrite, uint64_t *tooverwrite);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
|
@ -203,6 +203,7 @@ void zfs_oldace_byteswap(ace_t *, int);
|
||||
void zfs_ace_byteswap(void *, size_t, boolean_t);
|
||||
extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr);
|
||||
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
|
||||
int zfs_fastaccesschk_execute(struct znode *, cred_t *);
|
||||
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
|
||||
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
|
||||
extern int zfs_acl_access(struct znode *, int, cred_t *);
|
||||
|
@ -165,6 +165,7 @@ typedef struct zfs_cmd {
|
||||
dmu_objset_stats_t zc_objset_stats;
|
||||
struct drr_begin zc_begin_record;
|
||||
zinject_record_t zc_inject_record;
|
||||
boolean_t zc_defer_destroy;
|
||||
} zfs_cmd_t;
|
||||
|
||||
typedef struct zfs_useracct {
|
||||
|
@ -73,7 +73,6 @@ struct zfsvfs {
|
||||
boolean_t z_vscan; /* virus scan on/off */
|
||||
boolean_t z_use_fuids; /* version allows fuids */
|
||||
boolean_t z_replay; /* set during ZIL replay */
|
||||
kmutex_t z_online_recv_lock; /* held while recv in progress */
|
||||
uint64_t z_version; /* ZPL version */
|
||||
uint64_t z_shares_dir; /* hidden shares dir */
|
||||
kmutex_t z_lock;
|
||||
|
@ -77,6 +77,7 @@ extern "C" {
|
||||
#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
|
||||
#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
|
||||
#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
|
||||
#define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */
|
||||
|
||||
/*
|
||||
* Is ID ephemeral?
|
||||
@ -200,6 +201,7 @@ typedef struct znode {
|
||||
uint64_t z_gen; /* generation (same as zp_gen) */
|
||||
uint32_t z_sync_cnt; /* synchronous open count */
|
||||
kmutex_t z_acl_lock; /* acl data lock */
|
||||
zfs_acl_t *z_acl_cached; /* cached acl */
|
||||
list_node_t z_link_node; /* all znodes in fs link */
|
||||
/*
|
||||
* These are dmu managed fields.
|
||||
|
@ -143,6 +143,8 @@ enum zio_compress {
|
||||
#define ZIO_FLAG_GODFATHER 0x080000
|
||||
|
||||
#define ZIO_FLAG_TRYHARD 0x100000
|
||||
#define ZIO_FLAG_NODATA 0x200000
|
||||
#define ZIO_FLAG_OPTIONAL 0x400000
|
||||
|
||||
#define ZIO_FLAG_GANG_INHERIT \
|
||||
(ZIO_FLAG_CANFAIL | \
|
||||
@ -161,7 +163,9 @@ enum zio_compress {
|
||||
ZIO_FLAG_IO_REPAIR | \
|
||||
ZIO_FLAG_IO_RETRY | \
|
||||
ZIO_FLAG_PROBE | \
|
||||
ZIO_FLAG_TRYHARD)
|
||||
ZIO_FLAG_TRYHARD | \
|
||||
ZIO_FLAG_NODATA | \
|
||||
ZIO_FLAG_OPTIONAL)
|
||||
|
||||
#define ZIO_FLAG_AGG_INHERIT \
|
||||
(ZIO_FLAG_DONT_AGGREGATE | \
|
||||
|
@ -19,12 +19,10 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/rrwlock.h>
|
||||
|
||||
@ -118,7 +116,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
|
||||
rrw_node_t *prev = NULL;
|
||||
|
||||
if (refcount_count(&rrl->rr_linked_rcount) == 0)
|
||||
return (NULL);
|
||||
return (B_FALSE);
|
||||
|
||||
for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
|
||||
if (rn->rn_rrl == rrl) {
|
||||
@ -159,6 +157,14 @@ static void
|
||||
rrw_enter_read(rrwlock_t *rrl, void *tag)
|
||||
{
|
||||
mutex_enter(&rrl->rr_lock);
|
||||
#if !defined(DEBUG) && defined(_KERNEL)
|
||||
if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
|
||||
rrl->rr_anon_rcount.rc_count++;
|
||||
mutex_exit(&rrl->rr_lock);
|
||||
return;
|
||||
}
|
||||
DTRACE_PROBE(zfs__rrwfastpath__rdmiss);
|
||||
#endif
|
||||
ASSERT(rrl->rr_writer != curthread);
|
||||
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
|
||||
|
||||
@ -208,19 +214,28 @@ void
|
||||
rrw_exit(rrwlock_t *rrl, void *tag)
|
||||
{
|
||||
mutex_enter(&rrl->rr_lock);
|
||||
#if !defined(DEBUG) && defined(_KERNEL)
|
||||
if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) {
|
||||
rrl->rr_anon_rcount.rc_count--;
|
||||
if (rrl->rr_anon_rcount.rc_count == 0)
|
||||
cv_broadcast(&rrl->rr_cv);
|
||||
mutex_exit(&rrl->rr_lock);
|
||||
return;
|
||||
}
|
||||
DTRACE_PROBE(zfs__rrwfastpath__exitmiss);
|
||||
#endif
|
||||
ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) ||
|
||||
!refcount_is_zero(&rrl->rr_linked_rcount) ||
|
||||
rrl->rr_writer != NULL);
|
||||
|
||||
if (rrl->rr_writer == NULL) {
|
||||
if (rrn_find_and_remove(rrl)) {
|
||||
if (refcount_remove(&rrl->rr_linked_rcount, tag) == 0)
|
||||
int64_t count;
|
||||
if (rrn_find_and_remove(rrl))
|
||||
count = refcount_remove(&rrl->rr_linked_rcount, tag);
|
||||
else
|
||||
count = refcount_remove(&rrl->rr_anon_rcount, tag);
|
||||
if (count == 0)
|
||||
cv_broadcast(&rrl->rr_cv);
|
||||
|
||||
} else {
|
||||
if (refcount_remove(&rrl->rr_anon_rcount, tag) == 0)
|
||||
cv_broadcast(&rrl->rr_cv);
|
||||
}
|
||||
} else {
|
||||
ASSERT(rrl->rr_writer == curthread);
|
||||
ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) &&
|
||||
|
@ -1574,9 +1574,12 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
|
||||
/*
|
||||
* If the config cache is stale, or we have uninitialized
|
||||
* metaslabs (see spa_vdev_add()), then update the config.
|
||||
*
|
||||
* If spa_load_verbatim is true, trust the current
|
||||
* in-core spa_config and update the disk labels.
|
||||
*/
|
||||
if (config_cache_txg != spa->spa_config_txg ||
|
||||
state == SPA_LOAD_IMPORT)
|
||||
state == SPA_LOAD_IMPORT || spa->spa_load_verbatim)
|
||||
need_update = B_TRUE;
|
||||
|
||||
for (int c = 0; c < rvd->vdev_children; c++)
|
||||
@ -2271,6 +2274,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
|
||||
if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
|
||||
(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
|
||||
spa_history_log_version(spa, LOG_POOL_CREATE);
|
||||
|
||||
spa->spa_minref = refcount_count(&spa->spa_refcount);
|
||||
|
||||
@ -2404,6 +2408,7 @@ spa_import_rootpool(char *devpath, char *devid)
|
||||
|
||||
spa = spa_add(pname, NULL);
|
||||
spa->spa_is_root = B_TRUE;
|
||||
spa->spa_load_verbatim = B_TRUE;
|
||||
|
||||
/*
|
||||
* Build up a vdev tree based on the boot device's label config.
|
||||
@ -2459,6 +2464,7 @@ spa_import_rootpool(char *devpath, char *devid)
|
||||
|
||||
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
|
||||
error = 0;
|
||||
spa_history_log_version(spa, LOG_POOL_IMPORT);
|
||||
out:
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
vdev_free(rvd);
|
||||
@ -2491,6 +2497,8 @@ spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
|
||||
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
|
||||
spa = spa_add(pool, altroot);
|
||||
|
||||
spa->spa_load_verbatim = B_TRUE;
|
||||
|
||||
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
|
||||
|
||||
if (props != NULL)
|
||||
@ -2499,6 +2507,7 @@ spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
|
||||
spa_config_sync(spa, B_FALSE, B_TRUE);
|
||||
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
spa_history_log_version(spa, LOG_POOL_IMPORT);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -2624,7 +2633,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
|
||||
/*
|
||||
* Update the config cache to include the newly-imported pool.
|
||||
*/
|
||||
spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, B_FALSE);
|
||||
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2634,6 +2643,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
|
||||
spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
|
||||
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
spa_history_log_version(spa, LOG_POOL_IMPORT);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -2991,7 +3001,6 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
|
||||
vdev_ops_t *pvops;
|
||||
dmu_tx_t *tx;
|
||||
char *oldvdpath, *newvdpath;
|
||||
int newvd_isspare;
|
||||
int error;
|
||||
@ -3147,17 +3156,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
|
||||
(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);
|
||||
|
||||
tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
|
||||
if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
|
||||
spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, tx,
|
||||
spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
|
||||
CRED(), "%s vdev=%s %s vdev=%s",
|
||||
replacing && newvd_isspare ? "spare in" :
|
||||
replacing ? "replace" : "attach", newvdpath,
|
||||
replacing ? "for" : "to", oldvdpath);
|
||||
dmu_tx_commit(tx);
|
||||
} else {
|
||||
dmu_tx_abort(tx);
|
||||
}
|
||||
|
||||
spa_strfree(oldvdpath);
|
||||
spa_strfree(newvdpath);
|
||||
@ -3747,19 +3750,11 @@ spa_async_thread(spa_t *spa)
|
||||
* then log an internal history event.
|
||||
*/
|
||||
if (space_update) {
|
||||
dmu_tx_t *tx;
|
||||
|
||||
tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
|
||||
if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
|
||||
spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
|
||||
spa, tx, CRED(),
|
||||
spa, NULL, CRED(),
|
||||
"pool '%s' size: %llu(+%llu)",
|
||||
spa_name(spa), spa_get_space(spa),
|
||||
space_update);
|
||||
dmu_tx_commit(tx);
|
||||
} else {
|
||||
dmu_tx_abort(tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -209,7 +209,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
if (rootdir == NULL)
|
||||
if (rootdir == NULL || !(spa_mode_global & FWRITE))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -393,24 +393,13 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
|
||||
return (config);
|
||||
}
|
||||
|
||||
/*
|
||||
* For a pool that's not currently a booting rootpool, update all disk labels,
|
||||
* generate a fresh config based on the current in-core state, and sync the
|
||||
* global config cache.
|
||||
*/
|
||||
void
|
||||
spa_config_update(spa_t *spa, int what)
|
||||
{
|
||||
spa_config_update_common(spa, what, FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update all disk labels, generate a fresh config based on the current
|
||||
* in-core state, and sync the global config cache (do not sync the config
|
||||
* cache if this is a booting rootpool).
|
||||
*/
|
||||
void
|
||||
spa_config_update_common(spa_t *spa, int what, boolean_t isroot)
|
||||
spa_config_update(spa_t *spa, int what)
|
||||
{
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
uint64_t txg;
|
||||
@ -447,9 +436,9 @@ spa_config_update_common(spa_t *spa, int what, boolean_t isroot)
|
||||
/*
|
||||
* Update the global config cache to reflect the new mosconfig.
|
||||
*/
|
||||
if (!isroot)
|
||||
if (!spa->spa_is_root)
|
||||
spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL);
|
||||
|
||||
if (what == SPA_CONFIG_UPDATE_POOL)
|
||||
spa_config_update_common(spa, SPA_CONFIG_UPDATE_VDEVS, isroot);
|
||||
spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
|
||||
}
|
||||
|
@ -390,13 +390,12 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...)
|
||||
static void
|
||||
log_internal(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, va_list adx)
|
||||
{
|
||||
history_arg_t *hap;
|
||||
char *str;
|
||||
va_list adx;
|
||||
|
||||
/*
|
||||
* If this is part of creating a pool, not everything is
|
||||
@ -408,9 +407,7 @@ spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
hap = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
|
||||
str = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
|
||||
|
||||
va_start(adx, fmt);
|
||||
(void) vsnprintf(str, HIS_MAX_RECORD_LEN, fmt, adx);
|
||||
va_end(adx);
|
||||
|
||||
hap->ha_log_type = LOG_INTERNAL;
|
||||
hap->ha_history_str = str;
|
||||
@ -425,3 +422,48 @@ spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
}
|
||||
/* spa_history_log_sync() will free hap and str */
|
||||
}
|
||||
|
||||
/*
 * Log an internal history event for the pool, formatting the message
 * printf-style from fmt and the variadic arguments.
 *
 * tx may be NULL.  In that case a transaction is created against the
 * pool's MOS directory and assigned with TXG_WAIT; if the assignment
 * fails the transaction is aborted and the event is silently dropped.
 * A transaction created here is committed before returning; a
 * transaction supplied by the caller is left for the caller to commit.
 */
void
|
||||
spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...)
|
||||
{
|
||||
dmu_tx_t *htx = tx;
|
||||
va_list adx;
|
||||
|
||||
/* create a tx if we didn't get one */
|
||||
if (tx == NULL) {
|
||||
htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
|
||||
if (dmu_tx_assign(htx, TXG_WAIT) != 0) {
|
||||
dmu_tx_abort(htx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
va_start(adx, fmt);
|
||||
log_internal(event, spa, htx, cr, fmt, adx);
|
||||
va_end(adx);
|
||||
|
||||
/* if we didn't get a tx from the caller, commit the one we made */
|
||||
if (tx == NULL)
|
||||
dmu_tx_commit(htx);
|
||||
}
|
||||
|
||||
/*
 * Record which software versions touched the pool for the given event.
 *
 * The whole body is compiled only under _KERNEL; elsewhere this is a
 * no-op.  When the pool version is at least SPA_VERSION_ZPOOL_HISTORY,
 * an internal history record is written (with a NULL tx, so the logging
 * routine creates its own) containing the pool version, SPA_VERSION,
 * ZPL_VERSION and the utsname fields.  A cmn_err(CE_CONT) message is
 * emitted regardless of the pool version, describing the event as
 * "imported", "created", or "accessed" for any other event.
 */
void
|
||||
spa_history_log_version(spa_t *spa, history_internal_events_t event)
|
||||
{
|
||||
#ifdef _KERNEL
|
||||
uint64_t current_vers = spa_version(spa);
|
||||
|
||||
if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
|
||||
spa_history_internal_log(event, spa, NULL, CRED(),
|
||||
"pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
|
||||
(u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
|
||||
utsname.nodename, utsname.release, utsname.version,
|
||||
utsname.machine);
|
||||
}
|
||||
cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
|
||||
event == LOG_POOL_IMPORT ? "imported" :
|
||||
event == LOG_POOL_CREATE ? "created" : "accessed",
|
||||
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
|
||||
#endif
|
||||
}
|
||||
|
@ -310,8 +310,12 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
|
||||
void
|
||||
spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
|
||||
{
|
||||
int wlocks_held = 0;
|
||||
|
||||
for (int i = 0; i < SCL_LOCKS; i++) {
|
||||
spa_config_lock_t *scl = &spa->spa_config_lock[i];
|
||||
if (scl->scl_writer == curthread)
|
||||
wlocks_held |= (1 << i);
|
||||
if (!(locks & (1 << i)))
|
||||
continue;
|
||||
mutex_enter(&scl->scl_lock);
|
||||
@ -331,6 +335,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
|
||||
(void) refcount_add(&scl->scl_count, tag);
|
||||
mutex_exit(&scl->scl_lock);
|
||||
}
|
||||
ASSERT(wlocks_held <= locks);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -405,22 +405,26 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
|
||||
&nparity) == 0) {
|
||||
/*
|
||||
* Currently, we can only support 2 parity devices.
|
||||
* Currently, we can support at most 3 parity devices.
|
||||
*/
|
||||
if (nparity == 0 || nparity > 2)
|
||||
if (nparity == 0 || nparity > 3)
|
||||
return (EINVAL);
|
||||
/*
|
||||
* Older versions can only support 1 parity device.
|
||||
* Previous versions could only support 1 or 2 parity
|
||||
* devices.
|
||||
*/
|
||||
if (nparity == 2 &&
|
||||
spa_version(spa) < SPA_VERSION_RAID6)
|
||||
if (nparity > 1 &&
|
||||
spa_version(spa) < SPA_VERSION_RAIDZ2)
|
||||
return (ENOTSUP);
|
||||
if (nparity > 2 &&
|
||||
spa_version(spa) < SPA_VERSION_RAIDZ3)
|
||||
return (ENOTSUP);
|
||||
} else {
|
||||
/*
|
||||
* We require the parity to be specified for SPAs that
|
||||
* support multiple parity levels.
|
||||
*/
|
||||
if (spa_version(spa) >= SPA_VERSION_RAID6)
|
||||
if (spa_version(spa) >= SPA_VERSION_RAIDZ2)
|
||||
return (EINVAL);
|
||||
/*
|
||||
* Otherwise, we default to 1 parity device for RAID-Z.
|
||||
@ -993,6 +997,32 @@ vdev_probe(vdev_t *vd, zio_t *zio)
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * Task function dispatched by vdev_open_children(): open one child vdev.
 *
 * arg is the child vdev_t.  vdev_open_thread is set to the current
 * thread for the duration of the vdev_open() call (vdev_open() asserts
 * that either this field matches curthread or the config lock is held
 * as writer), and the result of the open is stored in vdev_open_error
 * for the parent to inspect afterwards.
 */
static void
|
||||
vdev_open_child(void *arg)
|
||||
{
|
||||
vdev_t *vd = arg;
|
||||
|
||||
vd->vdev_open_thread = curthread;
|
||||
vd->vdev_open_error = vdev_open(vd);
|
||||
vd->vdev_open_thread = NULL;
|
||||
}
|
||||
|
||||
/*
 * Open all children of vd by dispatching one vdev_open_child() task per
 * child onto a temporary "vdev_open" taskq sized to the number of
 * children.  Each child's result is left in its vdev_open_error field
 * rather than returned here.
 *
 * NOTE(review): this relies on taskq_destroy() waiting for all
 * dispatched tasks to finish, so that every child has been opened by
 * the time this function returns -- confirm against the taskq
 * implementation in use.
 */
void
|
||||
vdev_open_children(vdev_t *vd)
|
||||
{
|
||||
taskq_t *tq;
|
||||
int children = vd->vdev_children;
|
||||
|
||||
tq = taskq_create("vdev_open", children, minclsyspri,
|
||||
children, children, TASKQ_PREPOPULATE);
|
||||
|
||||
for (int c = 0; c < children; c++)
|
||||
VERIFY(taskq_dispatch(tq, vdev_open_child, vd->vdev_child[c],
|
||||
TQ_SLEEP) != NULL);
|
||||
|
||||
taskq_destroy(tq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare a virtual device for access.
|
||||
*/
|
||||
@ -1005,8 +1035,8 @@ vdev_open(vdev_t *vd)
|
||||
uint64_t asize, psize;
|
||||
uint64_t ashift = 0;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
|
||||
ASSERT(vd->vdev_open_thread == curthread ||
|
||||
spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
|
||||
vd->vdev_state == VDEV_STATE_CANT_OPEN ||
|
||||
vd->vdev_state == VDEV_STATE_OFFLINE);
|
||||
@ -1217,7 +1247,12 @@ vdev_validate(vdev_t *vd)
|
||||
|
||||
nvlist_free(label);
|
||||
|
||||
if (spa->spa_load_state == SPA_LOAD_OPEN &&
|
||||
/*
|
||||
* If spa->spa_load_verbatim is true, no need to check the
|
||||
* state of the pool.
|
||||
*/
|
||||
if (!spa->spa_load_verbatim &&
|
||||
spa->spa_load_state == SPA_LOAD_OPEN &&
|
||||
state != POOL_STATE_ACTIVE)
|
||||
return (EBADF);
|
||||
|
||||
|
@ -246,8 +246,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
* into a crufty old storage pool.
|
||||
*/
|
||||
ASSERT(vd->vdev_nparity == 1 ||
|
||||
(vd->vdev_nparity == 2 &&
|
||||
spa_version(spa) >= SPA_VERSION_RAID6));
|
||||
(vd->vdev_nparity <= 2 &&
|
||||
spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
|
||||
(vd->vdev_nparity <= 3 &&
|
||||
spa_version(spa) >= SPA_VERSION_RAIDZ3));
|
||||
|
||||
/*
|
||||
* Note that we'll add the nparity tag even on storage pools
|
||||
@ -642,8 +644,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
||||
/*
|
||||
* Initialize uberblock template.
|
||||
*/
|
||||
ub = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd));
|
||||
bzero(ub, VDEV_UBERBLOCK_SIZE(vd));
|
||||
ub = zio_buf_alloc(VDEV_UBERBLOCK_RING);
|
||||
bzero(ub, VDEV_UBERBLOCK_RING);
|
||||
*ub = spa->spa_uberblock;
|
||||
ub->ub_txg = 0;
|
||||
|
||||
@ -672,11 +674,9 @@ retry:
|
||||
offsetof(vdev_label_t, vl_pad2),
|
||||
VDEV_PAD_SIZE, NULL, NULL, flags);
|
||||
|
||||
for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
|
||||
vdev_label_write(zio, vd, l, ub,
|
||||
VDEV_UBERBLOCK_OFFSET(vd, n),
|
||||
VDEV_UBERBLOCK_SIZE(vd), NULL, NULL, flags);
|
||||
}
|
||||
offsetof(vdev_label_t, vl_uberblock),
|
||||
VDEV_UBERBLOCK_RING, NULL, NULL, flags);
|
||||
}
|
||||
|
||||
error = zio_wait(zio);
|
||||
@ -688,7 +688,7 @@ retry:
|
||||
|
||||
nvlist_free(label);
|
||||
zio_buf_free(pad2, VDEV_PAD_SIZE);
|
||||
zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
|
||||
zio_buf_free(ub, VDEV_UBERBLOCK_RING);
|
||||
zio_buf_free(vp, sizeof (vdev_phys_t));
|
||||
|
||||
/*
|
||||
|
@ -124,21 +124,21 @@ vdev_mirror_map_alloc(zio_t *zio)
|
||||
static int
|
||||
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
{
|
||||
vdev_t *cvd;
|
||||
uint64_t c;
|
||||
int numerrors = 0;
|
||||
int ret, lasterror = 0;
|
||||
int lasterror = 0;
|
||||
|
||||
if (vd->vdev_children == 0) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
cvd = vd->vdev_child[c];
|
||||
vdev_open_children(vd);
|
||||
|
||||
if ((ret = vdev_open(cvd)) != 0) {
|
||||
lasterror = ret;
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
|
||||
if (cvd->vdev_open_error) {
|
||||
lasterror = cvd->vdev_open_error;
|
||||
numerrors++;
|
||||
continue;
|
||||
}
|
||||
@ -158,9 +158,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
static void
|
||||
vdev_mirror_close(vdev_t *vd)
|
||||
{
|
||||
uint64_t c;
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_close(vd->vdev_child[c]);
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,7 @@
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/avl.h>
|
||||
@ -48,11 +48,14 @@ int zfs_vdev_time_shift = 6;
|
||||
int zfs_vdev_ramp_rate = 2;
|
||||
|
||||
/*
|
||||
* To reduce IOPs, we aggregate small adjacent i/os into one large i/o.
|
||||
* For read i/os, we also aggregate across small adjacency gaps.
|
||||
* To reduce IOPs, we aggregate small adjacent I/Os into one large I/O.
|
||||
* For read I/Os, we also aggregate across small adjacency gaps; for writes
|
||||
* we include spans of optional I/Os to aid aggregation at the disk even when
|
||||
* they aren't able to help us aggregate at this level.
|
||||
*/
|
||||
int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
|
||||
int zfs_vdev_read_gap_limit = 32 << 10;
|
||||
int zfs_vdev_write_gap_limit = 4 << 10;
|
||||
|
||||
/*
|
||||
* Virtual device vector for disk I/O scheduling.
|
||||
@ -172,12 +175,14 @@ vdev_queue_agg_io_done(zio_t *aio)
|
||||
static zio_t *
|
||||
vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
||||
{
|
||||
zio_t *fio, *lio, *aio, *dio, *nio;
|
||||
zio_t *fio, *lio, *aio, *dio, *nio, *mio;
|
||||
avl_tree_t *t;
|
||||
int flags;
|
||||
uint64_t maxspan = zfs_vdev_aggregation_limit;
|
||||
uint64_t maxgap;
|
||||
int stretch;
|
||||
|
||||
again:
|
||||
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
||||
|
||||
if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit ||
|
||||
@ -192,21 +197,88 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
||||
|
||||
if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
|
||||
/*
|
||||
* We can aggregate I/Os that are adjacent and of the
|
||||
* same flavor, as expressed by the AGG_INHERIT flags.
|
||||
* The latter is necessary so that certain attributes
|
||||
* of the I/O, such as whether it's a normal I/O or a
|
||||
* scrub/resilver, can be preserved in the aggregate.
|
||||
* We can aggregate I/Os that are sufficiently adjacent and of
|
||||
* the same flavor, as expressed by the AGG_INHERIT flags.
|
||||
* The latter requirement is necessary so that certain
|
||||
* attributes of the I/O, such as whether it's a normal I/O
|
||||
* or a scrub/resilver, can be preserved in the aggregate.
|
||||
* We can include optional I/Os, but don't allow them
|
||||
* to begin a range as they add no benefit in that situation.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We keep track of the last non-optional I/O.
|
||||
*/
|
||||
mio = (fio->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : fio;
|
||||
|
||||
/*
|
||||
* Walk backwards through sufficiently contiguous I/Os
|
||||
* recording the last non-optional I/O.
|
||||
*/
|
||||
while ((dio = AVL_PREV(t, fio)) != NULL &&
|
||||
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
|
||||
IO_SPAN(dio, lio) <= maxspan && IO_GAP(dio, fio) <= maxgap)
|
||||
IO_SPAN(dio, lio) <= maxspan &&
|
||||
IO_GAP(dio, fio) <= maxgap) {
|
||||
fio = dio;
|
||||
if (mio == NULL && !(fio->io_flags & ZIO_FLAG_OPTIONAL))
|
||||
mio = fio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip any initial optional I/Os.
|
||||
*/
|
||||
while ((fio->io_flags & ZIO_FLAG_OPTIONAL) && fio != lio) {
|
||||
fio = AVL_NEXT(t, fio);
|
||||
ASSERT(fio != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk forward through sufficiently contiguous I/Os.
|
||||
*/
|
||||
while ((dio = AVL_NEXT(t, lio)) != NULL &&
|
||||
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
|
||||
IO_SPAN(fio, dio) <= maxspan && IO_GAP(lio, dio) <= maxgap)
|
||||
IO_SPAN(fio, dio) <= maxspan &&
|
||||
IO_GAP(lio, dio) <= maxgap) {
|
||||
lio = dio;
|
||||
if (!(lio->io_flags & ZIO_FLAG_OPTIONAL))
|
||||
mio = lio;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that we've established the range of the I/O aggregation
|
||||
* we must decide what to do with trailing optional I/Os.
|
||||
* For reads, there's nothing to do. While we are unable to
|
||||
* aggregate further, it's possible that a trailing optional
|
||||
* I/O would allow the underlying device to aggregate with
|
||||
* subsequent I/Os. We must therefore determine if the next
|
||||
* non-optional I/O is close enough to make aggregation
|
||||
* worthwhile.
|
||||
*/
|
||||
stretch = B_FALSE;
|
||||
if (t != &vq->vq_read_tree && mio != NULL) {
|
||||
nio = lio;
|
||||
while ((dio = AVL_NEXT(t, nio)) != NULL &&
|
||||
IO_GAP(nio, dio) == 0 &&
|
||||
IO_GAP(mio, dio) <= zfs_vdev_write_gap_limit) {
|
||||
nio = dio;
|
||||
if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) {
|
||||
stretch = B_TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (stretch) {
|
||||
/* This may be a no-op. */
|
||||
VERIFY((dio = AVL_NEXT(t, lio)) != NULL);
|
||||
dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
|
||||
} else {
|
||||
while (lio != mio && lio != fio) {
|
||||
ASSERT(lio->io_flags & ZIO_FLAG_OPTIONAL);
|
||||
lio = AVL_PREV(t, lio);
|
||||
ASSERT(lio != NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fio != lio) {
|
||||
@ -225,10 +297,15 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
||||
ASSERT(dio->io_type == aio->io_type);
|
||||
ASSERT(dio->io_vdev_tree == t);
|
||||
|
||||
if (dio->io_type == ZIO_TYPE_WRITE)
|
||||
if (dio->io_flags & ZIO_FLAG_NODATA) {
|
||||
ASSERT(dio->io_type == ZIO_TYPE_WRITE);
|
||||
bzero((char *)aio->io_data + (dio->io_offset -
|
||||
aio->io_offset), dio->io_size);
|
||||
} else if (dio->io_type == ZIO_TYPE_WRITE) {
|
||||
bcopy(dio->io_data, (char *)aio->io_data +
|
||||
(dio->io_offset - aio->io_offset),
|
||||
dio->io_size);
|
||||
}
|
||||
|
||||
zio_add_child(dio, aio);
|
||||
vdev_queue_io_remove(vq, dio);
|
||||
@ -244,6 +321,20 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
||||
ASSERT(fio->io_vdev_tree == t);
|
||||
vdev_queue_io_remove(vq, fio);
|
||||
|
||||
/*
|
||||
* If the I/O is or was optional and therefore has no data, we need to
|
||||
* simply discard it. We need to drop the vdev queue's lock to avoid a
|
||||
* deadlock that we could encounter since this I/O will complete
|
||||
* immediately.
|
||||
*/
|
||||
if (fio->io_flags & ZIO_FLAG_NODATA) {
|
||||
mutex_exit(&vq->vq_lock);
|
||||
zio_vdev_io_bypass(fio);
|
||||
zio_execute(fio);
|
||||
mutex_enter(&vq->vq_lock);
|
||||
goto again;
|
||||
}
|
||||
|
||||
avl_add(&vq->vq_pending_tree, fio);
|
||||
|
||||
return (fio);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -52,7 +52,6 @@ too_many_errors(vdev_t *vd, int numerrors)
|
||||
static int
|
||||
vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
{
|
||||
int c;
|
||||
int lasterror = 0;
|
||||
int numerrors = 0;
|
||||
|
||||
@ -61,15 +60,14 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
int error;
|
||||
vdev_open_children(vd);
|
||||
|
||||
if ((error = vdev_open(cvd)) != 0 &&
|
||||
!cvd->vdev_islog) {
|
||||
lasterror = error;
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
|
||||
if (cvd->vdev_open_error && !cvd->vdev_islog) {
|
||||
lasterror = cvd->vdev_open_error;
|
||||
numerrors++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,9 +85,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
static void
|
||||
vdev_root_close(vdev_t *vd)
|
||||
{
|
||||
int c;
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_close(vd->vdev_child[c]);
|
||||
}
|
||||
|
||||
|
@ -1068,7 +1068,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
|
||||
|
||||
int
|
||||
zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
uint64_t *towrite, uint64_t *tooverwrite, uint64_t dn_datablkshift)
|
||||
uint64_t *towrite, uint64_t *tooverwrite)
|
||||
{
|
||||
zap_t *zap;
|
||||
int err = 0;
|
||||
@ -1112,29 +1112,29 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
*/
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
|
||||
}
|
||||
} else {
|
||||
if (!add) {
|
||||
if (dmu_buf_freeable(zap->zap_dbuf))
|
||||
*tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
*towrite += SPA_MAXBLOCKSIZE;
|
||||
} else {
|
||||
/*
|
||||
* We are here if we are adding and (name != NULL).
|
||||
* It is hard to find out if this add will promote this
|
||||
* microzap to fatzap. Hence, we assume the worst case
|
||||
* and account for the blocks assuming this microzap
|
||||
* would be promoted to a fatzap.
|
||||
* We are here if (name != NULL) and this is a micro-zap.
|
||||
* We account for the header block depending on whether it
|
||||
* is freeable.
|
||||
*
|
||||
* In case of an add operation it is hard to find out
|
||||
* if this add will promote this microzap to fatzap.
|
||||
* Hence, we consider the worst case and account for the
|
||||
* blocks assuming this microzap would be promoted to a
|
||||
* fatzap.
|
||||
*
|
||||
* 1 block overwritten : header block
|
||||
* 4 new blocks written : 2 new split leaf, 2 grown
|
||||
* ptrtbl blocks
|
||||
*/
|
||||
if (dmu_buf_freeable(zap->zap_dbuf))
|
||||
*tooverwrite += 1 << dn_datablkshift;
|
||||
*tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
*towrite += 1 << dn_datablkshift;
|
||||
*towrite += 4 << dn_datablkshift;
|
||||
*towrite += SPA_MAXBLOCKSIZE;
|
||||
|
||||
if (add) {
|
||||
*towrite += 4 * SPA_MAXBLOCKSIZE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,6 +93,8 @@
|
||||
#define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
|
||||
ZFS_ACL_OBJ_ACE)
|
||||
|
||||
#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
|
||||
|
||||
static uint16_t
|
||||
zfs_ace_v0_get_type(void *acep)
|
||||
{
|
||||
@ -781,6 +783,7 @@ zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
|
||||
uint64_t who;
|
||||
uint16_t iflags, type;
|
||||
uint32_t access_mask;
|
||||
boolean_t an_exec_denied = B_FALSE;
|
||||
|
||||
mode = (zp->z_phys->zp_mode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
|
||||
|
||||
@ -905,8 +908,32 @@ zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Only care if this IDENTIFIER_GROUP or
|
||||
* USER ACE denies execute access to someone,
|
||||
* mode is not affected
|
||||
*/
|
||||
if ((access_mask & ACE_EXECUTE) && type == DENY)
|
||||
an_exec_denied = B_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Failure to allow is effectively a deny, so execute permission
|
||||
* is denied if it was never mentioned or if we explicitly
|
||||
* weren't allowed it.
|
||||
*/
|
||||
if (!an_exec_denied &&
|
||||
((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
|
||||
(mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
|
||||
an_exec_denied = B_TRUE;
|
||||
|
||||
if (an_exec_denied)
|
||||
zp->z_phys->zp_flags &= ~ZFS_NO_EXECS_DENIED;
|
||||
else
|
||||
zp->z_phys->zp_flags |= ZFS_NO_EXECS_DENIED;
|
||||
|
||||
return (mode);
|
||||
}
|
||||
|
||||
@ -946,7 +973,8 @@ zfs_acl_node_read_internal(znode_t *zp, boolean_t will_modify)
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an external acl object.
|
||||
* Read an external acl object. If the intent is to modify, always
|
||||
* create a new acl and leave any cached acl in place.
|
||||
*/
|
||||
static int
|
||||
zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
|
||||
@ -960,8 +988,15 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
|
||||
|
||||
ASSERT(MUTEX_HELD(&zp->z_acl_lock));
|
||||
|
||||
if (zp->z_acl_cached && !will_modify) {
|
||||
*aclpp = zp->z_acl_cached;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
|
||||
*aclpp = zfs_acl_node_read_internal(zp, will_modify);
|
||||
if (!will_modify)
|
||||
zp->z_acl_cached = *aclpp;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -995,6 +1030,8 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
|
||||
}
|
||||
|
||||
*aclpp = aclp;
|
||||
if (!will_modify)
|
||||
zp->z_acl_cached = aclp;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1019,11 +1056,16 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
|
||||
|
||||
dmu_buf_will_dirty(zp->z_dbuf, tx);
|
||||
|
||||
if (zp->z_acl_cached) {
|
||||
zfs_acl_free(zp->z_acl_cached);
|
||||
zp->z_acl_cached = NULL;
|
||||
}
|
||||
|
||||
zphys->zp_mode = zfs_mode_compute(zp, aclp);
|
||||
|
||||
/*
|
||||
* Decide which opbject type to use. If we are forced to
|
||||
* use old ACL format than transform ACL into zfs_oldace_t
|
||||
* Decide which object type to use. If we are forced to
|
||||
* use old ACL format then transform ACL into zfs_oldace_t
|
||||
* layout.
|
||||
*/
|
||||
if (!zfsvfs->z_use_fuids) {
|
||||
@ -1869,7 +1911,6 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
|
||||
mutex_exit(&dzp->z_acl_lock);
|
||||
acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
|
||||
vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
|
||||
zfs_acl_free(paclp);
|
||||
} else {
|
||||
acl_ids->z_aclp =
|
||||
zfs_acl_alloc(zfs_acl_version_zp(dzp));
|
||||
@ -1998,8 +2039,6 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
|
||||
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
|
||||
zfs_acl_free(aclp);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -2095,11 +2134,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
|
||||
aclp->z_hints |= (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS);
|
||||
}
|
||||
top:
|
||||
if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) {
|
||||
zfs_acl_free(aclp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
mutex_enter(&zp->z_lock);
|
||||
mutex_enter(&zp->z_acl_lock);
|
||||
|
||||
@ -2145,6 +2179,7 @@ top:
|
||||
|
||||
error = zfs_aclset_common(zp, aclp, cr, tx);
|
||||
ASSERT(error == 0);
|
||||
zp->z_acl_cached = aclp;
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
@ -2154,7 +2189,6 @@ top:
|
||||
|
||||
if (fuidp)
|
||||
zfs_fuid_info_free(fuidp);
|
||||
zfs_acl_free(aclp);
|
||||
dmu_tx_commit(tx);
|
||||
done:
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
@ -2301,7 +2335,6 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
|
||||
checkit = B_TRUE;
|
||||
break;
|
||||
} else {
|
||||
zfs_acl_free(aclp);
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
return (EIO);
|
||||
}
|
||||
@ -2321,7 +2354,6 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
|
||||
uint32_t, mask_matched);
|
||||
if (anyaccess) {
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
zfs_acl_free(aclp);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
@ -2334,7 +2366,6 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
|
||||
}
|
||||
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
zfs_acl_free(aclp);
|
||||
|
||||
/* Put the found 'denies' back on the working mode */
|
||||
if (deny_mask) {
|
||||
@ -2366,8 +2397,7 @@ zfs_has_access(znode_t *zp, cred_t *cr)
|
||||
secpolicy_vnode_access(cr, ZTOV(zp), owner, VREAD) == 0 ||
|
||||
secpolicy_vnode_access(cr, ZTOV(zp), owner, VWRITE) == 0 ||
|
||||
secpolicy_vnode_access(cr, ZTOV(zp), owner, VEXEC) == 0 ||
|
||||
secpolicy_vnode_chown(cr, B_TRUE) == 0 ||
|
||||
secpolicy_vnode_chown(cr, B_FALSE) == 0 ||
|
||||
secpolicy_vnode_chown(cr, owner) == 0 ||
|
||||
secpolicy_vnode_setdac(cr, owner) == 0 ||
|
||||
secpolicy_vnode_remove(cr) == 0);
|
||||
}
|
||||
@ -2421,6 +2451,78 @@ zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
|
||||
check_privs, B_FALSE, cr));
|
||||
}
|
||||
|
||||
int
|
||||
zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
|
||||
{
|
||||
boolean_t owner = B_FALSE;
|
||||
boolean_t groupmbr = B_FALSE;
|
||||
boolean_t is_attr;
|
||||
uid_t fowner;
|
||||
uid_t gowner;
|
||||
uid_t uid = crgetuid(cr);
|
||||
int error;
|
||||
|
||||
if (zdp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
|
||||
return (EACCES);
|
||||
|
||||
is_attr = ((zdp->z_phys->zp_flags & ZFS_XATTR) &&
|
||||
(ZTOV(zdp)->v_type == VDIR));
|
||||
if (is_attr)
|
||||
goto slow;
|
||||
|
||||
mutex_enter(&zdp->z_acl_lock);
|
||||
|
||||
if (zdp->z_phys->zp_flags & ZFS_NO_EXECS_DENIED) {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (FUID_INDEX(zdp->z_phys->zp_uid) != 0 ||
|
||||
FUID_INDEX(zdp->z_phys->zp_gid) != 0) {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
goto slow;
|
||||
}
|
||||
|
||||
fowner = (uid_t)zdp->z_phys->zp_uid;
|
||||
gowner = (uid_t)zdp->z_phys->zp_gid;
|
||||
|
||||
if (uid == fowner) {
|
||||
owner = B_TRUE;
|
||||
if (zdp->z_phys->zp_mode & S_IXUSR) {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
return (0);
|
||||
} else {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
goto slow;
|
||||
}
|
||||
}
|
||||
if (groupmember(gowner, cr)) {
|
||||
groupmbr = B_TRUE;
|
||||
if (zdp->z_phys->zp_mode & S_IXGRP) {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
return (0);
|
||||
} else {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
goto slow;
|
||||
}
|
||||
}
|
||||
if (!owner && !groupmbr) {
|
||||
if (zdp->z_phys->zp_mode & S_IXOTH) {
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zdp->z_acl_lock);
|
||||
|
||||
slow:
|
||||
DTRACE_PROBE(zfs__fastpath__execute__access__miss);
|
||||
ZFS_ENTER(zdp->z_zfsvfs);
|
||||
error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
|
||||
ZFS_EXIT(zdp->z_zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine whether Access should be granted/denied, invoking least
|
||||
* priv subsystem when a deny is determined.
|
||||
@ -2515,7 +2617,7 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
|
||||
owner, checkmode);
|
||||
|
||||
if (error == 0 && (working_mode & ACE_WRITE_OWNER))
|
||||
error = secpolicy_vnode_chown(cr, B_TRUE);
|
||||
error = secpolicy_vnode_chown(cr, owner);
|
||||
if (error == 0 && (working_mode & ACE_WRITE_ACL))
|
||||
error = secpolicy_vnode_setdac(cr, owner);
|
||||
|
||||
@ -2524,7 +2626,7 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
|
||||
error = secpolicy_vnode_remove(cr);
|
||||
|
||||
if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
|
||||
error = secpolicy_vnode_chown(cr, B_FALSE);
|
||||
error = secpolicy_vnode_chown(cr, owner);
|
||||
}
|
||||
if (error == 0) {
|
||||
/*
|
||||
|
@ -700,7 +700,7 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
|
||||
if (err)
|
||||
avl_add(&sdp->sd_snaps, sep);
|
||||
else
|
||||
err = dmu_objset_destroy(snapname);
|
||||
err = dmu_objset_destroy(snapname, B_FALSE);
|
||||
} else {
|
||||
err = ENOENT;
|
||||
}
|
||||
|
@ -353,6 +353,7 @@ retry:
|
||||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
return (retidx);
|
||||
} else {
|
||||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
@ -761,6 +761,20 @@ zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
|
||||
return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, cr));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
|
||||
{
|
||||
return (zfs_secpolicy_write_perms(zc->zc_name,
|
||||
ZFS_DELEG_PERM_HOLD, cr));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
|
||||
{
|
||||
return (zfs_secpolicy_write_perms(zc->zc_name,
|
||||
ZFS_DELEG_PERM_RELEASE, cr));
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the nvlist as specified by the user in the zfs_cmd_t.
|
||||
*/
|
||||
@ -2466,7 +2480,7 @@ zfs_ioc_create(zfs_cmd_t *zc)
|
||||
*/
|
||||
if (error == 0) {
|
||||
if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0)
|
||||
(void) dmu_objset_destroy(zc->zc_name);
|
||||
(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
|
||||
}
|
||||
nvlist_free(nvprops);
|
||||
return (error);
|
||||
@ -2555,6 +2569,7 @@ zfs_unmount_snap(char *name, void *arg)
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
* zc_value short name of snapshot
|
||||
* zc_defer_destroy mark for deferred destroy
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
@ -2569,13 +2584,15 @@ zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
|
||||
zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
|
||||
if (err)
|
||||
return (err);
|
||||
return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
|
||||
return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
|
||||
zc->zc_defer_destroy));
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of dataset to destroy
|
||||
* zc_objset_type type of objset
|
||||
* zc_defer_destroy mark for deferred destroy
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
@ -2588,7 +2605,7 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
|
||||
return (err);
|
||||
}
|
||||
|
||||
return (dmu_objset_destroy(zc->zc_name));
|
||||
return (dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2708,7 +2725,6 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
file_t *fp;
|
||||
objset_t *os;
|
||||
dmu_recv_cookie_t drc;
|
||||
zfsvfs_t *zfsvfs = NULL;
|
||||
boolean_t force = (boolean_t)zc->zc_guid;
|
||||
int error, fd;
|
||||
offset_t off;
|
||||
@ -2740,25 +2756,12 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
return (EBADF);
|
||||
}
|
||||
|
||||
if (getzfsvfs(tofs, &zfsvfs) == 0) {
|
||||
if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) {
|
||||
VFS_RELE(zfsvfs->z_vfs);
|
||||
zfsvfs = NULL;
|
||||
error = EBUSY;
|
||||
goto out;
|
||||
}
|
||||
if (props && dmu_objset_open(tofs, DMU_OST_ANY,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
|
||||
/*
|
||||
* If new properties are supplied, they are to completely
|
||||
* replace the existing ones, so stash away the existing ones.
|
||||
*/
|
||||
if (props)
|
||||
(void) dsl_prop_get_all(zfsvfs->z_os, &origprops, TRUE);
|
||||
} else if (props && dmu_objset_open(tofs, DMU_OST_ANY,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
|
||||
/*
|
||||
* Get the props even if there was no zfsvfs (zvol or
|
||||
* unmounted zpl).
|
||||
*/
|
||||
(void) dsl_prop_get_all(os, &origprops, TRUE);
|
||||
|
||||
dmu_objset_close(os);
|
||||
@ -2772,7 +2775,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
}
|
||||
|
||||
error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
|
||||
force, origin, zfsvfs != NULL, &drc);
|
||||
force, origin, &drc);
|
||||
if (origin)
|
||||
dmu_objset_close(origin);
|
||||
if (error)
|
||||
@ -2793,26 +2796,34 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
off = fp->f_offset;
|
||||
error = dmu_recv_stream(&drc, fp->f_vnode, &off);
|
||||
|
||||
if (error == 0 && zfsvfs) {
|
||||
if (error == 0) {
|
||||
zfsvfs_t *zfsvfs = NULL;
|
||||
|
||||
if (getzfsvfs(tofs, &zfsvfs) == 0) {
|
||||
/* online recv */
|
||||
int end_err;
|
||||
char *osname;
|
||||
int mode;
|
||||
|
||||
/* online recv */
|
||||
osname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
|
||||
error = zfs_suspend_fs(zfsvfs, osname, &mode);
|
||||
/*
|
||||
* If the suspend fails, then the recv_end will
|
||||
* likely also fail, and clean up after itself.
|
||||
*/
|
||||
end_err = dmu_recv_end(&drc);
|
||||
if (error == 0) {
|
||||
int resume_err;
|
||||
|
||||
error = dmu_recv_end(&drc);
|
||||
resume_err = zfs_resume_fs(zfsvfs, osname, mode);
|
||||
int resume_err =
|
||||
zfs_resume_fs(zfsvfs, osname, mode);
|
||||
error = error ? error : resume_err;
|
||||
} else {
|
||||
dmu_recv_abort_cleanup(&drc);
|
||||
}
|
||||
error = error ? error : end_err;
|
||||
VFS_RELE(zfsvfs->z_vfs);
|
||||
kmem_free(osname, MAXNAMELEN);
|
||||
} else if (error == 0) {
|
||||
} else {
|
||||
error = dmu_recv_end(&drc);
|
||||
}
|
||||
}
|
||||
|
||||
zc->zc_cookie = off - fp->f_offset;
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
@ -2826,10 +2837,6 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
(void) zfs_set_prop_nvlist(tofs, origprops);
|
||||
}
|
||||
out:
|
||||
if (zfsvfs) {
|
||||
mutex_exit(&zfsvfs->z_online_recv_lock);
|
||||
VFS_RELE(zfsvfs->z_vfs);
|
||||
}
|
||||
nvlist_free(props);
|
||||
nvlist_free(origprops);
|
||||
releasef(fd);
|
||||
@ -3431,6 +3438,69 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
* zc_value short name of snap
|
||||
* zc_string user-supplied tag for this reference
|
||||
* zc_cookie recursive flag
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_hold(zfs_cmd_t *zc)
|
||||
{
|
||||
boolean_t recursive = zc->zc_cookie;
|
||||
|
||||
if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
|
||||
return (EINVAL);
|
||||
|
||||
return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
|
||||
zc->zc_string, recursive));
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of dataset from which we're releasing a user reference
|
||||
* zc_value short name of snap
|
||||
* zc_string user-supplied tag for this reference
|
||||
* zc_cookie recursive flag
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_release(zfs_cmd_t *zc)
|
||||
{
|
||||
boolean_t recursive = zc->zc_cookie;
|
||||
|
||||
if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
|
||||
return (EINVAL);
|
||||
|
||||
return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
|
||||
zc->zc_string, recursive));
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
*
|
||||
* outputs:
|
||||
* zc_nvlist_src{_size} nvlist of snapshot holds
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_get_holds(zfs_cmd_t *zc)
|
||||
{
|
||||
nvlist_t *nvp;
|
||||
int error;
|
||||
|
||||
if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
|
||||
error = put_nvlist(zc, nvp);
|
||||
nvlist_free(nvp);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* pool create, destroy, and export don't log the history as part of
|
||||
* zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
|
||||
@ -3511,8 +3581,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
|
||||
B_TRUE },
|
||||
{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
|
||||
B_FALSE },
|
||||
{ zfs_ioc_obj_to_path, zfs_secpolicy_config, NO_NAME, B_FALSE,
|
||||
B_FALSE },
|
||||
{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
|
||||
B_TRUE },
|
||||
{ zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE,
|
||||
B_TRUE },
|
||||
{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
|
||||
@ -3534,6 +3604,11 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
|
||||
DATASET_NAME, B_FALSE, B_FALSE },
|
||||
{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
|
||||
DATASET_NAME, B_FALSE, B_TRUE },
|
||||
{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
|
||||
{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
|
||||
B_TRUE },
|
||||
{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
|
||||
B_TRUE }
|
||||
};
|
||||
|
||||
int
|
||||
|
@ -935,7 +935,6 @@ zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
|
||||
goto out;
|
||||
|
||||
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
|
||||
offsetof(znode_t, z_link_node));
|
||||
@ -1051,7 +1050,6 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
|
||||
zfs_fuid_destroy(zfsvfs);
|
||||
|
||||
mutex_destroy(&zfsvfs->z_znodes_lock);
|
||||
mutex_destroy(&zfsvfs->z_online_recv_lock);
|
||||
mutex_destroy(&zfsvfs->z_lock);
|
||||
list_destroy(&zfsvfs->z_all_znodes);
|
||||
rrw_destroy(&zfsvfs->z_teardown_lock);
|
||||
|
@ -208,6 +208,12 @@ zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
|
||||
znode_t *zp = VTOZ(vp);
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
|
||||
/*
|
||||
* Clean up any locks held by this process on the vp.
|
||||
*/
|
||||
cleanlocks(vp, ddi_get_pid(), 0);
|
||||
cleanshares(vp, ddi_get_pid());
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
ZFS_VERIFY_ZP(zp);
|
||||
|
||||
@ -215,12 +221,6 @@ zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
|
||||
if ((flag & (FSYNC | FDSYNC)) && (count == 1))
|
||||
atomic_dec_32(&zp->z_sync_cnt);
|
||||
|
||||
/*
|
||||
* Clean up any locks held by this process on the vp.
|
||||
*/
|
||||
cleanlocks(vp, ddi_get_pid(), 0);
|
||||
cleanshares(vp, ddi_get_pid());
|
||||
|
||||
if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
|
||||
ZTOV(zp)->v_type == VREG &&
|
||||
!(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) &&
|
||||
@ -855,6 +855,10 @@ zfs_get_done(dmu_buf_t *db, void *vzgd)
|
||||
kmem_free(zgd, sizeof (zgd_t));
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static int zil_fault_io = 0;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Get data to generate a TX_WRITE intent log record.
|
||||
*/
|
||||
@ -936,7 +940,21 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
||||
zgd->zgd_rl = rl;
|
||||
zgd->zgd_zilog = zfsvfs->z_log;
|
||||
zgd->zgd_bp = &lr->lr_blkptr;
|
||||
VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db));
|
||||
#ifdef DEBUG
|
||||
if (zil_fault_io) {
|
||||
error = EIO;
|
||||
zil_fault_io = 0;
|
||||
} else {
|
||||
error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db);
|
||||
}
|
||||
#else
|
||||
error = dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db);
|
||||
#endif
|
||||
if (error != 0) {
|
||||
kmem_free(zgd, sizeof (zgd_t));
|
||||
goto out;
|
||||
}
|
||||
|
||||
ASSERT(boff == db->db_offset);
|
||||
lr->lr_blkoff = off - boff;
|
||||
error = dmu_sync(zio, db, &lr->lr_blkptr,
|
||||
@ -987,6 +1005,27 @@ zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* If vnode is for a device return a specfs vnode instead.
|
||||
*/
|
||||
static int
|
||||
specvp_check(vnode_t **vpp, cred_t *cr)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (IS_DEVVP(*vpp)) {
|
||||
struct vnode *svp;
|
||||
|
||||
svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
|
||||
VN_RELE(*vpp);
|
||||
if (svp == NULL)
|
||||
error = ENOSYS;
|
||||
*vpp = svp;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Lookup an entry in a directory, or an extended attribute directory.
|
||||
* If it exists, return a held vnode reference for it.
|
||||
@ -1017,7 +1056,46 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
|
||||
{
|
||||
znode_t *zdp = VTOZ(dvp);
|
||||
zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
|
||||
int error;
|
||||
int error = 0;
|
||||
|
||||
/* fast path */
|
||||
if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
|
||||
|
||||
if (dvp->v_type != VDIR) {
|
||||
return (ENOTDIR);
|
||||
} else if (zdp->z_dbuf == NULL) {
|
||||
return (EIO);
|
||||
}
|
||||
|
||||
if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
|
||||
error = zfs_fastaccesschk_execute(zdp, cr);
|
||||
if (!error) {
|
||||
*vpp = dvp;
|
||||
VN_HOLD(*vpp);
|
||||
return (0);
|
||||
}
|
||||
return (error);
|
||||
} else {
|
||||
vnode_t *tvp = dnlc_lookup(dvp, nm);
|
||||
|
||||
if (tvp) {
|
||||
error = zfs_fastaccesschk_execute(zdp, cr);
|
||||
if (error) {
|
||||
VN_RELE(tvp);
|
||||
return (error);
|
||||
}
|
||||
if (tvp == DNLC_NO_VNODE) {
|
||||
VN_RELE(tvp);
|
||||
return (ENOENT);
|
||||
} else {
|
||||
*vpp = tvp;
|
||||
return (specvp_check(vpp, cr));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
ZFS_VERIFY_ZP(zdp);
|
||||
@ -1082,21 +1160,8 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
|
||||
}
|
||||
|
||||
error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
|
||||
if (error == 0) {
|
||||
/*
|
||||
* Convert device special files
|
||||
*/
|
||||
if (IS_DEVVP(*vpp)) {
|
||||
vnode_t *svp;
|
||||
|
||||
svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
|
||||
VN_RELE(*vpp);
|
||||
if (svp == NULL)
|
||||
error = ENOSYS;
|
||||
else
|
||||
*vpp = svp;
|
||||
}
|
||||
}
|
||||
if (error == 0)
|
||||
error = specvp_check(vpp, cr);
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
@ -1235,6 +1300,7 @@ top:
|
||||
&acl_ids)) != 0)
|
||||
goto out;
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
error = EDQUOT;
|
||||
goto out;
|
||||
}
|
||||
@ -1332,19 +1398,7 @@ out:
|
||||
VN_RELE(ZTOV(zp));
|
||||
} else {
|
||||
*vpp = ZTOV(zp);
|
||||
/*
|
||||
* If vnode is for a device return a specfs vnode instead.
|
||||
*/
|
||||
if (IS_DEVVP(*vpp)) {
|
||||
struct vnode *svp;
|
||||
|
||||
svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
|
||||
VN_RELE(*vpp);
|
||||
if (svp == NULL) {
|
||||
error = ENOSYS;
|
||||
}
|
||||
*vpp = svp;
|
||||
}
|
||||
error = specvp_check(vpp, cr);
|
||||
}
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
@ -1653,6 +1707,7 @@ top:
|
||||
return (error);
|
||||
}
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
zfs_dirent_unlock(dl);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (EDQUOT);
|
||||
@ -2456,6 +2511,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
||||
top:
|
||||
attrzp = NULL;
|
||||
|
||||
/* Can this be moved to before the top label? */
|
||||
if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (EROFS);
|
||||
@ -2765,6 +2821,8 @@ top:
|
||||
zp->z_phys->zp_mode = new_mode;
|
||||
err = zfs_aclset_common(zp, aclp, cr, tx);
|
||||
ASSERT3U(err, ==, 0);
|
||||
zp->z_acl_cached = aclp;
|
||||
aclp = NULL;
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
}
|
||||
|
||||
@ -2856,10 +2914,8 @@ out:
|
||||
if (attrzp)
|
||||
VN_RELE(ZTOV(attrzp));
|
||||
|
||||
if (aclp) {
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
aclp = NULL;
|
||||
}
|
||||
|
||||
if (fuidp) {
|
||||
zfs_fuid_info_free(fuidp);
|
||||
@ -3724,8 +3780,8 @@ top:
|
||||
if (err == 0) {
|
||||
zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
|
||||
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
out:
|
||||
pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
|
||||
|
@ -133,6 +133,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
|
||||
|
||||
zp->z_dbuf = NULL;
|
||||
zp->z_dirlocks = NULL;
|
||||
zp->z_acl_cached = NULL;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -155,6 +156,7 @@ zfs_znode_cache_destructor(void *buf, void *arg)
|
||||
|
||||
ASSERT(zp->z_dbuf == NULL);
|
||||
ASSERT(zp->z_dirlocks == NULL);
|
||||
ASSERT(zp->z_acl_cached == NULL);
|
||||
}
|
||||
|
||||
#ifdef ZNODE_STATS
|
||||
@ -199,6 +201,18 @@ zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
|
||||
nzp->z_phys = ozp->z_phys;
|
||||
nzp->z_dbuf = ozp->z_dbuf;
|
||||
|
||||
/*
|
||||
* Release any cached ACL, since it *may* have
|
||||
* zfs_acl_node_t's that directly references an
|
||||
* embedded ACL in the zp_acl of the old znode_phys_t
|
||||
*
|
||||
* It will be recached the next time the ACL is needed.
|
||||
*/
|
||||
if (ozp->z_acl_cached) {
|
||||
zfs_acl_free(ozp->z_acl_cached);
|
||||
ozp->z_acl_cached = NULL;
|
||||
}
|
||||
|
||||
/* Update back pointers. */
|
||||
(void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys,
|
||||
znode_evict_error);
|
||||
@ -1081,6 +1095,11 @@ zfs_znode_free(znode_t *zp)
|
||||
list_remove(&zfsvfs->z_all_znodes, zp);
|
||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||
|
||||
if (zp->z_acl_cached) {
|
||||
zfs_acl_free(zp->z_acl_cached);
|
||||
zp->z_acl_cached = NULL;
|
||||
}
|
||||
|
||||
kmem_cache_free(znode_cache, zp);
|
||||
|
||||
VFS_RELE(zfsvfs->z_vfs);
|
||||
|
@ -714,14 +714,15 @@ zil_lwb_write_done(zio_t *zio)
|
||||
lwb->lwb_buf = NULL;
|
||||
if (zio->io_error)
|
||||
zilog->zl_log_error = B_TRUE;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
||||
/*
|
||||
* Now that we've written this log block, we have a stable pointer
|
||||
* to the next block in the chain, so it's OK to let the txg in
|
||||
* which we allocated the next block sync.
|
||||
* which we allocated the next block sync. We still have the
|
||||
* zl_lock to ensure zil_sync doesn't kmem free the lwb.
|
||||
*/
|
||||
txg_rele_to_sync(&lwb->lwb_txgh);
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -925,6 +926,10 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
|
||||
}
|
||||
error = zilog->zl_get_data(
|
||||
itx->itx_private, lr, dbuf, lwb->lwb_zio);
|
||||
if (error == EIO) {
|
||||
txg_wait_synced(zilog->zl_dmu_pool, txg);
|
||||
return (lwb);
|
||||
}
|
||||
if (error) {
|
||||
ASSERT(error == ENOENT || error == EEXIST ||
|
||||
error == EALREADY);
|
||||
|
Loading…
Reference in New Issue
Block a user