Merge branch 'illumos'

Merge in ten upstream fixes which have already been made to both
the Illumos and FreeBSD ZFS implementations.  This brings us up
to date with the latest ZFS changes in Illumos.

Credit goes to Martin Matuska of the FreeBSD project for posting
an excellent summary of the upstream patches we were missing.

Illumos #1313: Integer overflow in txg_delay()
Illumos #278:  get rid zfs of python and pyzfs dependencies
Illumos #1043: Recursive zfs snapshot destroy fails
Illumos #883:  ZIL reuse during remount corruption
Illumos #1092: zfs refratio property
Illumos #1051: zfs should handle imbalanced luns
Illumos #510:  'zfs get' enhancement - mountpoint as an argument
Illumos #175:  zfs vdev cache consumes excessive memory
Illumos #764:  panic in zfs:dbuf_sync_list
Illumos #xxx:  zdb -vvv broken after zfs diff integration

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #340
This commit is contained in:
Brian Behlendorf 2011-08-01 12:10:54 -07:00
commit 77999e804f
23 changed files with 2701 additions and 107 deletions

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
/*
@ -205,6 +206,7 @@ typedef struct ztest_od {
*/
typedef struct ztest_ds {
objset_t *zd_os;
krwlock_t zd_zilog_lock;
zilog_t *zd_zilog;
uint64_t zd_seq;
ztest_od_t *zd_od; /* debugging aid */
@ -238,6 +240,7 @@ ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
ztest_func_t ztest_zap_parallel;
ztest_func_t ztest_zil_commit;
ztest_func_t ztest_zil_remount;
ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_objset_create_destroy;
ztest_func_t ztest_dmu_prealloc;
@ -273,6 +276,7 @@ ztest_info_t ztest_info[] = {
{ ztest_zap_parallel, 100, &zopt_always },
{ ztest_split_pool, 1, &zopt_always },
{ ztest_zil_commit, 1, &zopt_incessant },
{ ztest_zil_remount, 1, &zopt_sometimes },
{ ztest_dmu_read_write_zcopy, 1, &zopt_often },
{ ztest_dmu_objset_create_destroy, 1, &zopt_often },
{ ztest_dsl_prop_get_set, 1, &zopt_often },
@ -1006,6 +1010,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os)
dmu_objset_name(os, zd->zd_name);
int l;
rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL);
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
@ -1021,6 +1026,7 @@ ztest_zd_fini(ztest_ds_t *zd)
int l;
mutex_destroy(&zd->zd_dirobj_lock);
rw_destroy(&zd->zd_zilog_lock);
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
ztest_rll_destroy(&zd->zd_object_lock[l]);
@ -1992,6 +1998,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
if (ztest_random(2) == 0)
io_type = ZTEST_IO_WRITE_TAG;
(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
switch (io_type) {
case ZTEST_IO_WRITE_TAG:
@ -2029,6 +2037,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
break;
}
(void) rw_exit(&zd->zd_zilog_lock);
umem_free(data, blocksize);
}
@ -2083,6 +2093,8 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
{
zilog_t *zilog = zd->zd_zilog;
(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
/*
@ -2094,6 +2106,31 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq);
zd->zd_seq = zilog->zl_commit_lr_seq;
mutex_exit(&zilog->zl_lock);
(void) rw_exit(&zd->zd_zilog_lock);
}
/*
 * Exercise the ZIL close/open/replay sequence that a filesystem
 * unmount followed by a mount would perform, without ever dropping
 * the dataset.  zd_zilog_lock is taken as writer for the whole
 * sequence, and the reopened zilog is verified to be the same one
 * recorded in zd->zd_zilog.
 */
/* ARGSUSED */
void
ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
{
	objset_t *objset = zd->zd_os;
	zilog_t *reopened;

	(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);

	/* Teardown half, mirroring zfsvfs_teardown(). */
	zil_close(zd->zd_zilog);

	/* Setup half, mirroring zfsvfs_setup(). */
	reopened = zil_open(objset, ztest_get_data);
	VERIFY(reopened == zd->zd_zilog);
	zil_replay(objset, zd, ztest_replay_vector);

	(void) rw_exit(&zd->zd_zilog_lock);
}
/*
@ -5300,6 +5337,7 @@ ztest_run(ztest_shared_t *zs)
*/
kernel_init(FREAD | FWRITE);
VERIFY(spa_open(zs->zs_pool, &spa, FTAG) == 0);
spa->spa_debug = B_TRUE;
zs->zs_spa = spa;
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _LIBZFS_H
@ -572,13 +573,17 @@ extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
boolean_t, boolean_t, int, uint64_t, uint64_t);
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
uid_t rid, uint64_t space);
extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
zfs_userspace_cb_t func, void *arg);
extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t,
zfs_userspace_cb_t, void *);
extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **);
extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *);
typedef struct recvflags {
/* print informational messages (ie, -v was specified) */

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -122,6 +123,7 @@ typedef enum {
ZFS_PROP_DEDUP,
ZFS_PROP_MLSLABEL,
ZFS_PROP_SYNC,
ZFS_PROP_REFRATIO,
ZFS_NUM_PROPS
} zfs_prop_t;

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@ -47,6 +48,8 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg);
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
#define METASLAB_GANG_HEADER 0x2
#define METASLAB_GANG_CHILD 0x4
#define METASLAB_GANG_AVOID 0x8
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);

View File

@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_IMPL_H
@ -52,6 +53,7 @@ struct metaslab_group {
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
uint64_t mg_bonus_area;
uint64_t mg_alloc_failures;
int64_t mg_bias;
int64_t mg_activation_count;
metaslab_class_t *mg_class;

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#ifndef _SYS_SPA_H
@ -698,6 +699,13 @@ _NOTE(CONSTCOND) } while (0)
#define dprintf_bp(bp, fmt, ...)
#endif
extern boolean_t spa_debug_enabled(spa_t *spa);
/*
 * Forward the arguments to zfs_dbgmsg() only when spa_debug_enabled()
 * reports true for the given pool.  The body is wrapped in
 * do { } while (0) so that the macro expands to exactly one statement
 * and can be used safely as the body of an unbraced if/else.
 */
#define	spa_dbgmsg(spa, ...)			\
do {						\
	if (spa_debug_enabled(spa))		\
		zfs_dbgmsg(__VA_ARGS__);	\
_NOTE(CONSTCOND) } while (0)
extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */
#ifdef __cplusplus

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@ -196,6 +197,7 @@ struct spa {
kcondvar_t spa_suspend_cv; /* notification of resume */
uint8_t spa_suspended; /* pool is suspended */
uint8_t spa_claiming; /* pool is doing zil_claim() */
boolean_t spa_debug; /* debug enabled? */
boolean_t spa_is_root; /* pool is root */
int spa_minref; /* num refs when first opened */
int spa_mode; /* FREAD | FWRITE */

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _ZFS_DELEG_H
@ -51,6 +52,7 @@ typedef enum {
ZFS_DELEG_NOTE_CLONE,
ZFS_DELEG_NOTE_PROMOTE,
ZFS_DELEG_NOTE_RENAME,
ZFS_DELEG_NOTE_SEND,
ZFS_DELEG_NOTE_RECEIVE,
ZFS_DELEG_NOTE_ALLOW,
ZFS_DELEG_NOTE_USERPROP,

View File

@ -21,6 +21,8 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <ctype.h>
@ -94,6 +96,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
namecheck_err_t why;
char what;
(void) zfs_prop_get_table();
if (dataset_namecheck(path, &why, &what) != 0) {
if (hdl != NULL) {
switch (why) {
@ -2025,6 +2028,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
}
break;
case ZFS_PROP_REFRATIO:
case ZFS_PROP_COMPRESSRATIO:
if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
return (-1);
@ -4311,6 +4315,193 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
return (0);
}
/*
 * Retrieve the permission nvlist for a filesystem or volume via the
 * ZFS_IOC_GET_FSACL ioctl and unpack it into *nvl.
 *
 * The destination buffer starts at 2048 bytes.  When the ioctl fails
 * with ENOMEM the buffer is released and the call is retried using the
 * size reported back in zc.zc_nvlist_dst_size.
 *
 * Returns 0 on success; otherwise the value produced by zfs_error() or
 * zfs_standard_error_fmt() for the failure.
 */
int
zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
{
	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	int nvsz = 2048;
	void *nvbuf;
	int err = 0;
	char errbuf[ZFS_MAXNAMELEN+32];

	assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
	    zhp->zfs_type == ZFS_TYPE_FILESYSTEM);

tryagain:

	nvbuf = malloc(nvsz);
	if (nvbuf == NULL) {
		err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
		goto out;
	}

	zc.zc_nvlist_dst_size = nvsz;
	zc.zc_nvlist_dst = (uintptr_t)nvbuf;

	(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);

	if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) {
		/*
		 * Capture errno before calling anything else: snprintf()
		 * and dgettext() are allowed to modify it, which would
		 * send the switch below down the wrong branch.
		 */
		int ioc_errno = errno;

		(void) snprintf(errbuf, sizeof (errbuf),
		    dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"),
		    zc.zc_name);
		switch (ioc_errno) {
		case ENOMEM:
			/* Buffer too small; retry with the reported size. */
			free(nvbuf);
			nvsz = zc.zc_nvlist_dst_size;
			goto tryagain;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded"));
			err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
			break;
		case EINVAL:
			err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
			break;
		case ENOENT:
			err = zfs_error(hdl, EZFS_NOENT, errbuf);
			break;
		default:
			err = zfs_standard_error_fmt(hdl, ioc_errno, errbuf);
			break;
		}
	} else {
		/* success */
		int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
		if (rc) {
			(void) snprintf(errbuf, sizeof (errbuf), dgettext(
			    TEXT_DOMAIN, "cannot get permissions on '%s'"),
			    zc.zc_name);
			err = zfs_standard_error_fmt(hdl, rc, errbuf);
		}
	}

	free(nvbuf);
out:
	return (err);
}
/*
 * Pack the permission nvlist 'nvl' in native encoding and hand it to
 * the kernel via the ZFS_IOC_SET_FSACL ioctl for a filesystem or
 * volume.  'un' is passed through unchanged in zc.zc_perm_action
 * (presumably selecting removal rather than addition of the
 * permissions -- confirm against the ioctl handler).
 *
 * Returns 0 on success; otherwise the value produced by zfs_error() or
 * zfs_standard_error_fmt() for the failure.
 */
int
zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
{
	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	char *nvbuf;
	char errbuf[ZFS_MAXNAMELEN+32];
	size_t nvsz;
	int err;

	assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
	    zhp->zfs_type == ZFS_TYPE_FILESYSTEM);

	err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE);
	assert(err == 0);

	/*
	 * Report allocation failure the same way the other fsacl/holds
	 * routines in this file do instead of proceeding with a NULL
	 * buffer.
	 */
	nvbuf = malloc(nvsz);
	if (nvbuf == NULL)
		return (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));

	err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0);
	assert(err == 0);

	zc.zc_nvlist_src_size = nvsz;
	zc.zc_nvlist_src = (uintptr_t)nvbuf;
	zc.zc_perm_action = un;

	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) {
		/*
		 * Capture errno before calling anything else: snprintf()
		 * and dgettext() are allowed to modify it.
		 */
		int ioc_errno = errno;

		(void) snprintf(errbuf, sizeof (errbuf),
		    dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"),
		    zc.zc_name);
		switch (ioc_errno) {
		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded"));
			err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
			break;
		case EINVAL:
			err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
			break;
		case ENOENT:
			err = zfs_error(hdl, EZFS_NOENT, errbuf);
			break;
		default:
			err = zfs_standard_error_fmt(hdl, ioc_errno, errbuf);
			break;
		}
	}

	free(nvbuf);

	return (err);
}
/*
 * Retrieve the nvlist of user holds on a snapshot via the
 * ZFS_IOC_GET_HOLDS ioctl and unpack it into *nvl.
 *
 * The destination buffer starts at 2048 bytes.  When the ioctl fails
 * with ENOMEM the buffer is released and the call is retried using the
 * size reported back in zc.zc_nvlist_dst_size.
 *
 * Returns 0 on success; otherwise the value produced by zfs_error() or
 * zfs_standard_error_fmt() for the failure.
 */
int
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
{
	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	int nvsz = 2048;
	void *nvbuf;
	int err = 0;
	char errbuf[ZFS_MAXNAMELEN+32];

	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);

tryagain:

	nvbuf = malloc(nvsz);
	if (nvbuf == NULL) {
		err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
		goto out;
	}

	zc.zc_nvlist_dst_size = nvsz;
	zc.zc_nvlist_dst = (uintptr_t)nvbuf;

	(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);

	if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
		/*
		 * Capture errno before calling anything else: snprintf()
		 * and dgettext() are allowed to modify it, which would
		 * send the switch below down the wrong branch.
		 */
		int ioc_errno = errno;

		(void) snprintf(errbuf, sizeof (errbuf),
		    dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
		    zc.zc_name);
		switch (ioc_errno) {
		case ENOMEM:
			/* Buffer too small; retry with the reported size. */
			free(nvbuf);
			nvsz = zc.zc_nvlist_dst_size;
			goto tryagain;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded"));
			err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
			break;
		case EINVAL:
			err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
			break;
		case ENOENT:
			err = zfs_error(hdl, EZFS_NOENT, errbuf);
			break;
		default:
			err = zfs_standard_error_fmt(hdl, ioc_errno, errbuf);
			break;
		}
	} else {
		/* success */
		int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
		if (rc) {
			(void) snprintf(errbuf, sizeof (errbuf),
			    dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
			    zc.zc_name);
			err = zfs_standard_error_fmt(hdl, rc, errbuf);
		}
	}

	free(nvbuf);
out:
	return (err);
}
uint64_t
zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props)
{

View File

@ -360,7 +360,7 @@ This property can also be referred to by its shortened column name, \fBavail\fR.
.ad
.sp .6
.RS 4n
The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR.
For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR.
.RE
.sp
@ -420,6 +420,19 @@ The amount of data that is accessible by this dataset, which may or may not be s
This property can also be referred to by its shortened column name, \fBrefer\fR.
.RE
.sp
.ne 2
.mk
.na
\fB\fBrefcompressratio\fR\fR
.ad
.sp .6
.RS 4n
The compression ratio achieved for the \fBreferenced\fR space of this
dataset, expressed as a multiplier. See also the \fBcompressratio\fR
property.
.RE
.sp
.ne 2
.mk
@ -1235,7 +1248,7 @@ Recursively destroy all dependents, including cloned file systems outside the ta
Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems.
.RE
Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use.
Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use.
.RE
.sp

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
*/
#if defined(_KERNEL)
@ -60,7 +61,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
{ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -311,6 +312,9 @@ zfs_prop_init(void)
zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
PROP_READONLY, ZFS_TYPE_DATASET,
"<1.00x or higher if compressed>", "RATIO");
zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0,
PROP_READONLY, ZFS_TYPE_DATASET,
"<1.00x or higher if compressed>", "REFRATIO");
zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -1347,13 +1348,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* it, since one of the current holders may be in the
* middle of an update. Note that users of dbuf_undirty()
* should not place a hold on the dbuf before the call.
* Also note: we can get here with a spill block, so
* test for that similar to how dbuf_dirty does.
*/
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
mutex_exit(&db->db_mtx);
/* Make sure we don't toss this buffer at sync phase */
mutex_enter(&dn->dn_mtx);
dnode_clear_range(dn, db->db_blkid, 1, tx);
mutex_exit(&dn->dn_mtx);
if (db->db_blkid != DMU_SPILL_BLKID) {
mutex_enter(&dn->dn_mtx);
dnode_clear_range(dn, db->db_blkid, 1, tx);
mutex_exit(&dn->dn_mtx);
}
DB_DNODE_EXIT(db);
return (0);
}
@ -1366,11 +1371,18 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
*drp = dr->dr_next;
/*
* Note that there are three places in dbuf_dirty()
* where this dirty record may be put on a list.
* Make sure to do a list_remove corresponding to
* every one of those list_insert calls.
*/
if (dr->dr_parent) {
mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
list_remove(&dr->dr_parent->dt.di.dr_children, dr);
mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
} else if (db->db_level+1 == dn->dn_nlevels) {
} else if (db->db_blkid == DMU_SPILL_BLKID ||
db->db_level+1 == dn->dn_nlevels) {
ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
mutex_enter(&dn->dn_mtx);
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/dmu_objset.h>
@ -2153,7 +2154,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
uint64_t refd, avail, uobjs, aobjs;
uint64_t refd, avail, uobjs, aobjs, ratio;
dsl_dir_stats(ds->ds_dir, nv);
@ -2180,6 +2181,11 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
(ds->ds_phys->ds_uncompressed_bytes * 100 /
ds->ds_phys->ds_compressed_bytes);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
if (ds->ds_phys->ds_next_snap_obj) {
/*
* This is a snapshot; override the dd's space used with
@ -2187,10 +2193,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
*/
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
ds->ds_phys->ds_unique_bytes);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
(ds->ds_phys->ds_uncompressed_bytes * 100 /
ds->ds_phys->ds_compressed_bytes));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
}
}

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -30,11 +31,30 @@
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#define WITH_NDF_BLOCK_ALLOCATOR
#define WITH_DF_BLOCK_ALLOCATOR
/*
 * "Fast" ganging lets an allocation fall back to gang blocks early
 * rather than loading many space_maps within one txg.  Some requests
 * must instead search every metaslab on the device exhaustively, so
 * fast ganging is refused for any gang-related or dump-device-related
 * allocation.
 *
 * Evaluates to 1 when none of METASLAB_GANG_CHILD, METASLAB_GANG_HEADER
 * or METASLAB_GANG_AVOID is set in 'flags', and to 0 otherwise.
 */
#define	CAN_FASTGANG(flags) \
	(((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
	METASLAB_GANG_AVOID)) == 0)
uint64_t metaslab_aliquot = 512ULL << 10;
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
* This value defines the number of allowed allocation failures per vdev.
* If a device reaches this threshold in a given txg then we consider skipping
* allocations on that device.
*/
int zfs_mg_alloc_failures;
/*
* Metaslab debugging: when set, keeps all space maps in core to verify frees.
*/
@ -865,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg)
}
static int
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
{
metaslab_group_t *mg = msp->ms_group;
space_map_t *sm = &msp->ms_map;
@ -899,13 +919,6 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
mutex_exit(&mg->mg_lock);
}
/*
* If we were able to load the map then make sure
* that this map is still able to satisfy our request.
*/
if (msp->ms_weight < size)
return (ENOSPC);
metaslab_group_sort(msp->ms_group, msp,
msp->ms_weight | activation_weight);
}
@ -1123,6 +1136,7 @@ void
metaslab_sync_reassess(metaslab_group_t *mg)
{
vdev_t *vd = mg->mg_vd;
int64_t failures = mg->mg_alloc_failures;
int m;
/*
@ -1140,6 +1154,8 @@ metaslab_sync_reassess(metaslab_group_t *mg)
mutex_exit(&msp->ms_lock);
}
atomic_add_64(&mg->mg_alloc_failures, -failures);
/*
* Prefetch the next potential metaslabs
*/
@ -1164,9 +1180,10 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
}
static uint64_t
metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
uint64_t min_distance, dva_t *dva, int d)
metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
{
spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_t *msp = NULL;
uint64_t offset = -1ULL;
avl_tree_t *t = &mg->mg_metaslab_tree;
@ -1187,11 +1204,17 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
mutex_enter(&mg->mg_lock);
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
if (msp->ms_weight < size) {
if (msp->ms_weight < asize) {
spa_dbgmsg(spa, "%s: failed to meet weight "
"requirement: vdev %llu, txg %llu, mg %p, "
"msp %p, psize %llu, asize %llu, "
"failures %llu, weight %llu",
spa_name(spa), mg->mg_vd->vdev_id, txg,
mg, msp, psize, asize,
mg->mg_alloc_failures, msp->ms_weight);
mutex_exit(&mg->mg_lock);
return (-1ULL);
}
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
break;
@ -1210,6 +1233,25 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
if (msp == NULL)
return (-1ULL);
/*
* If we've already reached the allowable number of failed
* allocation attempts on this metaslab group then we
* consider skipping it. We skip it only if we're allowed
* to "fast" gang, the physical size is larger than
* a gang block, and we're attempting to allocate from
* the primary metaslab.
*/
if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
activation_weight == METASLAB_WEIGHT_PRIMARY) {
spa_dbgmsg(spa, "%s: skipping metaslab group: "
"vdev %llu, txg %llu, mg %p, psize %llu, "
"asize %llu, failures %llu", spa_name(spa),
mg->mg_vd->vdev_id, txg, mg, psize, asize,
mg->mg_alloc_failures);
return (-1ULL);
}
mutex_enter(&msp->ms_lock);
/*
@ -1218,7 +1260,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
* another thread may have changed the weight while we
* were blocked on the metaslab lock.
*/
if (msp->ms_weight < size || (was_active &&
if (msp->ms_weight < asize || (was_active &&
!(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
activation_weight == METASLAB_WEIGHT_PRIMARY)) {
mutex_exit(&msp->ms_lock);
@ -1233,14 +1275,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
continue;
}
if (metaslab_activate(msp, activation_weight, size) != 0) {
if (metaslab_activate(msp, activation_weight) != 0) {
mutex_exit(&msp->ms_lock);
continue;
}
if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
break;
atomic_inc_64(&mg->mg_alloc_failures);
metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
mutex_exit(&msp->ms_lock);
@ -1249,7 +1293,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
mutex_exit(&msp->ms_lock);
@ -1376,7 +1420,8 @@ top:
asize = vdev_psize_to_asize(vd, psize);
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d);
offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
dva, d, flags);
if (offset != -1ULL) {
/*
* If we've just selected this metaslab group,
@ -1388,18 +1433,24 @@ top:
vdev_stat_t *vs = &vd->vdev_stat;
int64_t vu, cu;
/*
* Determine percent used in units of 0..1024.
* (This is just to avoid floating point.)
*/
vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
cu = (mc->mc_alloc << 10) / (mc->mc_space + 1);
vu = (vs->vs_alloc * 100) / (vs->vs_space + 1);
cu = (mc->mc_alloc * 100) / (mc->mc_space + 1);
/*
* Bias by at most +/- 25% of the aliquot.
* Calculate how much more or less we should
* try to allocate from this device during
* this iteration around the rotor.
* For example, if a device is 80% full
* and the pool is 20% full then we should
* reduce allocations by 60% on this device.
*
* mg_bias = (20 - 80) * 512K / 100 = -307K
*
* This reduces allocations by 307K for this
* iteration.
*/
mg->mg_bias = ((cu - vu) *
(int64_t)mg->mg_aliquot) / (1024 * 4);
(int64_t)mg->mg_aliquot) / 100;
}
if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
@ -1513,7 +1564,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
mutex_enter(&msp->ms_lock);
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
error = ENOENT;

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -1680,6 +1681,12 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
return (0);
}
/*
 * Report the pool's spa_debug flag.
 */
boolean_t
spa_debug_enabled(spa_t *spa)
{
	boolean_t enabled = spa->spa_debug;

	return (enabled);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Namespace manipulation */
EXPORT_SYMBOL(spa_lookup);

View File

@ -506,7 +506,7 @@ void
txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
{
tx_state_t *tx = &dp->dp_tx;
int timeout = ddi_get_lbolt() + ticks;
clock_t timeout = ddi_get_lbolt() + ticks;
/* don't delay if this txg could transition to quiescing immediately */
if (tx->tx_open_txg > txg ||

View File

@ -71,9 +71,16 @@
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each
* vdev's vdev_cache.
*
* TODO: Note that with the current ZFS code, it turns out that the
* vdev cache is not helpful, and in some cases actually harmful. It
* is better if we disable this. Once some time has passed, we should
* actually remove this to simplify the code. For now we just disable
* it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
* has made these same changes.
*/
int zfs_vdev_cache_max = 1<<14; /* 16KB */
int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */
int zfs_vdev_cache_size = 0;
int zfs_vdev_cache_bshift = 16;
#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */

View File

@ -701,6 +701,9 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
* and destroying snapshots requires descendent permissions, a successful
* check of the top level snapshot applies to snapshots of all descendent
* datasets as well.
*
* The target snapshot may not exist when doing a recursive destroy.
* In this case fall back to permissions of the parent dataset.
*/
static int
zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
@ -711,6 +714,8 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
error = zfs_secpolicy_destroy_perms(dsname, cr);
if (error == ENOENT)
error = zfs_secpolicy_destroy_perms(zc->zc_name, cr);
strfree(dsname);
return (error);

View File

@ -1560,12 +1560,12 @@ zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
dmu_buf_t **db)
dmu_buf_t **db, void *tag)
{
dmu_object_info_t doi;
int error;
if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0)
if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
return (error);
dmu_object_info_from_db(*db, &doi);
@ -1573,13 +1573,13 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
doi.doi_bonus_type != DMU_OT_ZNODE) ||
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t))) {
sa_buf_rele(*db, FTAG);
sa_buf_rele(*db, tag);
return (ENOTSUP);
}
error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
if (error != 0) {
sa_buf_rele(*db, FTAG);
sa_buf_rele(*db, tag);
return (error);
}
@ -1587,10 +1587,10 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
}
void
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db)
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
{
sa_handle_destroy(hdl);
sa_buf_rele(db, FTAG);
sa_buf_rele(db, tag);
}
/*
@ -1667,7 +1667,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
int is_xattrdir;
if (prevdb)
zfs_release_sa_handle(prevhdl, prevdb);
zfs_release_sa_handle(prevhdl, prevdb, FTAG);
if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj,
&is_xattrdir)) != 0)
@ -1699,7 +1699,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
prevhdl = sa_hdl;
prevdb = sa_db;
}
error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db);
error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
if (error != 0) {
sa_hdl = prevhdl;
sa_db = prevdb;
@ -1709,7 +1709,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
if (sa_hdl != NULL && sa_hdl != hdl) {
ASSERT(sa_db != NULL);
zfs_release_sa_handle(sa_hdl, sa_db);
zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
}
if (error == 0)
@ -1730,13 +1730,13 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
if (error != 0)
return (error);
error = zfs_grab_sa_handle(osp, obj, &hdl, &db);
error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
if (error != 0)
return (error);
error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
zfs_release_sa_handle(hdl, db);
zfs_release_sa_handle(hdl, db, FTAG);
return (error);
}
@ -1756,19 +1756,19 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
if (error != 0)
return (error);
error = zfs_grab_sa_handle(osp, obj, &hdl, &db);
error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
if (error != 0)
return (error);
error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
if (error != 0) {
zfs_release_sa_handle(hdl, db);
zfs_release_sa_handle(hdl, db, FTAG);
return (error);
}
error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
zfs_release_sa_handle(hdl, db);
zfs_release_sa_handle(hdl, db, FTAG);
return (error);
}

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -562,7 +563,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
if (!list_is_empty(&zilog->zl_lwb_list)) {
ASSERT(zh->zh_claim_txg == 0);
ASSERT(!keep_first);
VERIFY(!keep_first);
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
list_remove(&zilog->zl_lwb_list, lwb);
if (lwb->lwb_buf != NULL)
@ -1665,21 +1666,11 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
void
zil_free(zilog_t *zilog)
{
lwb_t *head_lwb;
int i;
zilog->zl_stop_sync = 1;
/*
* After zil_close() there should only be one lwb with a buffer.
*/
head_lwb = list_head(&zilog->zl_lwb_list);
if (head_lwb) {
ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list));
list_remove(&zilog->zl_lwb_list, head_lwb);
zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz);
kmem_cache_free(zil_lwb_cache, head_lwb);
}
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
avl_destroy(&zilog->zl_vdev_tree);
@ -1719,6 +1710,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
{
zilog_t *zilog = dmu_objset_zil(os);
ASSERT(zilog->zl_clean_taskq == NULL);
ASSERT(zilog->zl_get_data == NULL);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
zilog->zl_get_data = get_data;
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
2, 2, TASKQ_PREPOPULATE);
@ -1732,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
void
zil_close(zilog_t *zilog)
{
lwb_t *tail_lwb;
lwb_t *lwb;
uint64_t txg = 0;
zil_commit(zilog, 0); /* commit all itx */
@ -1744,9 +1739,9 @@ zil_close(zilog_t *zilog)
* destroy the zl_clean_taskq.
*/
mutex_enter(&zilog->zl_lock);
tail_lwb = list_tail(&zilog->zl_lwb_list);
if (tail_lwb != NULL)
txg = tail_lwb->lwb_max_txg;
lwb = list_tail(&zilog->zl_lwb_list);
if (lwb != NULL)
txg = lwb->lwb_max_txg;
mutex_exit(&zilog->zl_lock);
if (txg)
txg_wait_synced(zilog->zl_dmu_pool, txg);
@ -1754,6 +1749,19 @@ zil_close(zilog_t *zilog)
taskq_destroy(zilog->zl_clean_taskq);
zilog->zl_clean_taskq = NULL;
zilog->zl_get_data = NULL;
/*
* We should have only one LWB left on the list; remove it now.
*/
mutex_enter(&zilog->zl_lock);
lwb = list_head(&zilog->zl_lwb_list);
if (lwb != NULL) {
ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
list_remove(&zilog->zl_lwb_list, lwb);
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
kmem_cache_free(zil_lwb_cache, lwb);
}
mutex_exit(&zilog->zl_lock);
}
/*

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -79,6 +80,7 @@ int zio_delay_max = ZIO_DELAY_MAX;
#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
#endif
extern int zfs_mg_alloc_failures;
/*
* An allocating zio is one that either currently has the DVA allocate
@ -158,6 +160,12 @@ zio_init(void)
zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
}
/*
* The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
* to fail 3 times per txg or 8 failures, whichever is greater.
*/
zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
zio_inject_init();
}
@ -2151,6 +2159,7 @@ zio_dva_allocate(zio_t *zio)
metaslab_class_t *mc = spa_normal_class(spa);
blkptr_t *bp = zio->io_bp;
int error;
int flags = 0;
if (zio->io_gang_leader == NULL) {
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
@ -2163,10 +2172,21 @@ zio_dva_allocate(zio_t *zio)
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
/*
* The dump device does not support gang blocks so allocation on
* behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
* the "fast" gang feature.
*/
flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
METASLAB_GANG_CHILD : 0;
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, 0);
zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
if (error) {
spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
"size %llu, error %d", spa_name(spa), zio, zio->io_size,
error);
if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
return (zio_write_gang_block(zio));
zio->io_error = error;