Mirror of https://git.proxmox.com/git/mirror_zfs.git
Merge branch 'illumos'

Merge in ten upstream fixes which have already been made to both the
Illumos and FreeBSD ZFS implementations. This brings us up to date with
the latest ZFS changes in Illumos. Credit goes to Martin Matuska of the
FreeBSD project for posting an excellent summary of the upstream patches
we were missing.

  Illumos #1313: Integer overflow in txg_delay()
  Illumos #278:  get rid zfs of python and pyzfs dependencies
  Illumos #1043: Recursive zfs snapshot destroy fails
  Illumos #883:  ZIL reuse during remount corruption
  Illumos #1092: zfs refratio property
  Illumos #1051: zfs should handle
  Illumos #510:  'zfs get' enhancement - mountpoint as an argument
  Illumos #175:  zfs vdev cache consumes excessive memory
  Illumos #764:  panic in zfs:dbuf_sync_list
  Illumos #xxx:  zdb -vvv broken after zfs diff integration

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #340
This commit is contained in:
commit 77999e804f

cmd/zfs/zfs_main.c: 2276 lines changed (file diff suppressed because it is too large).
cmd/ztest/ztest.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 /*
@@ -205,6 +206,7 @@ typedef struct ztest_od {
  */
 typedef struct ztest_ds {
 	objset_t *zd_os;
+	krwlock_t zd_zilog_lock;
 	zilog_t *zd_zilog;
 	uint64_t zd_seq;
 	ztest_od_t *zd_od;		/* debugging aid */
@@ -238,6 +240,7 @@ ztest_func_t ztest_dmu_commit_callbacks;
 ztest_func_t ztest_zap;
 ztest_func_t ztest_zap_parallel;
 ztest_func_t ztest_zil_commit;
+ztest_func_t ztest_zil_remount;
 ztest_func_t ztest_dmu_read_write_zcopy;
 ztest_func_t ztest_dmu_objset_create_destroy;
 ztest_func_t ztest_dmu_prealloc;
@@ -273,6 +276,7 @@ ztest_info_t ztest_info[] = {
 	{ ztest_zap_parallel, 100, &zopt_always },
 	{ ztest_split_pool, 1, &zopt_always },
 	{ ztest_zil_commit, 1, &zopt_incessant },
+	{ ztest_zil_remount, 1, &zopt_sometimes },
 	{ ztest_dmu_read_write_zcopy, 1, &zopt_often },
 	{ ztest_dmu_objset_create_destroy, 1, &zopt_often },
 	{ ztest_dsl_prop_get_set, 1, &zopt_often },
@@ -1006,6 +1010,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os)
 	dmu_objset_name(os, zd->zd_name);
 	int l;
 
+	rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL);
 	mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
@@ -1021,6 +1026,7 @@ ztest_zd_fini(ztest_ds_t *zd)
 	int l;
 
 	mutex_destroy(&zd->zd_dirobj_lock);
+	rw_destroy(&zd->zd_zilog_lock);
 
 	for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
 		ztest_rll_destroy(&zd->zd_object_lock[l]);
@@ -1992,6 +1998,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
 	if (ztest_random(2) == 0)
 		io_type = ZTEST_IO_WRITE_TAG;
 
+	(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
+
 	switch (io_type) {
 
 	case ZTEST_IO_WRITE_TAG:
@@ -2029,6 +2037,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
 		break;
 	}
 
+	(void) rw_exit(&zd->zd_zilog_lock);
+
 	umem_free(data, blocksize);
 }
 
@@ -2083,6 +2093,8 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
 {
 	zilog_t *zilog = zd->zd_zilog;
 
+	(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
+
 	zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
 
 	/*
@@ -2094,6 +2106,31 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
 	ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq);
 	zd->zd_seq = zilog->zl_commit_lr_seq;
 	mutex_exit(&zilog->zl_lock);
+
+	(void) rw_exit(&zd->zd_zilog_lock);
+}
+
+/*
+ * This function is designed to simulate the operations that occur during a
+ * mount/unmount operation. We hold the dataset across these operations in an
+ * attempt to expose any implicit assumptions about ZIL management.
+ */
+/* ARGSUSED */
+void
+ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
+{
+	objset_t *os = zd->zd_os;
+
+	(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);
+
+	/* zfsvfs_teardown() */
+	zil_close(zd->zd_zilog);
+
+	/* zfsvfs_setup() */
+	VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
+	zil_replay(os, zd, ztest_replay_vector);
+
+	(void) rw_exit(&zd->zd_zilog_lock);
 }
 
 /*
@@ -5300,6 +5337,7 @@ ztest_run(ztest_shared_t *zs)
 	 */
 	kernel_init(FREAD | FWRITE);
 	VERIFY(spa_open(zs->zs_pool, &spa, FTAG) == 0);
+	spa->spa_debug = B_TRUE;
 	zs->zs_spa = spa;
 
 	spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
include/libzfs.h

@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
  */
 
 #ifndef	_LIBZFS_H
@@ -572,13 +573,17 @@ extern int zfs_promote(zfs_handle_t *);
 extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
     boolean_t, boolean_t, int, uint64_t, uint64_t);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
+extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
 extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
 
 typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
     uid_t rid, uint64_t space);
 
-extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
-    zfs_userspace_cb_t func, void *arg);
+extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t,
+    zfs_userspace_cb_t, void *);
+
+extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **);
+extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *);
 
 typedef struct recvflags {
 	/* print informational messages (ie, -v was specified) */
include/sys/fs/zfs.h

@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -122,6 +123,7 @@ typedef enum {
 	ZFS_PROP_DEDUP,
 	ZFS_PROP_MLSLABEL,
 	ZFS_PROP_SYNC,
+	ZFS_PROP_REFRATIO,
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
include/sys/metaslab.h

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_H
@@ -47,6 +48,8 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg);
 #define	METASLAB_HINTBP_FAVOR	0x0
 #define	METASLAB_HINTBP_AVOID	0x1
 #define	METASLAB_GANG_HEADER	0x2
+#define	METASLAB_GANG_CHILD	0x4
+#define	METASLAB_GANG_AVOID	0x8
 
 extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
     blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
include/sys/metaslab_impl.h

@@ -21,6 +21,7 @@
 /*
  * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_IMPL_H
@@ -52,6 +53,7 @@ struct metaslab_group {
 	avl_tree_t	mg_metaslab_tree;
 	uint64_t	mg_aliquot;
 	uint64_t	mg_bonus_area;
+	uint64_t	mg_alloc_failures;
 	int64_t		mg_bias;
 	int64_t		mg_activation_count;
 	metaslab_class_t	*mg_class;
include/sys/spa.h

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_SPA_H
@@ -698,6 +699,13 @@ _NOTE(CONSTCOND) } while (0)
 #define	dprintf_bp(bp, fmt, ...)
 #endif
 
+extern boolean_t spa_debug_enabled(spa_t *spa);
+#define	spa_dbgmsg(spa, ...)			\
+{						\
+	if (spa_debug_enabled(spa))		\
+		zfs_dbgmsg(__VA_ARGS__);	\
+}
+
 extern int spa_mode_global;		/* mode, e.g. FREAD | FWRITE */
 
 #ifdef __cplusplus
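Note on the new macro: spa_dbgmsg() only forwards its arguments to zfs_dbgmsg() when debugging has been switched on for that pool (ztest does this above by setting spa->spa_debug = B_TRUE, and spa_debug_enabled() is added in spa_misc.c below). A minimal caller sketch, assuming kernel/ztest context with a valid spa_t pointer in hand; the function name and message text are illustrative only, not part of this commit:

	/* Sketch only: emit a pool-scoped debug message when spa->spa_debug is set. */
	static void
	example_log_txg(spa_t *spa, uint64_t txg)
	{
		/* Falls through to zfs_dbgmsg() only if spa_debug_enabled(spa) is true. */
		spa_dbgmsg(spa, "%s: processing txg %llu",
		    spa_name(spa), (u_longlong_t)txg);
	}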
include/sys/spa_impl.h

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_SPA_IMPL_H
@@ -196,6 +197,7 @@ struct spa {
 	kcondvar_t	spa_suspend_cv;	/* notification of resume */
 	uint8_t		spa_suspended;	/* pool is suspended */
 	uint8_t		spa_claiming;	/* pool is doing zil_claim() */
+	boolean_t	spa_debug;	/* debug enabled? */
 	boolean_t	spa_is_root;	/* pool is root */
 	int		spa_minref;	/* num refs when first opened */
 	int		spa_mode;	/* FREAD | FWRITE */
include/sys/zfs_deleg.h

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
  */
 
 #ifndef _ZFS_DELEG_H
@@ -51,6 +52,7 @@ typedef enum {
 	ZFS_DELEG_NOTE_CLONE,
 	ZFS_DELEG_NOTE_PROMOTE,
 	ZFS_DELEG_NOTE_RENAME,
+	ZFS_DELEG_NOTE_SEND,
 	ZFS_DELEG_NOTE_RECEIVE,
 	ZFS_DELEG_NOTE_ALLOW,
 	ZFS_DELEG_NOTE_USERPROP,
lib/libzfs/libzfs_dataset.c

@@ -21,6 +21,8 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #include <ctype.h>
@@ -94,6 +96,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
 	namecheck_err_t why;
 	char what;
 
+	(void) zfs_prop_get_table();
 	if (dataset_namecheck(path, &why, &what) != 0) {
 		if (hdl != NULL) {
 			switch (why) {
@@ -2025,6 +2028,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		}
 		break;
 
+	case ZFS_PROP_REFRATIO:
 	case ZFS_PROP_COMPRESSRATIO:
 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
 			return (-1);
@@ -4311,6 +4315,193 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
 	return (0);
 }
 
+int
+zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
+{
+	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	int nvsz = 2048;
+	void *nvbuf;
+	int err = 0;
+	char errbuf[ZFS_MAXNAMELEN+32];
+
+	assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
+	    zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+tryagain:
+
+	nvbuf = malloc(nvsz);
+	if (nvbuf == NULL) {
+		err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
+		goto out;
+	}
+
+	zc.zc_nvlist_dst_size = nvsz;
+	zc.zc_nvlist_dst = (uintptr_t)nvbuf;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"),
+		    zc.zc_name);
+		switch (errno) {
+		case ENOMEM:
+			free(nvbuf);
+			nvsz = zc.zc_nvlist_dst_size;
+			goto tryagain;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded"));
+			err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+		case EINVAL:
+			err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
+			break;
+		case ENOENT:
+			err = zfs_error(hdl, EZFS_NOENT, errbuf);
+			break;
+		default:
+			err = zfs_standard_error_fmt(hdl, errno, errbuf);
+			break;
+		}
+	} else {
+		/* success */
+		int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
+		if (rc) {
+			(void) snprintf(errbuf, sizeof (errbuf), dgettext(
+			    TEXT_DOMAIN, "cannot get permissions on '%s'"),
+			    zc.zc_name);
+			err = zfs_standard_error_fmt(hdl, rc, errbuf);
+		}
+	}
+
+	free(nvbuf);
+out:
+	return (err);
+}
+
+int
+zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
+{
+	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char *nvbuf;
+	char errbuf[ZFS_MAXNAMELEN+32];
+	size_t nvsz;
+	int err;
+
+	assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
+	    zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
+
+	err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE);
+	assert(err == 0);
+
+	nvbuf = malloc(nvsz);
+
+	err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0);
+	assert(err == 0);
+
+	zc.zc_nvlist_src_size = nvsz;
+	zc.zc_nvlist_src = (uintptr_t)nvbuf;
+	zc.zc_perm_action = un;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"),
+		    zc.zc_name);
+		switch (errno) {
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded"));
+			err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+		case EINVAL:
+			err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
+			break;
+		case ENOENT:
+			err = zfs_error(hdl, EZFS_NOENT, errbuf);
+			break;
+		default:
+			err = zfs_standard_error_fmt(hdl, errno, errbuf);
+			break;
+		}
+	}
+
+	free(nvbuf);
+
+	return (err);
+}
+
+int
+zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
+{
+	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	int nvsz = 2048;
+	void *nvbuf;
+	int err = 0;
+	char errbuf[ZFS_MAXNAMELEN+32];
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+
+tryagain:
+
+	nvbuf = malloc(nvsz);
+	if (nvbuf == NULL) {
+		err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
+		goto out;
+	}
+
+	zc.zc_nvlist_dst_size = nvsz;
+	zc.zc_nvlist_dst = (uintptr_t)nvbuf;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
+
+	if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
+		    zc.zc_name);
+		switch (errno) {
+		case ENOMEM:
+			free(nvbuf);
+			nvsz = zc.zc_nvlist_dst_size;
+			goto tryagain;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded"));
+			err = zfs_error(hdl, EZFS_BADVERSION, errbuf);
+			break;
+		case EINVAL:
+			err = zfs_error(hdl, EZFS_BADTYPE, errbuf);
+			break;
+		case ENOENT:
+			err = zfs_error(hdl, EZFS_NOENT, errbuf);
+			break;
+		default:
+			err = zfs_standard_error_fmt(hdl, errno, errbuf);
+			break;
+		}
+	} else {
+		/* success */
+		int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
+		if (rc) {
+			(void) snprintf(errbuf, sizeof (errbuf),
+			    dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
+			    zc.zc_name);
+			err = zfs_standard_error_fmt(hdl, rc, errbuf);
+		}
+	}
+
+	free(nvbuf);
+out:
+	return (err);
+}
+
 uint64_t
 zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props)
 {
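For context, the zfs_get_holds() function added above returns an nvlist whose pairs name each user-hold tag on a snapshot, growing its buffer and retrying when the ioctl reports ENOMEM. A minimal consumer sketch against the new libzfs interface; the function and variable names below are hypothetical and error handling is abbreviated:

	#include <stdio.h>
	#include <libzfs.h>

	/* Sketch only: print the user-hold tags on one snapshot. */
	static int
	print_holds(libzfs_handle_t *g_zfs, const char *snapname)
	{
		zfs_handle_t *zhp;
		nvlist_t *holds = NULL;
		nvpair_t *pair = NULL;

		if ((zhp = zfs_open(g_zfs, snapname, ZFS_TYPE_SNAPSHOT)) == NULL)
			return (-1);

		if (zfs_get_holds(zhp, &holds) == 0) {
			/* Each nvpair name is a hold tag placed by 'zfs hold'. */
			while ((pair = nvlist_next_nvpair(holds, pair)) != NULL)
				(void) printf("%s\t%s\n", snapname, nvpair_name(pair));
			nvlist_free(holds);
		}

		zfs_close(zhp);
		return (0);
	}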
man/man8/zfs.8

@@ -360,7 +360,7 @@ This property can also be referred to by its shortened column name, \fBavail\fR.
 .ad
 .sp .6
 .RS 4n
-The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR.
+For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR.
 .RE
 
 .sp
@@ -420,6 +420,19 @@ The amount of data that is accessible by this dataset, which may or may not be s
 This property can also be referred to by its shortened column name, \fBrefer\fR.
 .RE
 
+.sp
+.ne 2
+.mk
+.na
+\fB\fBrefcompressratio\fR\fR
+.ad
+.sp .6
+.RS 4n
+The compression ratio achieved for the \fBreferenced\fR space of this
+dataset, expressed as a multiplier. See also the \fBcompressratio\fR
+property.
+.RE
+
 .sp
 .ne 2
 .mk
@@ -1235,7 +1248,7 @@ Recursively destroy all dependents, including cloned file systems outside the ta
 Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems.
 .RE
 
-Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use.
+Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use.
 .RE
 
 .sp
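As a usage illustration (hypothetical dataset names): running zfs get compressratio,refcompressratio pool/home@snap on a snapshot reports the same value for both properties, while on a live file system compressratio covers the used space (including descendants) and refcompressratio covers only the referenced space.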
module/zcommon/zfs_deleg.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
  */
 
 #if defined(_KERNEL)
@@ -60,7 +61,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
 	{ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
 	{ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
 	{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
-	{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
+	{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
 	{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
 	{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
 	{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
module/zcommon/zfs_prop.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -311,6 +312,9 @@ zfs_prop_init(void)
 	zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
 	    PROP_READONLY, ZFS_TYPE_DATASET,
 	    "<1.00x or higher if compressed>", "RATIO");
+	zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0,
+	    PROP_READONLY, ZFS_TYPE_DATASET,
+	    "<1.00x or higher if compressed>", "REFRATIO");
 	zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
 	    ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
 	    ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");
module/zfs/dbuf.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1347,13 +1348,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	 * it, since one of the current holders may be in the
 	 * middle of an update. Note that users of dbuf_undirty()
 	 * should not place a hold on the dbuf before the call.
+	 * Also note: we can get here with a spill block, so
+	 * test for that similar to how dbuf_dirty does.
 	 */
 	if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
 		mutex_exit(&db->db_mtx);
 		/* Make sure we don't toss this buffer at sync phase */
+		if (db->db_blkid != DMU_SPILL_BLKID) {
 			mutex_enter(&dn->dn_mtx);
 			dnode_clear_range(dn, db->db_blkid, 1, tx);
 			mutex_exit(&dn->dn_mtx);
+		}
 		DB_DNODE_EXIT(db);
 		return (0);
 	}
@@ -1366,11 +1371,18 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 
 	*drp = dr->dr_next;
 
+	/*
+	 * Note that there are three places in dbuf_dirty()
+	 * where this dirty record may be put on a list.
+	 * Make sure to do a list_remove corresponding to
+	 * every one of those list_insert calls.
+	 */
 	if (dr->dr_parent) {
 		mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
 		list_remove(&dr->dr_parent->dt.di.dr_children, dr);
 		mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
-	} else if (db->db_level+1 == dn->dn_nlevels) {
+	} else if (db->db_blkid == DMU_SPILL_BLKID ||
+	    db->db_level+1 == dn->dn_nlevels) {
 		ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
 		mutex_enter(&dn->dn_mtx);
 		list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
module/zfs/dsl_dataset.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu_objset.h>
@@ -2153,7 +2154,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
 void
 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 {
-	uint64_t refd, avail, uobjs, aobjs;
+	uint64_t refd, avail, uobjs, aobjs, ratio;
 
 	dsl_dir_stats(ds->ds_dir, nv);
 
@@ -2180,6 +2181,11 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
 	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
 
+	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
+	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
+	    ds->ds_phys->ds_compressed_bytes);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
+
 	if (ds->ds_phys->ds_next_snap_obj) {
 		/*
 		 * This is a snapshot; override the dd's space used with
@@ -2187,10 +2193,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 		 */
 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
 		    ds->ds_phys->ds_unique_bytes);
-		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
-		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
-		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
-		    ds->ds_phys->ds_compressed_bytes));
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
 	}
 }
 
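The new refratio value is kept as an integer percentage so the kernel avoids floating point. With illustrative numbers: a snapshot referencing 300 MB of uncompressed data stored in 200 MB on disk yields ratio = 300 * 100 / 200 = 150, which the property layer presents as 1.50x; when ds_compressed_bytes is zero the code falls back to 100, i.e. 1.00x.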
module/zfs/metaslab.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -30,11 +31,30 @@
 #include <sys/vdev_impl.h>
 #include <sys/zio.h>
 
-#define WITH_NDF_BLOCK_ALLOCATOR
+#define WITH_DF_BLOCK_ALLOCATOR
 
+/*
+ * Allow allocations to switch to gang blocks quickly. We do this to
+ * avoid having to load lots of space_maps in a given txg. There are,
+ * however, some cases where we want to avoid "fast" ganging and instead
+ * we want to do an exhaustive search of all metaslabs on this device.
+ * Currently we don't allow any gang or dump device related allocations
+ * to "fast" gang.
+ */
+#define	CAN_FASTGANG(flags) \
+	(!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
+	METASLAB_GANG_AVOID)))
+
 uint64_t metaslab_aliquot = 512ULL << 10;
 uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1;	/* force gang blocks */
 
+/*
+ * This value defines the number of allowed allocation failures per vdev.
+ * If a device reaches this threshold in a given txg then we consider skipping
+ * allocations on that device.
+ */
+int zfs_mg_alloc_failures;
+
 /*
  * Metaslab debugging: when set, keeps all space maps in core to verify frees.
  */
@@ -865,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg)
 }
 
 static int
-metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
+metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
 {
 	metaslab_group_t *mg = msp->ms_group;
 	space_map_t *sm = &msp->ms_map;
@@ -899,13 +919,6 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
 			mutex_exit(&mg->mg_lock);
 		}
 
-		/*
-		 * If we were able to load the map then make sure
-		 * that this map is still able to satisfy our request.
-		 */
-		if (msp->ms_weight < size)
-			return (ENOSPC);
-
 		metaslab_group_sort(msp->ms_group, msp,
 		    msp->ms_weight | activation_weight);
 	}
@@ -1123,6 +1136,7 @@ void
 metaslab_sync_reassess(metaslab_group_t *mg)
 {
 	vdev_t *vd = mg->mg_vd;
+	int64_t failures = mg->mg_alloc_failures;
 	int m;
 
 	/*
@@ -1140,6 +1154,8 @@ metaslab_sync_reassess(metaslab_group_t *mg)
 		mutex_exit(&msp->ms_lock);
 	}
 
+	atomic_add_64(&mg->mg_alloc_failures, -failures);
+
 	/*
 	 * Prefetch the next potential metaslabs
 	 */
@@ -1164,9 +1180,10 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
 }
 
 static uint64_t
-metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
-    uint64_t min_distance, dva_t *dva, int d)
+metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
+    uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
 {
+	spa_t *spa = mg->mg_vd->vdev_spa;
 	metaslab_t *msp = NULL;
 	uint64_t offset = -1ULL;
 	avl_tree_t *t = &mg->mg_metaslab_tree;
@@ -1187,11 +1204,17 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
 
 	mutex_enter(&mg->mg_lock);
 	for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
-		if (msp->ms_weight < size) {
+		if (msp->ms_weight < asize) {
+			spa_dbgmsg(spa, "%s: failed to meet weight "
+			    "requirement: vdev %llu, txg %llu, mg %p, "
+			    "msp %p, psize %llu, asize %llu, "
+			    "failures %llu, weight %llu",
+			    spa_name(spa), mg->mg_vd->vdev_id, txg,
+			    mg, msp, psize, asize,
+			    mg->mg_alloc_failures, msp->ms_weight);
 			mutex_exit(&mg->mg_lock);
 			return (-1ULL);
 		}
 
 		was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
 		if (activation_weight == METASLAB_WEIGHT_PRIMARY)
 			break;
@@ -1210,6 +1233,25 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
 		if (msp == NULL)
 			return (-1ULL);
 
+		/*
+		 * If we've already reached the allowable number of failed
+		 * allocation attempts on this metaslab group then we
+		 * consider skipping it. We skip it only if we're allowed
+		 * to "fast" gang, the physical size is larger than
+		 * a gang block, and we're attempting to allocate from
+		 * the primary metaslab.
+		 */
+		if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
+		    CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
+		    activation_weight == METASLAB_WEIGHT_PRIMARY) {
+			spa_dbgmsg(spa, "%s: skipping metaslab group: "
+			    "vdev %llu, txg %llu, mg %p, psize %llu, "
+			    "asize %llu, failures %llu", spa_name(spa),
+			    mg->mg_vd->vdev_id, txg, mg, psize, asize,
+			    mg->mg_alloc_failures);
+			return (-1ULL);
+		}
+
 		mutex_enter(&msp->ms_lock);
 
 		/*
@@ -1218,7 +1260,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
 		 * another thread may have changed the weight while we
 		 * were blocked on the metaslab lock.
 		 */
-		if (msp->ms_weight < size || (was_active &&
+		if (msp->ms_weight < asize || (was_active &&
 		    !(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
 		    activation_weight == METASLAB_WEIGHT_PRIMARY)) {
 			mutex_exit(&msp->ms_lock);
@@ -1233,14 +1275,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
 			continue;
 		}
 
-		if (metaslab_activate(msp, activation_weight, size) != 0) {
+		if (metaslab_activate(msp, activation_weight) != 0) {
 			mutex_exit(&msp->ms_lock);
 			continue;
 		}
 
-		if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
+		if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
 			break;
 
+		atomic_inc_64(&mg->mg_alloc_failures);
+
 		metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
 
 		mutex_exit(&msp->ms_lock);
@@ -1249,7 +1293,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
 	if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
 		vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
 
-	space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
+	space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
 
 	mutex_exit(&msp->ms_lock);
 
@@ -1376,7 +1420,8 @@ top:
 		asize = vdev_psize_to_asize(vd, psize);
 		ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
 
-		offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d);
+		offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
+		    dva, d, flags);
 		if (offset != -1ULL) {
 			/*
 			 * If we've just selected this metaslab group,
@@ -1388,18 +1433,24 @@ top:
 			vdev_stat_t *vs = &vd->vdev_stat;
 			int64_t vu, cu;
 
-			/*
-			 * Determine percent used in units of 0..1024.
-			 * (This is just to avoid floating point.)
-			 */
-			vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
-			cu = (mc->mc_alloc << 10) / (mc->mc_space + 1);
+			vu = (vs->vs_alloc * 100) / (vs->vs_space + 1);
+			cu = (mc->mc_alloc * 100) / (mc->mc_space + 1);
 
 			/*
-			 * Bias by at most +/- 25% of the aliquot.
+			 * Calculate how much more or less we should
+			 * try to allocate from this device during
+			 * this iteration around the rotor.
+			 * For example, if a device is 80% full
+			 * and the pool is 20% full then we should
+			 * reduce allocations by 60% on this device.
+			 *
+			 * mg_bias = (20 - 80) * 512K / 100 = -307K
+			 *
+			 * This reduces allocations by 307K for this
+			 * iteration.
 			 */
 			mg->mg_bias = ((cu - vu) *
-			    (int64_t)mg->mg_aliquot) / (1024 * 4);
+			    (int64_t)mg->mg_aliquot) / 100;
 		}
 
 		if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
@@ -1513,7 +1564,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
 	mutex_enter(&msp->ms_lock);
 
 	if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
-		error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
+		error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
 
 	if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
 		error = ENOENT;
module/zfs/spa_misc.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1680,6 +1681,12 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
 	return (0);
 }
 
+boolean_t
+spa_debug_enabled(spa_t *spa)
+{
+	return (spa->spa_debug);
+}
+
 #if defined(_KERNEL) && defined(HAVE_SPL)
 /* Namespace manipulation */
 EXPORT_SYMBOL(spa_lookup);
module/zfs/txg.c

@@ -506,7 +506,7 @@ void
 txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
 {
 	tx_state_t *tx = &dp->dp_tx;
-	int timeout = ddi_get_lbolt() + ticks;
+	clock_t timeout = ddi_get_lbolt() + ticks;
 
 	/* don't delay if this txg could transition to quiesing immediately */
 	if (tx->tx_open_txg > txg ||
module/zfs/vdev_cache.c

@@ -71,9 +71,16 @@
  * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
  * track buffer). At most zfs_vdev_cache_size bytes will be kept in each
  * vdev's vdev_cache.
+ *
+ * TODO: Note that with the current ZFS code, it turns out that the
+ * vdev cache is not helpful, and in some cases actually harmful. It
+ * is better if we disable this. Once some time has passed, we should
+ * actually remove this to simplify the code. For now we just disable
+ * it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
+ * has made these same changes.
  */
 int zfs_vdev_cache_max = 1<<14;			/* 16KB */
-int zfs_vdev_cache_size = 10ULL << 20;		/* 10MB */
+int zfs_vdev_cache_size = 0;
 int zfs_vdev_cache_bshift = 16;
 
 #define	VCBS (1 << zfs_vdev_cache_bshift)	/* 64KB */
module/zfs/zfs_ioctl.c

@@ -701,6 +701,9 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
  * and destroying snapshots requires descendent permissions, a successfull
  * check of the top level snapshot applies to snapshots of all descendent
  * datasets as well.
+ *
+ * The target snapshot may not exist when doing a recursive destroy.
+ * In this case fallback to permissions of the parent dataset.
  */
 static int
 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
@@ -711,6 +714,8 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
 	dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
 
 	error = zfs_secpolicy_destroy_perms(dsname, cr);
+	if (error == ENOENT)
+		error = zfs_secpolicy_destroy_perms(zc->zc_name, cr);
 
 	strfree(dsname);
 	return (error);
module/zfs/zfs_znode.c

@@ -1560,12 +1560,12 @@ zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
 
 static int
 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
-    dmu_buf_t **db)
+    dmu_buf_t **db, void *tag)
 {
 	dmu_object_info_t doi;
 	int error;
 
-	if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0)
+	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
 		return (error);
 
 	dmu_object_info_from_db(*db, &doi);
@@ -1573,13 +1573,13 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
 	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
-		sa_buf_rele(*db, FTAG);
+		sa_buf_rele(*db, tag);
 		return (ENOTSUP);
 	}
 
 	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
 	if (error != 0) {
-		sa_buf_rele(*db, FTAG);
+		sa_buf_rele(*db, tag);
 		return (error);
 	}
 
@@ -1587,10 +1587,10 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
 }
 
 void
-zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db)
+zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
 {
 	sa_handle_destroy(hdl);
-	sa_buf_rele(db, FTAG);
+	sa_buf_rele(db, tag);
 }
 
 /*
@@ -1667,7 +1667,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
 		int is_xattrdir;
 
 		if (prevdb)
-			zfs_release_sa_handle(prevhdl, prevdb);
+			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
 
 		if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj,
 		    &is_xattrdir)) != 0)
@@ -1699,7 +1699,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
 			prevhdl = sa_hdl;
 			prevdb = sa_db;
 		}
-		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db);
+		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
 		if (error != 0) {
 			sa_hdl = prevhdl;
 			sa_db = prevdb;
@@ -1709,7 +1709,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
 
 	if (sa_hdl != NULL && sa_hdl != hdl) {
 		ASSERT(sa_db != NULL);
-		zfs_release_sa_handle(sa_hdl, sa_db);
+		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
 	}
 
 	if (error == 0)
@@ -1730,13 +1730,13 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
 	if (error != 0)
 		return (error);
 
-	error = zfs_grab_sa_handle(osp, obj, &hdl, &db);
+	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
 
-	zfs_release_sa_handle(hdl, db);
+	zfs_release_sa_handle(hdl, db, FTAG);
 	return (error);
 }
 
@@ -1756,19 +1756,19 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
 	if (error != 0)
 		return (error);
 
-	error = zfs_grab_sa_handle(osp, obj, &hdl, &db);
+	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
 	if (error != 0) {
-		zfs_release_sa_handle(hdl, db);
+		zfs_release_sa_handle(hdl, db, FTAG);
 		return (error);
 	}
 
 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
 
-	zfs_release_sa_handle(hdl, db);
+	zfs_release_sa_handle(hdl, db, FTAG);
 	return (error);
 }
 
module/zfs/zil.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -562,7 +563,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
 
 	if (!list_is_empty(&zilog->zl_lwb_list)) {
 		ASSERT(zh->zh_claim_txg == 0);
-		ASSERT(!keep_first);
+		VERIFY(!keep_first);
 		while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
 			list_remove(&zilog->zl_lwb_list, lwb);
 			if (lwb->lwb_buf != NULL)
@@ -1665,21 +1666,11 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
 void
 zil_free(zilog_t *zilog)
 {
-	lwb_t *head_lwb;
 	int i;
 
 	zilog->zl_stop_sync = 1;
 
-	/*
-	 * After zil_close() there should only be one lwb with a buffer.
-	 */
-	head_lwb = list_head(&zilog->zl_lwb_list);
-	if (head_lwb) {
-		ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list));
-		list_remove(&zilog->zl_lwb_list, head_lwb);
-		zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz);
-		kmem_cache_free(zil_lwb_cache, head_lwb);
-	}
+	ASSERT(list_is_empty(&zilog->zl_lwb_list));
 	list_destroy(&zilog->zl_lwb_list);
 
 	avl_destroy(&zilog->zl_vdev_tree);
@@ -1719,6 +1710,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
 {
 	zilog_t *zilog = dmu_objset_zil(os);
 
+	ASSERT(zilog->zl_clean_taskq == NULL);
+	ASSERT(zilog->zl_get_data == NULL);
+	ASSERT(list_is_empty(&zilog->zl_lwb_list));
+
 	zilog->zl_get_data = get_data;
 	zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
 	    2, 2, TASKQ_PREPOPULATE);
@@ -1732,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
 void
 zil_close(zilog_t *zilog)
 {
-	lwb_t *tail_lwb;
+	lwb_t *lwb;
 	uint64_t txg = 0;
 
 	zil_commit(zilog, 0); /* commit all itx */
@@ -1744,9 +1739,9 @@ zil_close(zilog_t *zilog)
 	 * destroy the zl_clean_taskq.
 	 */
 	mutex_enter(&zilog->zl_lock);
-	tail_lwb = list_tail(&zilog->zl_lwb_list);
-	if (tail_lwb != NULL)
-		txg = tail_lwb->lwb_max_txg;
+	lwb = list_tail(&zilog->zl_lwb_list);
+	if (lwb != NULL)
+		txg = lwb->lwb_max_txg;
 	mutex_exit(&zilog->zl_lock);
 	if (txg)
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
@@ -1754,6 +1749,19 @@ zil_close(zilog_t *zilog)
 	taskq_destroy(zilog->zl_clean_taskq);
 	zilog->zl_clean_taskq = NULL;
 	zilog->zl_get_data = NULL;
+
+	/*
+	 * We should have only one LWB left on the list; remove it now.
+	 */
+	mutex_enter(&zilog->zl_lock);
+	lwb = list_head(&zilog->zl_lwb_list);
+	if (lwb != NULL) {
+		ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
+		list_remove(&zilog->zl_lwb_list, lwb);
+		zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
+		kmem_cache_free(zil_lwb_cache, lwb);
+	}
+	mutex_exit(&zilog->zl_lock);
 }
 
 /*
module/zfs/zio.c

@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -79,6 +80,7 @@ int zio_delay_max = ZIO_DELAY_MAX;
 #ifdef _KERNEL
 extern vmem_t *zio_alloc_arena;
 #endif
+extern int zfs_mg_alloc_failures;
 
 /*
  * An allocating zio is one that either currently has the DVA allocate
@@ -158,6 +160,12 @@ zio_init(void)
 			zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
 	}
 
+	/*
+	 * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
+	 * to fail 3 times per txg or 8 failures, whichever is greater.
+	 */
+	zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
+
 	zio_inject_init();
 }
 
@@ -2151,6 +2159,7 @@ zio_dva_allocate(zio_t *zio)
 	metaslab_class_t *mc = spa_normal_class(spa);
 	blkptr_t *bp = zio->io_bp;
 	int error;
+	int flags = 0;
 
 	if (zio->io_gang_leader == NULL) {
 		ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
@@ -2163,10 +2172,21 @@ zio_dva_allocate(zio_t *zio)
 	ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
 	ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
 
+	/*
+	 * The dump device does not support gang blocks so allocation on
+	 * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
+	 * the "fast" gang feature.
+	 */
+	flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
+	flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
+	    METASLAB_GANG_CHILD : 0;
 	error = metaslab_alloc(spa, mc, zio->io_size, bp,
-	    zio->io_prop.zp_copies, zio->io_txg, NULL, 0);
+	    zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
 
 	if (error) {
+		spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
+		    "size %llu, error %d", spa_name(spa), zio, zio->io_size,
+		    error);
 		if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
 			return (zio_write_gang_block(zio));
 		zio->io_error = error;
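Worked example for the threshold set in zio_init() above, using a hypothetical machine size: with 16 CPUs, zfs_mg_alloc_failures = MAX(3 * 16 / 2, 8) = 24, so metaslab_group_alloc() only starts skipping a metaslab group after 24 failed allocation attempts in a txg, and then only when CAN_FASTGANG(flags) holds, the request exceeds SPA_GANGBLOCKSIZE, and the allocation is from the primary metaslab.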