mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 02:14:28 +03:00
Update core ZFS code from build 121 to build 141.
This commit is contained in:
parent
6119cb885a
commit
428870ff73
@ -1 +1 @@
|
||||
http://dlc.sun.com/osol/on/downloads/b121/on-src.tar.bz2
|
||||
ssh://anon@hg.opensolaris.org/hg/onnv/onnv-gate/onnv_141
|
||||
|
2303
cmd/zdb/zdb.c
2303
cmd/zdb/zdb.c
File diff suppressed because it is too large
Load Diff
141
cmd/zdb/zdb_il.c
141
cmd/zdb/zdb_il.c
@ -40,12 +40,14 @@
|
||||
|
||||
extern uint8_t dump_opt[256];
|
||||
|
||||
static char prefix[4] = "\t\t\t";
|
||||
|
||||
static void
|
||||
print_log_bp(const blkptr_t *bp, const char *prefix)
|
||||
{
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
|
||||
sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
(void) printf("%s%s\n", prefix, blkbuf);
|
||||
}
|
||||
|
||||
@ -54,19 +56,29 @@ static void
|
||||
zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
|
||||
{
|
||||
time_t crtime = lr->lr_crtime[0];
|
||||
char *name = (char *)(lr + 1);
|
||||
char *link = name + strlen(name) + 1;
|
||||
char *name, *link;
|
||||
lr_attr_t *lrattr;
|
||||
|
||||
if (txtype == TX_SYMLINK)
|
||||
(void) printf("\t\t\t%s -> %s\n", name, link);
|
||||
else
|
||||
(void) printf("\t\t\t%s\n", name);
|
||||
name = (char *)(lr + 1);
|
||||
|
||||
(void) printf("\t\t\t%s", ctime(&crtime));
|
||||
(void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
|
||||
if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR ||
|
||||
lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) {
|
||||
lrattr = (lr_attr_t *)(lr + 1);
|
||||
name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
|
||||
}
|
||||
|
||||
if (txtype == TX_SYMLINK) {
|
||||
link = name + strlen(name) + 1;
|
||||
(void) printf("%s%s -> %s\n", prefix, name, link);
|
||||
} else if (txtype != TX_MKXATTR) {
|
||||
(void) printf("%s%s\n", prefix, name);
|
||||
}
|
||||
|
||||
(void) printf("%s%s", prefix, ctime(&crtime));
|
||||
(void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
|
||||
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
|
||||
(longlong_t)lr->lr_mode);
|
||||
(void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
|
||||
(void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
|
||||
(u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
|
||||
(u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
|
||||
}
|
||||
@ -75,7 +87,7 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
|
||||
static void
|
||||
zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
|
||||
{
|
||||
(void) printf("\t\t\tdoid %llu, name %s\n",
|
||||
(void) printf("%sdoid %llu, name %s\n", prefix,
|
||||
(u_longlong_t)lr->lr_doid, (char *)(lr + 1));
|
||||
}
|
||||
|
||||
@ -83,7 +95,7 @@ zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
|
||||
static void
|
||||
zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
|
||||
{
|
||||
(void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
|
||||
(void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
|
||||
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
|
||||
(char *)(lr + 1));
|
||||
}
|
||||
@ -95,9 +107,9 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
|
||||
char *snm = (char *)(lr + 1);
|
||||
char *tnm = snm + strlen(snm) + 1;
|
||||
|
||||
(void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
|
||||
(void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
|
||||
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
|
||||
(void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
|
||||
(void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
@ -106,43 +118,48 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
|
||||
{
|
||||
char *data, *dlimit;
|
||||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
zbookmark_t zb;
|
||||
char buf[SPA_MAXBLOCKSIZE];
|
||||
int verbose = MAX(dump_opt['d'], dump_opt['i']);
|
||||
int error;
|
||||
|
||||
(void) printf("\t\t\tfoid %llu, offset 0x%llx,"
|
||||
" length 0x%llx, blkoff 0x%llx\n",
|
||||
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
|
||||
(u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
|
||||
(void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
|
||||
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
|
||||
(u_longlong_t)lr->lr_length);
|
||||
|
||||
if (verbose < 5)
|
||||
if (txtype == TX_WRITE2 || verbose < 5)
|
||||
return;
|
||||
|
||||
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
|
||||
(void) printf("\t\t\thas blkptr, %s\n",
|
||||
(void) printf("%shas blkptr, %s\n", prefix,
|
||||
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
|
||||
"will claim" : "won't claim");
|
||||
print_log_bp(bp, "\t\t\t");
|
||||
print_log_bp(bp, prefix);
|
||||
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
(void) printf("\t\t\tLSIZE 0x%llx\n",
|
||||
(u_longlong_t)BP_GET_LSIZE(bp));
|
||||
}
|
||||
if (bp->blk_birth == 0) {
|
||||
bzero(buf, sizeof (buf));
|
||||
} else {
|
||||
zbookmark_t zb;
|
||||
|
||||
ASSERT3U(bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ==,
|
||||
dmu_objset_id(zilog->zl_os));
|
||||
|
||||
zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
|
||||
zb.zb_object = 0;
|
||||
zb.zb_level = -1;
|
||||
zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
|
||||
|
||||
error = zio_wait(zio_read(NULL, zilog->zl_spa,
|
||||
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
|
||||
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
|
||||
if (error)
|
||||
return;
|
||||
(void) printf("%s<hole>\n", prefix);
|
||||
return;
|
||||
}
|
||||
data = buf + lr->lr_blkoff;
|
||||
if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
|
||||
(void) printf("%s<block already committed>\n", prefix);
|
||||
return;
|
||||
}
|
||||
|
||||
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
|
||||
lr->lr_foid, ZB_ZIL_LEVEL,
|
||||
lr->lr_offset / BP_GET_LSIZE(bp));
|
||||
|
||||
error = zio_wait(zio_read(NULL, zilog->zl_spa,
|
||||
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
|
||||
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
|
||||
if (error)
|
||||
return;
|
||||
data = buf;
|
||||
} else {
|
||||
data = (char *)(lr + 1);
|
||||
}
|
||||
@ -150,7 +167,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
|
||||
dlimit = data + MIN(lr->lr_length,
|
||||
(verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
|
||||
|
||||
(void) printf("\t\t\t");
|
||||
(void) printf("%s", prefix);
|
||||
while (data < dlimit) {
|
||||
if (isprint(*data))
|
||||
(void) printf("%c ", *data);
|
||||
@ -165,7 +182,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
|
||||
static void
|
||||
zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
|
||||
{
|
||||
(void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
|
||||
(void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
|
||||
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
|
||||
(u_longlong_t)lr->lr_length);
|
||||
}
|
||||
@ -177,38 +194,38 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
|
||||
time_t atime = (time_t)lr->lr_atime[0];
|
||||
time_t mtime = (time_t)lr->lr_mtime[0];
|
||||
|
||||
(void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
|
||||
(void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
|
||||
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
|
||||
|
||||
if (lr->lr_mask & AT_MODE) {
|
||||
(void) printf("\t\t\tAT_MODE %llo\n",
|
||||
(void) printf("%sAT_MODE %llo\n", prefix,
|
||||
(longlong_t)lr->lr_mode);
|
||||
}
|
||||
|
||||
if (lr->lr_mask & AT_UID) {
|
||||
(void) printf("\t\t\tAT_UID %llu\n",
|
||||
(void) printf("%sAT_UID %llu\n", prefix,
|
||||
(u_longlong_t)lr->lr_uid);
|
||||
}
|
||||
|
||||
if (lr->lr_mask & AT_GID) {
|
||||
(void) printf("\t\t\tAT_GID %llu\n",
|
||||
(void) printf("%sAT_GID %llu\n", prefix,
|
||||
(u_longlong_t)lr->lr_gid);
|
||||
}
|
||||
|
||||
if (lr->lr_mask & AT_SIZE) {
|
||||
(void) printf("\t\t\tAT_SIZE %llu\n",
|
||||
(void) printf("%sAT_SIZE %llu\n", prefix,
|
||||
(u_longlong_t)lr->lr_size);
|
||||
}
|
||||
|
||||
if (lr->lr_mask & AT_ATIME) {
|
||||
(void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
|
||||
(void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
|
||||
(u_longlong_t)lr->lr_atime[0],
|
||||
(u_longlong_t)lr->lr_atime[1],
|
||||
ctime(&atime));
|
||||
}
|
||||
|
||||
if (lr->lr_mask & AT_MTIME) {
|
||||
(void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
|
||||
(void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
|
||||
(u_longlong_t)lr->lr_mtime[0],
|
||||
(u_longlong_t)lr->lr_mtime[1],
|
||||
ctime(&mtime));
|
||||
@ -219,7 +236,7 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
|
||||
static void
|
||||
zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
|
||||
{
|
||||
(void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
|
||||
(void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
|
||||
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
|
||||
}
|
||||
|
||||
@ -251,10 +268,11 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
|
||||
{ zil_prt_rec_create, "TX_MKDIR_ACL " },
|
||||
{ zil_prt_rec_create, "TX_MKDIR_ATTR " },
|
||||
{ zil_prt_rec_create, "TX_MKDIR_ACL_ATTR " },
|
||||
{ zil_prt_rec_write, "TX_WRITE2 " },
|
||||
};
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
static int
|
||||
print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
|
||||
{
|
||||
int txtype;
|
||||
@ -278,23 +296,24 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
|
||||
|
||||
zil_rec_info[txtype].zri_count++;
|
||||
zil_rec_info[0].zri_count++;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
static int
|
||||
print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
||||
{
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
char blkbuf[BP_SPRINTF_LEN + 10];
|
||||
int verbose = MAX(dump_opt['d'], dump_opt['i']);
|
||||
char *claim;
|
||||
|
||||
if (verbose <= 3)
|
||||
return;
|
||||
return (0);
|
||||
|
||||
if (verbose >= 5) {
|
||||
(void) strcpy(blkbuf, ", ");
|
||||
sprintf_blkptr(blkbuf + strlen(blkbuf),
|
||||
BP_SPRINTF_LEN - strlen(blkbuf), bp);
|
||||
sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
|
||||
} else {
|
||||
blkbuf[0] = '\0';
|
||||
}
|
||||
@ -308,6 +327,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
||||
|
||||
(void) printf("\tBlock seqno %llu, %s%s\n",
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -340,17 +361,17 @@ dump_intent_log(zilog_t *zilog)
|
||||
int verbose = MAX(dump_opt['d'], dump_opt['i']);
|
||||
int i;
|
||||
|
||||
if (zh->zh_log.blk_birth == 0 || verbose < 2)
|
||||
if (zh->zh_log.blk_birth == 0 || verbose < 1)
|
||||
return;
|
||||
|
||||
(void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
|
||||
(u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
|
||||
(void) printf("\n ZIL header: claim_txg %llu, "
|
||||
"claim_blk_seq %llu, claim_lr_seq %llu",
|
||||
(u_longlong_t)zh->zh_claim_txg,
|
||||
(u_longlong_t)zh->zh_claim_blk_seq,
|
||||
(u_longlong_t)zh->zh_claim_lr_seq);
|
||||
(void) printf(" replay_seq %llu, flags 0x%llx\n",
|
||||
(u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
|
||||
|
||||
if (verbose >= 4)
|
||||
print_log_bp(&zh->zh_log, "\n\tfirst block: ");
|
||||
|
||||
for (i = 0; i < TX_MAX_TYPE; i++)
|
||||
zil_rec_info[i].zri_count = 0;
|
||||
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <libintl.h>
|
||||
@ -107,7 +106,8 @@ zfs_callback(zfs_handle_t *zhp, void *data)
|
||||
zfs_prune_proplist(zhp,
|
||||
cb->cb_props_table);
|
||||
|
||||
if (zfs_expand_proplist(zhp, cb->cb_proplist)
|
||||
if (zfs_expand_proplist(zhp, cb->cb_proplist,
|
||||
(cb->cb_flags & ZFS_ITER_RECVD_PROPS))
|
||||
!= 0) {
|
||||
free(node);
|
||||
return (-1);
|
||||
@ -350,11 +350,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
|
||||
avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t),
|
||||
offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT);
|
||||
|
||||
if (avl_pool == NULL) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("internal error: out of memory\n"));
|
||||
exit(1);
|
||||
}
|
||||
if (avl_pool == NULL)
|
||||
nomem();
|
||||
|
||||
cb.cb_sortcol = sortcol;
|
||||
cb.cb_flags = flags;
|
||||
@ -399,11 +396,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
|
||||
sizeof (cb.cb_props_table));
|
||||
}
|
||||
|
||||
if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("internal error: out of memory\n"));
|
||||
exit(1);
|
||||
}
|
||||
if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL)
|
||||
nomem();
|
||||
|
||||
if (argc == 0) {
|
||||
/*
|
||||
@ -453,11 +447,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
|
||||
/*
|
||||
* Finally, clean up the AVL tree.
|
||||
*/
|
||||
if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("internal error: out of memory"));
|
||||
exit(1);
|
||||
}
|
||||
if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL)
|
||||
nomem();
|
||||
|
||||
while ((node = uu_avl_walk_next(walk)) != NULL) {
|
||||
uu_avl_remove(cb.cb_avl, node);
|
||||
|
@ -42,6 +42,7 @@ typedef struct zfs_sort_column {
|
||||
#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
|
||||
#define ZFS_ITER_PROP_LISTSNAPS (1 << 2)
|
||||
#define ZFS_ITER_DEPTH_LIMIT (1 << 3)
|
||||
#define ZFS_ITER_RECVD_PROPS (1 << 4)
|
||||
|
||||
int zfs_for_each(int, char **, int options, zfs_type_t,
|
||||
zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -19,15 +19,12 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_UTIL_H
|
||||
#define _ZFS_UTIL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <libzfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -35,6 +32,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
void * safe_malloc(size_t size);
|
||||
void nomem(void);
|
||||
libzfs_handle_t *g_zfs;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -19,14 +19,11 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <libzfs.h>
|
||||
|
||||
#undef verify /* both libzfs.h and zfs_context.h want to define this */
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#include <errno.h>
|
||||
@ -69,6 +66,18 @@ ziprintf(const char *fmt, ...)
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
static void
|
||||
compress_slashes(const char *src, char *dest)
|
||||
{
|
||||
while (*src != '\0') {
|
||||
*dest = *src++;
|
||||
while (*dest == '/' && *src == '/')
|
||||
++src;
|
||||
++dest;
|
||||
}
|
||||
*dest = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a full path to a file, translate into a dataset name and a relative
|
||||
* path within the dataset. 'dataset' must be at least MAXNAMELEN characters,
|
||||
@ -76,13 +85,16 @@ ziprintf(const char *fmt, ...)
|
||||
* buffer, which we need later to get the object ID.
|
||||
*/
|
||||
static int
|
||||
parse_pathname(const char *fullpath, char *dataset, char *relpath,
|
||||
parse_pathname(const char *inpath, char *dataset, char *relpath,
|
||||
struct stat64 *statbuf)
|
||||
{
|
||||
struct extmnttab mp;
|
||||
FILE *fp;
|
||||
int match;
|
||||
const char *rel;
|
||||
char fullpath[MAXPATHLEN];
|
||||
|
||||
compress_slashes(inpath, fullpath);
|
||||
|
||||
if (fullpath[0] != '/') {
|
||||
(void) fprintf(stderr, "invalid object '%s': must be full "
|
||||
@ -162,8 +174,8 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
|
||||
*/
|
||||
sync();
|
||||
|
||||
if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
|
||||
err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
|
||||
if (err != 0) {
|
||||
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
|
||||
dataset, strerror(err));
|
||||
return (-1);
|
||||
@ -172,7 +184,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
|
||||
record->zi_objset = dmu_objset_id(os);
|
||||
record->zi_object = statbuf->st_ino;
|
||||
|
||||
dmu_objset_close(os);
|
||||
dmu_objset_disown(os, FTAG);
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -247,17 +259,17 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
|
||||
* Get the dnode associated with object, so we can calculate the block
|
||||
* size.
|
||||
*/
|
||||
if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
|
||||
if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
|
||||
B_TRUE, FTAG, &os)) != 0) {
|
||||
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
|
||||
dataset, strerror(err));
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (record->zi_object == 0) {
|
||||
dn = os->os->os_meta_dnode;
|
||||
dn = os->os_meta_dnode;
|
||||
} else {
|
||||
err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
|
||||
err = dnode_hold(os, record->zi_object, FTAG, &dn);
|
||||
if (err != 0) {
|
||||
(void) fprintf(stderr, "failed to hold dnode "
|
||||
"for object %llu\n",
|
||||
@ -306,11 +318,11 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
|
||||
ret = 0;
|
||||
out:
|
||||
if (dn) {
|
||||
if (dn != os->os->os_meta_dnode)
|
||||
if (dn != os->os_meta_dnode)
|
||||
dnode_rele(dn, FTAG);
|
||||
}
|
||||
if (os)
|
||||
dmu_objset_close(os);
|
||||
dmu_objset_disown(os, FTAG);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
@ -347,8 +359,8 @@ translate_record(err_type_t type, const char *object, const char *range,
|
||||
case TYPE_CONFIG:
|
||||
record->zi_type = DMU_OT_PACKED_NVLIST;
|
||||
break;
|
||||
case TYPE_BPLIST:
|
||||
record->zi_type = DMU_OT_BPLIST;
|
||||
case TYPE_BPOBJ:
|
||||
record->zi_type = DMU_OT_BPOBJ;
|
||||
break;
|
||||
case TYPE_SPACEMAP:
|
||||
record->zi_type = DMU_OT_SPACE_MAP;
|
||||
@ -469,6 +481,14 @@ translate_device(const char *pool, const char *device, err_type_t label_type,
|
||||
record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
|
||||
record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
|
||||
break;
|
||||
case TYPE_LABEL_PAD1:
|
||||
record->zi_start = offsetof(vdev_label_t, vl_pad1);
|
||||
record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
|
||||
break;
|
||||
case TYPE_LABEL_PAD2:
|
||||
record->zi_start = offsetof(vdev_label_t, vl_pad2);
|
||||
record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
|
||||
break;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -42,12 +41,12 @@
|
||||
* any attempt to read from the device will return EIO, but any attempt to
|
||||
* reopen the device will also return ENXIO.
|
||||
* For label faults, the -L option must be specified. This allows faults
|
||||
* to be injected into either the nvlist or uberblock region of all the labels
|
||||
* for the specified device.
|
||||
* to be injected into either the nvlist, uberblock, pad1, or pad2 region
|
||||
* of all the labels for the specified device.
|
||||
*
|
||||
* This form of the command looks like:
|
||||
*
|
||||
* zinject -d device [-e errno] [-L <uber | nvlist>] pool
|
||||
* zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
|
||||
*
|
||||
*
|
||||
* DATA FAULTS
|
||||
@ -70,7 +69,7 @@
|
||||
* mos Any data in the MOS
|
||||
* mosdir object directory
|
||||
* config pool configuration
|
||||
* bplist blkptr list
|
||||
* bpobj blkptr list
|
||||
* spacemap spacemap
|
||||
* metaslab metaslab
|
||||
* errlog persistent error log
|
||||
@ -164,11 +163,13 @@ static const char *errtable[TYPE_INVAL] = {
|
||||
"mosdir",
|
||||
"metaslab",
|
||||
"config",
|
||||
"bplist",
|
||||
"bpobj",
|
||||
"spacemap",
|
||||
"errlog",
|
||||
"uber",
|
||||
"nvlist"
|
||||
"nvlist",
|
||||
"pad1",
|
||||
"pad2"
|
||||
};
|
||||
|
||||
static err_type_t
|
||||
@ -192,8 +193,8 @@ type_to_name(uint64_t type)
|
||||
return ("metaslab");
|
||||
case DMU_OT_PACKED_NVLIST:
|
||||
return ("config");
|
||||
case DMU_OT_BPLIST:
|
||||
return ("bplist");
|
||||
case DMU_OT_BPOBJ:
|
||||
return ("bpobj");
|
||||
case DMU_OT_SPACE_MAP:
|
||||
return ("spacemap");
|
||||
case DMU_OT_ERROR_LOG:
|
||||
@ -222,11 +223,28 @@ usage(void)
|
||||
"\t\tClear the particular record (if given a numeric ID), or\n"
|
||||
"\t\tall records if 'all' is specificed.\n"
|
||||
"\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
|
||||
"\tzinject -p <function name> pool\n"
|
||||
"\t\tInject a panic fault at the specified function. Only \n"
|
||||
"\t\tfunctions which call spa_vdev_config_exit(), or \n"
|
||||
"\t\tspa_vdev_exit() will trigger a panic.\n"
|
||||
"\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
|
||||
"\t [-T <read|write|free|claim|all> pool\n"
|
||||
"\t\tInject a fault into a particular device or the device's\n"
|
||||
"\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
|
||||
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
|
||||
"\t\t'pad1', or 'pad2'.\n"
|
||||
"\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
|
||||
"\n"
|
||||
"\tzinject -d device -A <degrade|fault> pool\n"
|
||||
"\t\tPerform a specific action on a particular device\n"
|
||||
"\n"
|
||||
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
|
||||
"\t\tCause the pool to stop writing blocks yet not\n"
|
||||
"\t\treport errors for a duration. Simulates buggy hardware\n"
|
||||
"\t\tthat fails to honor cache flush requests.\n"
|
||||
"\t\tDefault duration is 30 seconds. The machine is panicked\n"
|
||||
"\t\tat the end of the duration.\n"
|
||||
"\n"
|
||||
"\tzinject -b objset:object:level:blkid pool\n"
|
||||
"\n"
|
||||
"\t\tInject an error into pool 'pool' with the numeric bookmark\n"
|
||||
@ -267,7 +285,7 @@ usage(void)
|
||||
"\t\t\ton a ZFS filesystem.\n"
|
||||
"\n"
|
||||
"\t-t <mos>\tInject errors into the MOS for objects of the given\n"
|
||||
"\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n"
|
||||
"\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
|
||||
"\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
|
||||
"\t\t\tthe poolname.\n");
|
||||
}
|
||||
@ -286,6 +304,12 @@ iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
|
||||
&zc.zc_inject_record, data)) != 0)
|
||||
return (ret);
|
||||
|
||||
if (errno != ENOENT) {
|
||||
(void) fprintf(stderr, "Unable to list handlers: %s\n",
|
||||
strerror(errno));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -295,7 +319,7 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
|
||||
{
|
||||
int *count = data;
|
||||
|
||||
if (record->zi_guid != 0)
|
||||
if (record->zi_guid != 0 || record->zi_func[0] != '\0')
|
||||
return (0);
|
||||
|
||||
if (*count == 0) {
|
||||
@ -327,7 +351,7 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
|
||||
{
|
||||
int *count = data;
|
||||
|
||||
if (record->zi_guid == 0)
|
||||
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
|
||||
return (0);
|
||||
|
||||
if (*count == 0) {
|
||||
@ -343,6 +367,27 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
print_panic_handler(int id, const char *pool, zinject_record_t *record,
|
||||
void *data)
|
||||
{
|
||||
int *count = data;
|
||||
|
||||
if (record->zi_func[0] == '\0')
|
||||
return (0);
|
||||
|
||||
if (*count == 0) {
|
||||
(void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
|
||||
(void) printf("--- --------------- ----------------\n");
|
||||
}
|
||||
|
||||
*count += 1;
|
||||
|
||||
(void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print all registered error handlers. Returns the number of handlers
|
||||
* registered.
|
||||
@ -356,6 +401,9 @@ print_all_handlers(void)
|
||||
(void) printf("\n");
|
||||
count = 0;
|
||||
(void) iter_handlers(print_data_handler, &count);
|
||||
(void) printf("\n");
|
||||
count = 0;
|
||||
(void) iter_handlers(print_panic_handler, &count);
|
||||
|
||||
return (count);
|
||||
}
|
||||
@ -386,7 +434,8 @@ cancel_all_handlers(void)
|
||||
{
|
||||
int ret = iter_handlers(cancel_one_handler, NULL);
|
||||
|
||||
(void) printf("removed all registered handlers\n");
|
||||
if (ret == 0)
|
||||
(void) printf("removed all registered handlers\n");
|
||||
|
||||
return (ret);
|
||||
}
|
||||
@ -443,6 +492,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
|
||||
if (record->zi_guid) {
|
||||
(void) printf(" vdev: %llx\n",
|
||||
(u_longlong_t)record->zi_guid);
|
||||
} else if (record->zi_func[0] != '\0') {
|
||||
(void) printf(" panic function: %s\n",
|
||||
record->zi_func);
|
||||
} else if (record->zi_duration > 0) {
|
||||
(void) printf(" time: %lld seconds\n",
|
||||
(u_longlong_t)record->zi_duration);
|
||||
} else if (record->zi_duration < 0) {
|
||||
(void) printf(" txgs: %lld \n",
|
||||
(u_longlong_t)-record->zi_duration);
|
||||
} else {
|
||||
(void) printf("objset: %llu\n",
|
||||
(u_longlong_t)record->zi_objset);
|
||||
@ -464,6 +522,22 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
perform_action(const char *pool, zinject_record_t *record, int cmd)
|
||||
{
|
||||
zfs_cmd_t zc;
|
||||
|
||||
ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
|
||||
(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
|
||||
zc.zc_guid = record->zi_guid;
|
||||
zc.zc_cookie = cmd;
|
||||
|
||||
if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
|
||||
return (0);
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@ -477,12 +551,17 @@ main(int argc, char **argv)
|
||||
int quiet = 0;
|
||||
int error = 0;
|
||||
int domount = 0;
|
||||
int io_type = ZIO_TYPES;
|
||||
int action = VDEV_STATE_UNKNOWN;
|
||||
err_type_t type = TYPE_INVAL;
|
||||
err_type_t label = TYPE_INVAL;
|
||||
zinject_record_t record = { 0 };
|
||||
char pool[MAXNAMELEN];
|
||||
char dataset[MAXNAMELEN];
|
||||
zfs_handle_t *zhp;
|
||||
int nowrites = 0;
|
||||
int dur_txg = 0;
|
||||
int dur_secs = 0;
|
||||
int ret;
|
||||
int flags = 0;
|
||||
|
||||
@ -514,11 +593,24 @@ main(int argc, char **argv)
|
||||
return (0);
|
||||
}
|
||||
|
||||
while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
|
||||
while ((c = getopt(argc, argv,
|
||||
":aA:b:d:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
flags |= ZINJECT_FLUSH_ARC;
|
||||
break;
|
||||
case 'A':
|
||||
if (strcasecmp(optarg, "degrade") == 0) {
|
||||
action = VDEV_STATE_DEGRADED;
|
||||
} else if (strcasecmp(optarg, "fault") == 0) {
|
||||
action = VDEV_STATE_FAULTED;
|
||||
} else {
|
||||
(void) fprintf(stderr, "invalid action '%s': "
|
||||
"must be 'degrade' or 'fault'\n", optarg);
|
||||
usage();
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
case 'b':
|
||||
raw = optarg;
|
||||
break;
|
||||
@ -554,9 +646,27 @@ main(int argc, char **argv)
|
||||
case 'F':
|
||||
record.zi_failfast = B_TRUE;
|
||||
break;
|
||||
case 'g':
|
||||
dur_txg = 1;
|
||||
record.zi_duration = (int)strtol(optarg, &end, 10);
|
||||
if (record.zi_duration <= 0 || *end != '\0') {
|
||||
(void) fprintf(stderr, "invalid duration '%s': "
|
||||
"must be a positive integer\n", optarg);
|
||||
usage();
|
||||
return (1);
|
||||
}
|
||||
/* store duration of txgs as its negative */
|
||||
record.zi_duration *= -1;
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
return (0);
|
||||
case 'I':
|
||||
/* default duration, if one hasn't yet been defined */
|
||||
nowrites = 1;
|
||||
if (dur_secs == 0 && dur_txg == 0)
|
||||
record.zi_duration = 30;
|
||||
break;
|
||||
case 'l':
|
||||
level = (int)strtol(optarg, &end, 10);
|
||||
if (*end != '\0') {
|
||||
@ -569,12 +679,45 @@ main(int argc, char **argv)
|
||||
case 'm':
|
||||
domount = 1;
|
||||
break;
|
||||
case 'p':
|
||||
(void) strlcpy(record.zi_func, optarg,
|
||||
sizeof (record.zi_func));
|
||||
break;
|
||||
case 'q':
|
||||
quiet = 1;
|
||||
break;
|
||||
case 'r':
|
||||
range = optarg;
|
||||
break;
|
||||
case 's':
|
||||
dur_secs = 1;
|
||||
record.zi_duration = (int)strtol(optarg, &end, 10);
|
||||
if (record.zi_duration <= 0 || *end != '\0') {
|
||||
(void) fprintf(stderr, "invalid duration '%s': "
|
||||
"must be a positive integer\n", optarg);
|
||||
usage();
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
case 'T':
|
||||
if (strcasecmp(optarg, "read") == 0) {
|
||||
io_type = ZIO_TYPE_READ;
|
||||
} else if (strcasecmp(optarg, "write") == 0) {
|
||||
io_type = ZIO_TYPE_WRITE;
|
||||
} else if (strcasecmp(optarg, "free") == 0) {
|
||||
io_type = ZIO_TYPE_FREE;
|
||||
} else if (strcasecmp(optarg, "claim") == 0) {
|
||||
io_type = ZIO_TYPE_CLAIM;
|
||||
} else if (strcasecmp(optarg, "all") == 0) {
|
||||
io_type = ZIO_TYPES;
|
||||
} else {
|
||||
(void) fprintf(stderr, "invalid I/O type "
|
||||
"'%s': must be 'read', 'write', 'free', "
|
||||
"'claim' or 'all'\n", optarg);
|
||||
usage();
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
if ((type = name_to_type(optarg)) == TYPE_INVAL &&
|
||||
!MOS_TYPE(type)) {
|
||||
@ -617,7 +760,8 @@ main(int argc, char **argv)
|
||||
* '-c' is invalid with any other options.
|
||||
*/
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0) {
|
||||
level != 0 || record.zi_func[0] != '\0' ||
|
||||
record.zi_duration != 0) {
|
||||
(void) fprintf(stderr, "cancel (-c) incompatible with "
|
||||
"any other options\n");
|
||||
usage();
|
||||
@ -649,7 +793,8 @@ main(int argc, char **argv)
|
||||
* for doing injection, so handle it separately here.
|
||||
*/
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0) {
|
||||
level != 0 || record.zi_func[0] != '\0' ||
|
||||
record.zi_duration != 0) {
|
||||
(void) fprintf(stderr, "device (-d) incompatible with "
|
||||
"data error injection\n");
|
||||
usage();
|
||||
@ -672,12 +817,18 @@ main(int argc, char **argv)
|
||||
return (1);
|
||||
}
|
||||
|
||||
record.zi_iotype = io_type;
|
||||
if (translate_device(pool, device, label, &record) != 0)
|
||||
return (1);
|
||||
if (!error)
|
||||
error = ENXIO;
|
||||
|
||||
if (action != VDEV_STATE_UNKNOWN)
|
||||
return (perform_action(pool, &record, action));
|
||||
|
||||
} else if (raw != NULL) {
|
||||
if (range != NULL || type != TYPE_INVAL || level != 0) {
|
||||
if (range != NULL || type != TYPE_INVAL || level != 0 ||
|
||||
record.zi_func[0] != '\0' || record.zi_duration != 0) {
|
||||
(void) fprintf(stderr, "raw (-b) format with "
|
||||
"any other options\n");
|
||||
usage();
|
||||
@ -704,10 +855,52 @@ main(int argc, char **argv)
|
||||
return (1);
|
||||
if (!error)
|
||||
error = EIO;
|
||||
} else if (record.zi_func[0] != '\0') {
|
||||
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
|
||||
level != 0 || device != NULL || record.zi_duration != 0) {
|
||||
(void) fprintf(stderr, "panic (-p) incompatible with "
|
||||
"other options\n");
|
||||
usage();
|
||||
return (2);
|
||||
}
|
||||
|
||||
if (argc < 1 || argc > 2) {
|
||||
(void) fprintf(stderr, "panic (-p) injection requires "
|
||||
"a single pool name and an optional id\n");
|
||||
usage();
|
||||
return (2);
|
||||
}
|
||||
|
||||
(void) strcpy(pool, argv[0]);
|
||||
if (argv[1] != NULL)
|
||||
record.zi_type = atoi(argv[1]);
|
||||
dataset[0] = '\0';
|
||||
} else if (record.zi_duration != 0) {
|
||||
if (nowrites == 0) {
|
||||
(void) fprintf(stderr, "-s or -g meaningless "
|
||||
"without -I (ignore writes)\n");
|
||||
usage();
|
||||
return (2);
|
||||
} else if (dur_secs && dur_txg) {
|
||||
(void) fprintf(stderr, "choose a duration either "
|
||||
"in seconds (-s) or a number of txgs (-g) "
|
||||
"but not both\n");
|
||||
usage();
|
||||
return (2);
|
||||
} else if (argc != 1) {
|
||||
(void) fprintf(stderr, "ignore writes (-I) "
|
||||
"injection requires a single pool name\n");
|
||||
usage();
|
||||
return (2);
|
||||
}
|
||||
|
||||
(void) strcpy(pool, argv[0]);
|
||||
dataset[0] = '\0';
|
||||
} else if (type == TYPE_INVAL) {
|
||||
if (flags == 0) {
|
||||
(void) fprintf(stderr, "at least one of '-b', '-d', "
|
||||
"'-t', '-a', or '-u' must be specified\n");
|
||||
"'-t', '-a', '-p', '-I' or '-u' "
|
||||
"must be specified\n");
|
||||
usage();
|
||||
return (2);
|
||||
}
|
||||
|
@ -19,15 +19,12 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINJECT_H
|
||||
#define _ZINJECT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_ioctl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -41,11 +38,13 @@ typedef enum {
|
||||
TYPE_MOSDIR, /* MOS object directory */
|
||||
TYPE_METASLAB, /* metaslab objects */
|
||||
TYPE_CONFIG, /* MOS config */
|
||||
TYPE_BPLIST, /* block pointer list */
|
||||
TYPE_BPOBJ, /* block pointer list */
|
||||
TYPE_SPACEMAP, /* space map objects */
|
||||
TYPE_ERRLOG, /* persistent error log */
|
||||
TYPE_LABEL_UBERBLOCK, /* label specific uberblock */
|
||||
TYPE_LABEL_NVLIST, /* label specific nvlist */
|
||||
TYPE_LABEL_PAD1, /* label specific 8K pad1 area */
|
||||
TYPE_LABEL_PAD2, /* label specific 8K pad2 area */
|
||||
TYPE_INVAL
|
||||
} err_type_t;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -19,12 +19,10 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <errno.h>
|
||||
#include <libgen.h>
|
||||
#include <libintl.h>
|
||||
@ -50,22 +48,6 @@ safe_malloc(size_t size)
|
||||
return (data);
|
||||
}
|
||||
|
||||
/*
|
||||
* Same as above, but for strdup()
|
||||
*/
|
||||
char *
|
||||
safe_strdup(const char *str)
|
||||
{
|
||||
char *ret;
|
||||
|
||||
if ((ret = strdup(str)) == NULL) {
|
||||
(void) fprintf(stderr, "internal error: out of memory\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Display an out of memory error message and abort the current program.
|
||||
*/
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef ZPOOL_UTIL_H
|
||||
@ -37,7 +36,6 @@ extern "C" {
|
||||
* Basic utility functions
|
||||
*/
|
||||
void *safe_malloc(size_t);
|
||||
char *safe_strdup(const char *);
|
||||
void zpool_no_memory(void);
|
||||
uint_t num_logs(nvlist_t *nv);
|
||||
|
||||
@ -46,7 +44,9 @@ uint_t num_logs(nvlist_t *nv);
|
||||
*/
|
||||
|
||||
nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
|
||||
boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
|
||||
boolean_t replacing, boolean_t dryrun, int argc, char **argv);
|
||||
nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
|
||||
nvlist_t *props, splitflags_t flags, int argc, char **argv);
|
||||
|
||||
/*
|
||||
* Pool list functions
|
||||
|
@ -20,8 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -1004,8 +1003,8 @@ is_spare(nvlist_t *config, const char *path)
|
||||
return (B_FALSE);
|
||||
}
|
||||
free(name);
|
||||
|
||||
(void) close(fd);
|
||||
|
||||
verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
|
||||
nvlist_free(label);
|
||||
|
||||
@ -1029,8 +1028,8 @@ is_spare(nvlist_t *config, const char *path)
|
||||
* the majority of this task.
|
||||
*/
|
||||
static int
|
||||
check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
|
||||
int isspare)
|
||||
check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
|
||||
boolean_t replacing, boolean_t isspare)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
@ -1051,13 +1050,14 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
|
||||
* hot spare within the same pool. If so, we allow it
|
||||
* regardless of what libdiskmgt or zpool_in_use() says.
|
||||
*/
|
||||
if (isreplacing) {
|
||||
if (replacing) {
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
|
||||
&wholedisk) == 0 && wholedisk)
|
||||
(void) snprintf(buf, sizeof (buf), "%ss0",
|
||||
path);
|
||||
else
|
||||
(void) strlcpy(buf, path, sizeof (buf));
|
||||
|
||||
if (is_spare(config, buf))
|
||||
return (0);
|
||||
}
|
||||
@ -1073,21 +1073,21 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
|
||||
|
||||
for (c = 0; c < children; c++)
|
||||
if ((ret = check_in_use(config, child[c], force,
|
||||
isreplacing, B_FALSE)) != 0)
|
||||
replacing, B_FALSE)) != 0)
|
||||
return (ret);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
|
||||
&child, &children) == 0)
|
||||
for (c = 0; c < children; c++)
|
||||
if ((ret = check_in_use(config, child[c], force,
|
||||
isreplacing, B_TRUE)) != 0)
|
||||
replacing, B_TRUE)) != 0)
|
||||
return (ret);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
|
||||
&child, &children) == 0)
|
||||
for (c = 0; c < children; c++)
|
||||
if ((ret = check_in_use(config, child[c], force,
|
||||
isreplacing, B_FALSE)) != 0)
|
||||
replacing, B_FALSE)) != 0)
|
||||
return (ret);
|
||||
|
||||
return (0);
|
||||
@ -1360,6 +1360,52 @@ construct_spec(int argc, char **argv)
|
||||
return (nvroot);
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
|
||||
splitflags_t flags, int argc, char **argv)
|
||||
{
|
||||
nvlist_t *newroot = NULL, **child;
|
||||
uint_t c, children;
|
||||
|
||||
if (argc > 0) {
|
||||
if ((newroot = construct_spec(argc, argv)) == NULL) {
|
||||
(void) fprintf(stderr, gettext("Unable to build a "
|
||||
"pool from the specified devices\n"));
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
|
||||
nvlist_free(newroot);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/* avoid any tricks in the spec */
|
||||
verify(nvlist_lookup_nvlist_array(newroot,
|
||||
ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
|
||||
for (c = 0; c < children; c++) {
|
||||
char *path;
|
||||
const char *type;
|
||||
int min, max;
|
||||
|
||||
verify(nvlist_lookup_string(child[c],
|
||||
ZPOOL_CONFIG_PATH, &path) == 0);
|
||||
if ((type = is_grouping(path, &min, &max)) != NULL) {
|
||||
(void) fprintf(stderr, gettext("Cannot use "
|
||||
"'%s' as a device for splitting\n"), type);
|
||||
nvlist_free(newroot);
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
|
||||
if (newroot != NULL)
|
||||
nvlist_free(newroot);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
return (newroot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get and validate the contents of the given vdev specification. This ensures
|
||||
@ -1373,7 +1419,7 @@ construct_spec(int argc, char **argv)
|
||||
*/
|
||||
nvlist_t *
|
||||
make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
|
||||
boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
|
||||
boolean_t replacing, boolean_t dryrun, int argc, char **argv)
|
||||
{
|
||||
nvlist_t *newroot;
|
||||
nvlist_t *poolconfig = NULL;
|
||||
@ -1396,8 +1442,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
|
||||
* uses (such as a dedicated dump device) that even '-f' cannot
|
||||
* override.
|
||||
*/
|
||||
if (check_in_use(poolconfig, newroot, force, isreplacing,
|
||||
B_FALSE) != 0) {
|
||||
if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
|
||||
nvlist_free(newroot);
|
||||
return (NULL);
|
||||
}
|
||||
|
4774
cmd/ztest/ztest.c
4774
cmd/ztest/ztest.c
File diff suppressed because it is too large
Load Diff
@ -19,15 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _LIBNVPAIR_H
|
||||
#define _LIBNVPAIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/nvpair.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
@ -40,6 +38,7 @@ extern "C" {
|
||||
void nvlist_print(FILE *, nvlist_t *);
|
||||
int nvpair_value_match(nvpair_t *, int, char *, char **);
|
||||
int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **);
|
||||
void dump_nvlist(nvlist_t *, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -19,14 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <strings.h>
|
||||
#include <libintl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/inttypes.h>
|
||||
#include "libnvpair.h"
|
||||
@ -272,6 +271,156 @@ nvlist_print(FILE *fp, nvlist_t *nvl)
|
||||
nvlist_print_with_indent(fp, nvl, 0);
|
||||
}
|
||||
|
||||
|
||||
#define NVP(elem, type, vtype, ptype, format) { \
|
||||
vtype value; \
|
||||
\
|
||||
(void) nvpair_value_##type(elem, &value); \
|
||||
(void) printf("%*s%s: " format "\n", indent, "", \
|
||||
nvpair_name(elem), (ptype)value); \
|
||||
}
|
||||
|
||||
#define NVPA(elem, type, vtype, ptype, format) { \
|
||||
uint_t i, count; \
|
||||
vtype *value; \
|
||||
\
|
||||
(void) nvpair_value_##type(elem, &value, &count); \
|
||||
for (i = 0; i < count; i++) { \
|
||||
(void) printf("%*s%s[%d]: " format "\n", indent, "", \
|
||||
nvpair_name(elem), i, (ptype)value[i]); \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to nvlist_print() but handles arrays slightly differently.
|
||||
*/
|
||||
void
|
||||
dump_nvlist(nvlist_t *list, int indent)
|
||||
{
|
||||
nvpair_t *elem = NULL;
|
||||
boolean_t bool_value;
|
||||
nvlist_t *nvlist_value;
|
||||
nvlist_t **nvlist_array_value;
|
||||
uint_t i, count;
|
||||
|
||||
if (list == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
|
||||
switch (nvpair_type(elem)) {
|
||||
case DATA_TYPE_BOOLEAN_VALUE:
|
||||
(void) nvpair_value_boolean_value(elem, &bool_value);
|
||||
(void) printf("%*s%s: %s\n", indent, "",
|
||||
nvpair_name(elem), bool_value ? "true" : "false");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_BYTE:
|
||||
NVP(elem, byte, uchar_t, int, "%u");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT8:
|
||||
NVP(elem, int8, int8_t, int, "%d");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT8:
|
||||
NVP(elem, uint8, uint8_t, int, "%u");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT16:
|
||||
NVP(elem, int16, int16_t, int, "%d");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT16:
|
||||
NVP(elem, uint16, uint16_t, int, "%u");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT32:
|
||||
NVP(elem, int32, int32_t, long, "%ld");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT32:
|
||||
NVP(elem, uint32, uint32_t, ulong_t, "%lu");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT64:
|
||||
NVP(elem, int64, int64_t, longlong_t, "%lld");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT64:
|
||||
NVP(elem, uint64, uint64_t, u_longlong_t, "%llu");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_STRING:
|
||||
NVP(elem, string, char *, char *, "'%s'");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_BYTE_ARRAY:
|
||||
NVPA(elem, byte_array, uchar_t, int, "%u");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT8_ARRAY:
|
||||
NVPA(elem, int8_array, int8_t, int, "%d");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT8_ARRAY:
|
||||
NVPA(elem, uint8_array, uint8_t, int, "%u");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT16_ARRAY:
|
||||
NVPA(elem, int16_array, int16_t, int, "%d");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT16_ARRAY:
|
||||
NVPA(elem, uint16_array, uint16_t, int, "%u");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT32_ARRAY:
|
||||
NVPA(elem, int32_array, int32_t, long, "%ld");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT32_ARRAY:
|
||||
NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_INT64_ARRAY:
|
||||
NVPA(elem, int64_array, int64_t, longlong_t, "%lld");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_UINT64_ARRAY:
|
||||
NVPA(elem, uint64_array, uint64_t, u_longlong_t,
|
||||
"%llu");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_STRING_ARRAY:
|
||||
NVPA(elem, string_array, char *, char *, "'%s'");
|
||||
break;
|
||||
|
||||
case DATA_TYPE_NVLIST:
|
||||
(void) nvpair_value_nvlist(elem, &nvlist_value);
|
||||
(void) printf("%*s%s:\n", indent, "",
|
||||
nvpair_name(elem));
|
||||
dump_nvlist(nvlist_value, indent + 4);
|
||||
break;
|
||||
|
||||
case DATA_TYPE_NVLIST_ARRAY:
|
||||
(void) nvpair_value_nvlist_array(elem,
|
||||
&nvlist_array_value, &count);
|
||||
for (i = 0; i < count; i++) {
|
||||
(void) printf("%*s%s[%u]:\n", indent, "",
|
||||
nvpair_name(elem), i);
|
||||
dump_nvlist(nvlist_array_value[i], indent + 4);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) printf(dgettext(TEXT_DOMAIN, "bad config type "
|
||||
"%d for %s\n"), nvpair_type(elem),
|
||||
nvpair_name(elem));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if string 'value' matches 'nvp' value. The 'value' string is
|
||||
* converted, depending on the type of 'nvp', prior to match. For numeric
|
||||
|
@ -20,8 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _LIBZFS_H
|
||||
@ -66,7 +65,6 @@ enum {
|
||||
EZFS_BADSTREAM, /* bad backup stream */
|
||||
EZFS_DSREADONLY, /* dataset is readonly */
|
||||
EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */
|
||||
EZFS_VOLHASDATA, /* volume already contains data */
|
||||
EZFS_INVALIDNAME, /* invalid dataset name */
|
||||
EZFS_BADRESTORE, /* unable to restore to destination */
|
||||
EZFS_BADBACKUP, /* backup failed */
|
||||
@ -85,17 +83,15 @@ enum {
|
||||
EZFS_UMOUNTFAILED, /* failed to unmount dataset */
|
||||
EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */
|
||||
EZFS_SHARENFSFAILED, /* share(1M) failed */
|
||||
EZFS_DEVLINKS, /* failed to create zvol links */
|
||||
EZFS_PERM, /* permission denied */
|
||||
EZFS_NOSPC, /* out of space */
|
||||
EZFS_FAULT, /* bad address */
|
||||
EZFS_IO, /* I/O error */
|
||||
EZFS_INTR, /* signal received */
|
||||
EZFS_ISSPARE, /* device is a hot spare */
|
||||
EZFS_INVALCONFIG, /* invalid vdev configuration */
|
||||
EZFS_RECURSIVE, /* recursive dependency */
|
||||
EZFS_NOHISTORY, /* no history object */
|
||||
EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
|
||||
EZFS_SHAREISCSIFAILED, /* iscsitgtd failed request to share */
|
||||
EZFS_POOLPROPS, /* couldn't retrieve pool props */
|
||||
EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */
|
||||
EZFS_POOL_INVALARG, /* invalid argument for this pool operation */
|
||||
@ -103,7 +99,6 @@ enum {
|
||||
EZFS_OPENFAILED, /* open of device failed */
|
||||
EZFS_NOCAP, /* couldn't get capacity */
|
||||
EZFS_LABELFAILED, /* write of label failed */
|
||||
EZFS_ISCSISVCUNAVAIL, /* iscsi service unavailable */
|
||||
EZFS_BADWHO, /* invalid permission who */
|
||||
EZFS_BADPERM, /* invalid permission */
|
||||
EZFS_BADPERMSET, /* invalid permission set name */
|
||||
@ -119,6 +114,12 @@ enum {
|
||||
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
|
||||
EZFS_REFTAG_RELE, /* snapshot release: tag not found */
|
||||
EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */
|
||||
EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */
|
||||
EZFS_PIPEFAILED, /* pipe create failed */
|
||||
EZFS_THREADCREATEFAILED, /* thread create failed */
|
||||
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
|
||||
EZFS_SCRUBBING, /* currently scrubbing */
|
||||
EZFS_NO_SCRUB, /* no active scrub */
|
||||
EZFS_UNKNOWN
|
||||
};
|
||||
|
||||
@ -213,11 +214,19 @@ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
|
||||
extern int zpool_destroy(zpool_handle_t *);
|
||||
extern int zpool_add(zpool_handle_t *, nvlist_t *);
|
||||
|
||||
typedef struct splitflags {
|
||||
/* do not split, but return the config that would be split off */
|
||||
int dryrun : 1;
|
||||
|
||||
/* after splitting, import the pool */
|
||||
int import : 1;
|
||||
} splitflags_t;
|
||||
|
||||
/*
|
||||
* Functions to manipulate pool and vdev state
|
||||
*/
|
||||
extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
|
||||
extern int zpool_clear(zpool_handle_t *, const char *);
|
||||
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
|
||||
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
|
||||
|
||||
extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
|
||||
vdev_state_t *);
|
||||
@ -226,9 +235,11 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
|
||||
const char *, nvlist_t *, int);
|
||||
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
|
||||
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
|
||||
extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
|
||||
splitflags_t);
|
||||
|
||||
extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
|
||||
extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
|
||||
extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
|
||||
extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
|
||||
extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
|
||||
|
||||
extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
|
||||
@ -298,6 +309,7 @@ typedef enum {
|
||||
|
||||
extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
|
||||
extern zpool_status_t zpool_import_status(nvlist_t *, char **);
|
||||
extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
|
||||
|
||||
/*
|
||||
* Statistics and configuration functions.
|
||||
@ -319,23 +331,38 @@ extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
|
||||
/*
|
||||
* Search for pools to import
|
||||
*/
|
||||
|
||||
typedef struct importargs {
|
||||
char **path; /* a list of paths to search */
|
||||
int paths; /* number of paths to search */
|
||||
char *poolname; /* name of a pool to find */
|
||||
uint64_t guid; /* guid of a pool to find */
|
||||
char *cachefile; /* cachefile to use for import */
|
||||
int can_be_active : 1; /* can the pool be active? */
|
||||
int unique : 1; /* does 'poolname' already exist? */
|
||||
int exists : 1; /* set on return if pool already exists */
|
||||
} importargs_t;
|
||||
|
||||
extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
|
||||
|
||||
/* legacy pool search routines */
|
||||
extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
|
||||
extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
|
||||
char *, uint64_t);
|
||||
extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
|
||||
char *);
|
||||
extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
|
||||
uint64_t);
|
||||
extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);
|
||||
|
||||
/*
|
||||
* Miscellaneous pool functions
|
||||
*/
|
||||
struct zfs_cmd;
|
||||
|
||||
extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
|
||||
extern const char *zfs_history_event_names[LOG_END];
|
||||
|
||||
extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
|
||||
boolean_t verbose);
|
||||
extern int zpool_upgrade(zpool_handle_t *, uint64_t);
|
||||
extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
|
||||
extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
|
||||
nvlist_t ***, uint_t *);
|
||||
extern void zpool_set_history_str(const char *subcommand, int argc,
|
||||
char **argv, char *history_str);
|
||||
extern int zpool_stage_history(libzfs_handle_t *, const char *);
|
||||
@ -343,6 +370,8 @@ extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
|
||||
size_t len);
|
||||
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
|
||||
extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
|
||||
extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
|
||||
nvlist_t *);
|
||||
|
||||
/*
|
||||
* Basic handle manipulations. These functions do not create or destroy the
|
||||
@ -374,6 +403,8 @@ extern const char *zfs_prop_to_name(zfs_prop_t);
|
||||
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
|
||||
extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
|
||||
zprop_source_t *, char *, size_t, boolean_t);
|
||||
extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t,
|
||||
boolean_t);
|
||||
extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
|
||||
zprop_source_t *, char *, size_t);
|
||||
extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
|
||||
@ -381,10 +412,11 @@ extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
|
||||
extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
|
||||
char *propbuf, int proplen, boolean_t literal);
|
||||
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
|
||||
extern int zfs_prop_inherit(zfs_handle_t *, const char *);
|
||||
extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
|
||||
extern const char *zfs_prop_values(zfs_prop_t);
|
||||
extern int zfs_prop_is_string(zfs_prop_t prop);
|
||||
extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
|
||||
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
|
||||
|
||||
typedef struct zprop_list {
|
||||
int pl_prop;
|
||||
@ -392,10 +424,11 @@ typedef struct zprop_list {
|
||||
struct zprop_list *pl_next;
|
||||
boolean_t pl_all;
|
||||
size_t pl_width;
|
||||
size_t pl_recvd_width;
|
||||
boolean_t pl_fixed;
|
||||
} zprop_list_t;
|
||||
|
||||
extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
|
||||
extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t);
|
||||
extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
|
||||
|
||||
#define ZFS_MOUNTPOINT_NONE "none"
|
||||
@ -419,13 +452,24 @@ extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
|
||||
zfs_type_t);
|
||||
extern void zprop_free_list(zprop_list_t *);
|
||||
|
||||
#define ZFS_GET_NCOLS 5
|
||||
|
||||
typedef enum {
|
||||
GET_COL_NONE,
|
||||
GET_COL_NAME,
|
||||
GET_COL_PROPERTY,
|
||||
GET_COL_VALUE,
|
||||
GET_COL_RECVD,
|
||||
GET_COL_SOURCE
|
||||
} zfs_get_column_t;
|
||||
|
||||
/*
|
||||
* Functions for printing zfs or zpool properties
|
||||
*/
|
||||
typedef struct zprop_get_cbdata {
|
||||
int cb_sources;
|
||||
int cb_columns[4];
|
||||
int cb_colwidths[5];
|
||||
zfs_get_column_t cb_columns[ZFS_GET_NCOLS];
|
||||
int cb_colwidths[ZFS_GET_NCOLS + 1];
|
||||
boolean_t cb_scripted;
|
||||
boolean_t cb_literal;
|
||||
boolean_t cb_first;
|
||||
@ -434,12 +478,8 @@ typedef struct zprop_get_cbdata {
|
||||
} zprop_get_cbdata_t;
|
||||
|
||||
void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
|
||||
const char *, const char *, zprop_source_t, const char *);
|
||||
|
||||
#define GET_COL_NAME 1
|
||||
#define GET_COL_PROPERTY 2
|
||||
#define GET_COL_VALUE 3
|
||||
#define GET_COL_SOURCE 4
|
||||
const char *, const char *, zprop_source_t, const char *,
|
||||
const char *);
|
||||
|
||||
/*
|
||||
* Iterator functions.
|
||||
@ -450,6 +490,7 @@ extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
|
||||
extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
|
||||
extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
|
||||
extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
|
||||
extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *);
|
||||
|
||||
/*
|
||||
* Functions to create and destroy datasets.
|
||||
@ -463,11 +504,42 @@ extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
|
||||
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
|
||||
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
|
||||
extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
|
||||
extern int zfs_send(zfs_handle_t *, const char *, const char *,
|
||||
boolean_t, boolean_t, boolean_t, boolean_t, int);
|
||||
|
||||
typedef struct sendflags {
|
||||
/* print informational messages (ie, -v was specified) */
|
||||
int verbose : 1;
|
||||
|
||||
/* recursive send (ie, -R) */
|
||||
int replicate : 1;
|
||||
|
||||
/* for incrementals, do all intermediate snapshots */
|
||||
int doall : 1; /* (ie, -I) */
|
||||
|
||||
/* if dataset is a clone, do incremental from its origin */
|
||||
int fromorigin : 1;
|
||||
|
||||
/* do deduplication */
|
||||
int dedup : 1;
|
||||
|
||||
/* send properties (ie, -p) */
|
||||
int props : 1;
|
||||
} sendflags_t;
|
||||
|
||||
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
|
||||
|
||||
extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
|
||||
void *cb_arg, nvlist_t **debugnvp);
|
||||
|
||||
extern int zfs_promote(zfs_handle_t *);
|
||||
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t);
|
||||
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
|
||||
boolean_t, boolean_t);
|
||||
extern int zfs_hold_range(zfs_handle_t *, const char *, const char *,
|
||||
const char *, boolean_t, boolean_t, snapfilter_cb_t, void *);
|
||||
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
|
||||
extern int zfs_release_range(zfs_handle_t *, const char *, const char *,
|
||||
const char *, boolean_t);
|
||||
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
|
||||
|
||||
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
|
||||
uid_t rid, uint64_t space);
|
||||
@ -482,6 +554,12 @@ typedef struct recvflags {
|
||||
/* the destination is a prefix, not the exact fs (ie, -d) */
|
||||
int isprefix : 1;
|
||||
|
||||
/*
|
||||
* Only the tail of the sent snapshot path is appended to the
|
||||
* destination to determine the received snapshot name (ie, -e).
|
||||
*/
|
||||
int istail : 1;
|
||||
|
||||
/* do not actually do the recv, just check if it would work (ie, -n) */
|
||||
int dryrun : 1;
|
||||
|
||||
@ -542,10 +620,6 @@ extern int zfs_unshareall_nfs(zfs_handle_t *);
|
||||
extern int zfs_unshareall_smb(zfs_handle_t *);
|
||||
extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
|
||||
extern int zfs_unshareall(zfs_handle_t *);
|
||||
extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
|
||||
extern int zfs_share_iscsi(zfs_handle_t *);
|
||||
extern int zfs_unshare_iscsi(zfs_handle_t *);
|
||||
extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
|
||||
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
|
||||
void *, void *, int, zfs_share_op_t);
|
||||
|
||||
@ -571,15 +645,10 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
|
||||
boolean_t *);
|
||||
|
||||
/*
|
||||
* ftyp special. Read the label from a given device.
|
||||
* Label manipulation.
|
||||
*/
|
||||
extern int zpool_read_label(int, nvlist_t **);
|
||||
|
||||
/*
|
||||
* Create and remove zvol /dev links.
|
||||
*/
|
||||
extern int zpool_create_zvol_links(zpool_handle_t *);
|
||||
extern int zpool_remove_zvol_links(zpool_handle_t *);
|
||||
extern int zpool_clear_label(int);
|
||||
|
||||
/* is this zvol valid for use as a dump device? */
|
||||
extern int zvol_check_dump_config(char *);
|
||||
@ -600,6 +669,17 @@ int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
|
||||
extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
|
||||
extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
|
||||
|
||||
/*
|
||||
* Mappings between vdev and FRU.
|
||||
*/
|
||||
extern void libzfs_fru_refresh(libzfs_handle_t *);
|
||||
extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *);
|
||||
extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *);
|
||||
extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *,
|
||||
const char *);
|
||||
extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *);
|
||||
extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -30,7 +30,6 @@
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zfs_acl.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/nvpair.h>
|
||||
|
||||
@ -38,6 +37,8 @@
|
||||
#include <libzfs.h>
|
||||
#include <libshare.h>
|
||||
|
||||
#include <fm/libtopo.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -47,6 +48,13 @@ extern "C" {
|
||||
#endif
|
||||
#define VERIFY verify
|
||||
|
||||
typedef struct libzfs_fru {
|
||||
char *zf_device;
|
||||
char *zf_fru;
|
||||
struct libzfs_fru *zf_chain;
|
||||
struct libzfs_fru *zf_next;
|
||||
} libzfs_fru_t;
|
||||
|
||||
struct libzfs_handle {
|
||||
int libzfs_error;
|
||||
int libzfs_fd;
|
||||
@ -65,7 +73,13 @@ struct libzfs_handle {
|
||||
uint_t libzfs_shareflags;
|
||||
boolean_t libzfs_mnttab_enable;
|
||||
avl_tree_t libzfs_mnttab_cache;
|
||||
int libzfs_pool_iter;
|
||||
topo_hdl_t *libzfs_topo_hdl;
|
||||
libzfs_fru_t **libzfs_fru_hash;
|
||||
libzfs_fru_t *libzfs_fru_list;
|
||||
char libzfs_chassis_id[256];
|
||||
};
|
||||
|
||||
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
|
||||
|
||||
struct zfs_handle {
|
||||
@ -77,6 +91,7 @@ struct zfs_handle {
|
||||
dmu_objset_stats_t zfs_dmustats;
|
||||
nvlist_t *zfs_props;
|
||||
nvlist_t *zfs_user_props;
|
||||
nvlist_t *zfs_recvd_props;
|
||||
boolean_t zfs_mntcheck;
|
||||
char *zfs_mntopts;
|
||||
uint8_t *zfs_props_table;
|
||||
@ -112,7 +127,6 @@ typedef enum {
|
||||
*/
|
||||
typedef enum {
|
||||
SHARED_NOT_SHARED = 0x0,
|
||||
SHARED_ISCSI = 0x1,
|
||||
SHARED_NFS = 0x2,
|
||||
SHARED_SMB = 0x4
|
||||
} zfs_share_type_t;
|
||||
@ -172,9 +186,6 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
|
||||
|
||||
int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
|
||||
|
||||
int zvol_create_link(libzfs_handle_t *, const char *);
|
||||
int zvol_remove_link(libzfs_handle_t *, const char *);
|
||||
int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
|
||||
boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
|
||||
|
||||
void namespace_clear(libzfs_handle_t *);
|
||||
@ -189,6 +200,9 @@ extern int zfs_parse_options(char *, zfs_share_proto_t);
|
||||
|
||||
extern int zfs_unshare_proto(zfs_handle_t *,
|
||||
const char *, zfs_share_proto_t *);
|
||||
|
||||
extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* Portions Copyright 2007 Ramprakash Jelari
|
||||
@ -116,32 +116,7 @@ changelist_prefix(prop_changelist_t *clp)
|
||||
if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
|
||||
continue;
|
||||
|
||||
if (ZFS_IS_VOLUME(cn->cn_handle)) {
|
||||
switch (clp->cl_realprop) {
|
||||
case ZFS_PROP_NAME:
|
||||
/*
|
||||
* If this was a rename, unshare the zvol, and
|
||||
* remove the /dev/zvol links.
|
||||
*/
|
||||
(void) zfs_unshare_iscsi(cn->cn_handle);
|
||||
|
||||
if (zvol_remove_link(cn->cn_handle->zfs_hdl,
|
||||
cn->cn_handle->zfs_name) != 0) {
|
||||
ret = -1;
|
||||
cn->cn_needpost = B_FALSE;
|
||||
(void) zfs_share_iscsi(cn->cn_handle);
|
||||
}
|
||||
break;
|
||||
|
||||
case ZFS_PROP_VOLSIZE:
|
||||
/*
|
||||
* If this was a change to the volume size, we
|
||||
* need to unshare and reshare the volume.
|
||||
*/
|
||||
(void) zfs_unshare_iscsi(cn->cn_handle);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!ZFS_IS_VOLUME(cn->cn_handle)) {
|
||||
/*
|
||||
* Do the property specific processing.
|
||||
*/
|
||||
@ -234,32 +209,8 @@ changelist_postfix(prop_changelist_t *clp)
|
||||
|
||||
zfs_refresh_properties(cn->cn_handle);
|
||||
|
||||
if (ZFS_IS_VOLUME(cn->cn_handle)) {
|
||||
/*
|
||||
* If we're doing a rename, recreate the /dev/zvol
|
||||
* links.
|
||||
*/
|
||||
if (clp->cl_realprop == ZFS_PROP_NAME &&
|
||||
zvol_create_link(cn->cn_handle->zfs_hdl,
|
||||
cn->cn_handle->zfs_name) != 0) {
|
||||
errors++;
|
||||
} else if (cn->cn_shared ||
|
||||
clp->cl_prop == ZFS_PROP_SHAREISCSI) {
|
||||
if (zfs_prop_get(cn->cn_handle,
|
||||
ZFS_PROP_SHAREISCSI, shareopts,
|
||||
sizeof (shareopts), NULL, NULL, 0,
|
||||
B_FALSE) == 0 &&
|
||||
strcmp(shareopts, "off") == 0) {
|
||||
errors +=
|
||||
zfs_unshare_iscsi(cn->cn_handle);
|
||||
} else {
|
||||
errors +=
|
||||
zfs_share_iscsi(cn->cn_handle);
|
||||
}
|
||||
}
|
||||
|
||||
if (ZFS_IS_VOLUME(cn->cn_handle))
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remount if previously mounted or mountpoint was legacy,
|
||||
@ -658,8 +609,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
|
||||
|
||||
if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
|
||||
clp->cl_prop != ZFS_PROP_SHARENFS &&
|
||||
clp->cl_prop != ZFS_PROP_SHARESMB &&
|
||||
clp->cl_prop != ZFS_PROP_SHAREISCSI)
|
||||
clp->cl_prop != ZFS_PROP_SHARESMB)
|
||||
return (clp);
|
||||
|
||||
/*
|
||||
|
@ -19,12 +19,10 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* The pool configuration repository is stored in /etc/zfs/zpool.cache as a
|
||||
* single packed nvlist. While it would be nice to just read in this
|
||||
@ -313,21 +311,33 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
|
||||
zpool_handle_t *zhp;
|
||||
int ret;
|
||||
|
||||
if (namespace_reload(hdl) != 0)
|
||||
/*
|
||||
* If someone makes a recursive call to zpool_iter(), we want to avoid
|
||||
* refreshing the namespace because that will invalidate the parent
|
||||
* context. We allow recursive calls, but simply re-use the same
|
||||
* namespace AVL tree.
|
||||
*/
|
||||
if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0)
|
||||
return (-1);
|
||||
|
||||
hdl->libzfs_pool_iter++;
|
||||
for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
|
||||
cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
|
||||
|
||||
if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0)
|
||||
if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) {
|
||||
hdl->libzfs_pool_iter--;
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (zhp == NULL)
|
||||
continue;
|
||||
|
||||
if ((ret = func(zhp, data)) != 0)
|
||||
if ((ret = func(zhp, data)) != 0) {
|
||||
hdl->libzfs_pool_iter--;
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
hdl->libzfs_pool_iter--;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
452
lib/libzfs/libzfs_fru.c
Normal file
452
lib/libzfs/libzfs_fru.c
Normal file
@ -0,0 +1,452 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <errno.h>
|
||||
#include <libintl.h>
|
||||
#include <link.h>
|
||||
#include <pthread.h>
|
||||
#include <strings.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <libzfs.h>
|
||||
|
||||
#include <fm/libtopo.h>
|
||||
#include <sys/fm/protocol.h>
|
||||
#include <sys/systeminfo.h>
|
||||
|
||||
#include "libzfs_impl.h"
|
||||
|
||||
/*
|
||||
* This file is responsible for determining the relationship between I/O
|
||||
* devices paths and physical locations. In the world of MPxIO and external
|
||||
* enclosures, the device path is not synonymous with the physical location.
|
||||
* If you remove a drive and insert it into a different slot, it will end up
|
||||
* with the same path under MPxIO. If you recable storage enclosures, the
|
||||
* device paths may change. All of this makes it difficult to implement the
|
||||
* 'autoreplace' property, which is supposed to automatically manage disk
|
||||
* replacement based on physical slot.
|
||||
*
|
||||
* In order to work around these limitations, we have a per-vdev FRU property
|
||||
* that is the libtopo path (minus disk-specific authority information) to the
|
||||
* physical location of the device on the system. This is an optional
|
||||
* property, and is only needed when using the 'autoreplace' property or when
|
||||
* generating FMA faults against vdevs.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
|
||||
* it is not present. We only need this once per library instance, so it is
|
||||
* not part of the libzfs handle.
|
||||
*/
|
||||
static void *_topo_dlhandle;
|
||||
static topo_hdl_t *(*_topo_open)(int, const char *, int *);
|
||||
static void (*_topo_close)(topo_hdl_t *);
|
||||
static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
|
||||
static void (*_topo_snap_release)(topo_hdl_t *);
|
||||
static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
|
||||
topo_walk_cb_t, void *, int *);
|
||||
static int (*_topo_walk_step)(topo_walk_t *, int);
|
||||
static void (*_topo_walk_fini)(topo_walk_t *);
|
||||
static void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
|
||||
static char *(*_topo_node_name)(tnode_t *);
|
||||
static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
|
||||
char **, int *);
|
||||
static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
|
||||
static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
|
||||
static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
|
||||
const char *);
|
||||
|
||||
#define ZFS_FRU_HASH_SIZE 257
|
||||
|
||||
static size_t
|
||||
fru_strhash(const char *key)
|
||||
{
|
||||
ulong_t g, h = 0;
|
||||
const char *p;
|
||||
|
||||
for (p = key; *p != '\0'; p++) {
|
||||
h = (h << 4) + *p;
|
||||
|
||||
if ((g = (h & 0xf0000000)) != 0) {
|
||||
h ^= (g >> 24);
|
||||
h ^= g;
|
||||
}
|
||||
}
|
||||
|
||||
return (h % ZFS_FRU_HASH_SIZE);
|
||||
}
|
||||
|
||||
static int
|
||||
libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
|
||||
{
|
||||
libzfs_handle_t *hdl = arg;
|
||||
nvlist_t *fru;
|
||||
char *devpath, *frustr;
|
||||
int err;
|
||||
libzfs_fru_t *frup;
|
||||
size_t idx;
|
||||
|
||||
/*
|
||||
* If this is the chassis node, and we don't yet have the system
|
||||
* chassis ID, then fill in this value now.
|
||||
*/
|
||||
if (hdl->libzfs_chassis_id[0] == '\0' &&
|
||||
strcmp(_topo_node_name(tn), "chassis") == 0) {
|
||||
if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
|
||||
FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0)
|
||||
(void) strlcpy(hdl->libzfs_chassis_id, devpath,
|
||||
sizeof (hdl->libzfs_chassis_id));
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip non-disk nodes.
|
||||
*/
|
||||
if (strcmp(_topo_node_name(tn), "disk") != 0)
|
||||
return (TOPO_WALK_NEXT);
|
||||
|
||||
/*
|
||||
* Get the devfs path and FRU.
|
||||
*/
|
||||
if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
|
||||
return (TOPO_WALK_NEXT);
|
||||
|
||||
if (libzfs_fru_lookup(hdl, devpath) != NULL) {
|
||||
_topo_hdl_strfree(thp, devpath);
|
||||
return (TOPO_WALK_NEXT);
|
||||
}
|
||||
|
||||
if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
|
||||
_topo_hdl_strfree(thp, devpath);
|
||||
return (TOPO_WALK_NEXT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert the FRU into a string.
|
||||
*/
|
||||
if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
|
||||
nvlist_free(fru);
|
||||
_topo_hdl_strfree(thp, devpath);
|
||||
return (TOPO_WALK_NEXT);
|
||||
}
|
||||
|
||||
nvlist_free(fru);
|
||||
|
||||
/*
|
||||
* Finally, we have a FRU string and device path. Add it to the hash.
|
||||
*/
|
||||
if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) {
|
||||
_topo_hdl_strfree(thp, devpath);
|
||||
_topo_hdl_strfree(thp, frustr);
|
||||
return (TOPO_WALK_NEXT);
|
||||
}
|
||||
|
||||
if ((frup->zf_device = strdup(devpath)) == NULL ||
|
||||
(frup->zf_fru = strdup(frustr)) == NULL) {
|
||||
free(frup->zf_device);
|
||||
free(frup);
|
||||
_topo_hdl_strfree(thp, devpath);
|
||||
_topo_hdl_strfree(thp, frustr);
|
||||
return (TOPO_WALK_NEXT);
|
||||
}
|
||||
|
||||
_topo_hdl_strfree(thp, devpath);
|
||||
_topo_hdl_strfree(thp, frustr);
|
||||
|
||||
idx = fru_strhash(frup->zf_device);
|
||||
frup->zf_chain = hdl->libzfs_fru_hash[idx];
|
||||
hdl->libzfs_fru_hash[idx] = frup;
|
||||
frup->zf_next = hdl->libzfs_fru_list;
|
||||
hdl->libzfs_fru_list = frup;
|
||||
|
||||
return (TOPO_WALK_NEXT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called during initialization to setup the dynamic libtopo connection.
|
||||
*/
|
||||
#pragma init(libzfs_init_fru)
|
||||
static void
|
||||
libzfs_init_fru(void)
|
||||
{
|
||||
char path[MAXPATHLEN];
|
||||
char isa[257];
|
||||
|
||||
#if defined(_LP64)
|
||||
if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
|
||||
isa[0] = '\0';
|
||||
#else
|
||||
isa[0] = '\0';
|
||||
#endif
|
||||
(void) snprintf(path, sizeof (path),
|
||||
"/usr/lib/fm/%s/libtopo.so", isa);
|
||||
|
||||
if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
|
||||
return;
|
||||
|
||||
_topo_open = (topo_hdl_t *(*)())
|
||||
dlsym(_topo_dlhandle, "topo_open");
|
||||
_topo_close = (void (*)())
|
||||
dlsym(_topo_dlhandle, "topo_close");
|
||||
_topo_snap_hold = (char *(*)())
|
||||
dlsym(_topo_dlhandle, "topo_snap_hold");
|
||||
_topo_snap_release = (void (*)())
|
||||
dlsym(_topo_dlhandle, "topo_snap_release");
|
||||
_topo_walk_init = (topo_walk_t *(*)())
|
||||
dlsym(_topo_dlhandle, "topo_walk_init");
|
||||
_topo_walk_step = (int (*)())
|
||||
dlsym(_topo_dlhandle, "topo_walk_step");
|
||||
_topo_walk_fini = (void (*)())
|
||||
dlsym(_topo_dlhandle, "topo_walk_fini");
|
||||
_topo_hdl_strfree = (void (*)())
|
||||
dlsym(_topo_dlhandle, "topo_hdl_strfree");
|
||||
_topo_node_name = (char *(*)())
|
||||
dlsym(_topo_dlhandle, "topo_node_name");
|
||||
_topo_prop_get_string = (int (*)())
|
||||
dlsym(_topo_dlhandle, "topo_prop_get_string");
|
||||
_topo_node_fru = (int (*)())
|
||||
dlsym(_topo_dlhandle, "topo_node_fru");
|
||||
_topo_fmri_nvl2str = (int (*)())
|
||||
dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
|
||||
_topo_fmri_strcmp_noauth = (int (*)())
|
||||
dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
|
||||
|
||||
if (_topo_open == NULL || _topo_close == NULL ||
|
||||
_topo_snap_hold == NULL || _topo_snap_release == NULL ||
|
||||
_topo_walk_init == NULL || _topo_walk_step == NULL ||
|
||||
_topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
|
||||
_topo_node_name == NULL || _topo_prop_get_string == NULL ||
|
||||
_topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
|
||||
_topo_fmri_strcmp_noauth == NULL) {
|
||||
(void) dlclose(_topo_dlhandle);
|
||||
_topo_dlhandle = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Refresh the mappings from device path -> FMRI. We do this by walking the
|
||||
* hc topology looking for disk nodes, and recording the io/devfs-path and FRU.
|
||||
* Note that we strip out the disk-specific authority information (serial,
|
||||
* part, revision, etc) so that we are left with only the identifying
|
||||
* characteristics of the slot (hc path and chassis-id).
|
||||
*/
|
||||
void
|
||||
libzfs_fru_refresh(libzfs_handle_t *hdl)
|
||||
{
|
||||
int err;
|
||||
char *uuid;
|
||||
topo_hdl_t *thp;
|
||||
topo_walk_t *twp;
|
||||
|
||||
if (_topo_dlhandle == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Clear the FRU hash and initialize our basic structures.
|
||||
*/
|
||||
libzfs_fru_clear(hdl, B_FALSE);
|
||||
|
||||
if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION,
|
||||
NULL, &err)) == NULL)
|
||||
return;
|
||||
|
||||
thp = hdl->libzfs_topo_hdl;
|
||||
|
||||
if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL)
|
||||
return;
|
||||
|
||||
_topo_hdl_strfree(thp, uuid);
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL &&
|
||||
(hdl->libzfs_fru_hash =
|
||||
calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We now have a topo snapshot, so iterate over the hc topology looking
|
||||
* for disks to add to the hash.
|
||||
*/
|
||||
twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC,
|
||||
libzfs_fru_gather, hdl, &err);
|
||||
if (twp != NULL) {
|
||||
(void) _topo_walk_step(twp, TOPO_WALK_CHILD);
|
||||
_topo_walk_fini(twp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a devfs path, return the FRU for the device, if known. This will
|
||||
* automatically call libzfs_fru_refresh() if it hasn't already been called by
|
||||
* the consumer. The string returned is valid until the next call to
|
||||
* libzfs_fru_refresh().
|
||||
*/
|
||||
const char *
|
||||
libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
|
||||
{
|
||||
size_t idx = fru_strhash(devpath);
|
||||
libzfs_fru_t *frup;
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
libzfs_fru_refresh(hdl);
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
return (NULL);
|
||||
|
||||
for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
|
||||
frup = frup->zf_chain) {
|
||||
if (strcmp(devpath, frup->zf_device) == 0)
|
||||
return (frup->zf_fru);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a fru path, return the device path. This will automatically call
|
||||
* libzfs_fru_refresh() if it hasn't already been called by the consumer. The
|
||||
* string returned is valid until the next call to libzfs_fru_refresh().
|
||||
*/
|
||||
const char *
|
||||
libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
|
||||
{
|
||||
libzfs_fru_t *frup;
|
||||
size_t idx;
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
libzfs_fru_refresh(hdl);
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
return (NULL);
|
||||
|
||||
for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
|
||||
for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
|
||||
frup = frup->zf_next) {
|
||||
if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
|
||||
fru, frup->zf_fru))
|
||||
return (frup->zf_device);
|
||||
}
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the stored FRU for the given vdev.
|
||||
*/
|
||||
int
|
||||
zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
|
||||
{
|
||||
zfs_cmd_t zc = { 0 };
|
||||
|
||||
(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
(void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value));
|
||||
zc.zc_guid = vdev_guid;
|
||||
|
||||
if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
|
||||
return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
|
||||
dgettext(TEXT_DOMAIN, "cannot set FRU")));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare to two FRUs, ignoring any authority information.
|
||||
*/
|
||||
boolean_t
|
||||
libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
|
||||
{
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
libzfs_fru_refresh(hdl);
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
return (strcmp(a, b) == 0);
|
||||
|
||||
return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
|
||||
}
|
||||
|
||||
/*
|
||||
* This special function checks to see whether the FRU indicates it's supposed
|
||||
* to be in the system chassis, but the chassis-id doesn't match. This can
|
||||
* happen in a clustered case, where both head nodes have the same logical
|
||||
* disk, but opening the device on the other head node is meaningless.
|
||||
*/
|
||||
boolean_t
|
||||
libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
|
||||
{
|
||||
const char *chassisid;
|
||||
size_t len;
|
||||
|
||||
if (hdl->libzfs_fru_hash == NULL)
|
||||
libzfs_fru_refresh(hdl);
|
||||
|
||||
if (hdl->libzfs_chassis_id[0] == '\0')
|
||||
return (B_FALSE);
|
||||
|
||||
if (strstr(fru, "/chassis=0/") == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
if ((chassisid = strstr(fru, ":chassis-id=")) == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
chassisid += 12;
|
||||
len = strlen(hdl->libzfs_chassis_id);
|
||||
if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 &&
|
||||
(chassisid[len] == '/' || chassisid[len] == ':'))
|
||||
return (B_FALSE);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear memory associated with the FRU hash.
|
||||
*/
|
||||
void
|
||||
libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
|
||||
{
|
||||
libzfs_fru_t *frup;
|
||||
|
||||
while ((frup = hdl->libzfs_fru_list) != NULL) {
|
||||
hdl->libzfs_fru_list = frup->zf_next;
|
||||
free(frup->zf_device);
|
||||
free(frup->zf_fru);
|
||||
free(frup);
|
||||
}
|
||||
|
||||
hdl->libzfs_fru_list = NULL;
|
||||
|
||||
if (hdl->libzfs_topo_hdl != NULL) {
|
||||
_topo_snap_release(hdl->libzfs_topo_hdl);
|
||||
_topo_close(hdl->libzfs_topo_hdl);
|
||||
hdl->libzfs_topo_hdl = NULL;
|
||||
}
|
||||
|
||||
if (final) {
|
||||
free(hdl->libzfs_fru_hash);
|
||||
} else if (hdl->libzfs_fru_hash != NULL) {
|
||||
bzero(hdl->libzfs_fru_hash,
|
||||
ZFS_FRU_HASH_SIZE * sizeof (void *));
|
||||
}
|
||||
}
|
@ -19,12 +19,10 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Pool import support functions.
|
||||
*
|
||||
@ -41,15 +39,21 @@
|
||||
* using our derived config, and record the results.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <devid.h>
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <libintl.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/vtoc.h>
|
||||
#include <sys/dktp/fdisk.h>
|
||||
#include <sys/efi_partition.h>
|
||||
#include <thread_pool.h>
|
||||
|
||||
#include <sys/vdev_impl.h>
|
||||
|
||||
@ -388,8 +392,6 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
|
||||
}
|
||||
|
||||
if (err) {
|
||||
(void) zpool_standard_error(hdl, errno,
|
||||
dgettext(TEXT_DOMAIN, "cannot discover pools"));
|
||||
zcmd_free_nvlists(&zc);
|
||||
return (NULL);
|
||||
}
|
||||
@ -403,6 +405,21 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
|
||||
return (nvl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if the vdev id is a hole in the namespace.
|
||||
*/
|
||||
boolean_t
|
||||
vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
|
||||
{
|
||||
for (int c = 0; c < holes; c++) {
|
||||
|
||||
/* Top-level is a hole */
|
||||
if (hole_array[c] == id)
|
||||
return (B_TRUE);
|
||||
}
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert our list of pools into the definitive set of configurations. We
|
||||
* start by picking the best config for each toplevel vdev. Once that's done,
|
||||
@ -425,17 +442,20 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
uint64_t version, guid;
|
||||
uint_t children = 0;
|
||||
nvlist_t **child = NULL;
|
||||
uint_t holes;
|
||||
uint64_t *hole_array, max_id;
|
||||
uint_t c;
|
||||
boolean_t isactive;
|
||||
uint64_t hostid;
|
||||
nvlist_t *nvl;
|
||||
boolean_t found_one = B_FALSE;
|
||||
boolean_t valid_top_config = B_FALSE;
|
||||
|
||||
if (nvlist_alloc(&ret, 0, 0) != 0)
|
||||
goto nomem;
|
||||
|
||||
for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
|
||||
uint64_t id;
|
||||
uint64_t id, max_txg = 0;
|
||||
|
||||
if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
|
||||
goto nomem;
|
||||
@ -463,6 +483,42 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We rely on the fact that the max txg for the
|
||||
* pool will contain the most up-to-date information
|
||||
* about the valid top-levels in the vdev namespace.
|
||||
*/
|
||||
if (best_txg > max_txg) {
|
||||
(void) nvlist_remove(config,
|
||||
ZPOOL_CONFIG_VDEV_CHILDREN,
|
||||
DATA_TYPE_UINT64);
|
||||
(void) nvlist_remove(config,
|
||||
ZPOOL_CONFIG_HOLE_ARRAY,
|
||||
DATA_TYPE_UINT64_ARRAY);
|
||||
|
||||
max_txg = best_txg;
|
||||
hole_array = NULL;
|
||||
holes = 0;
|
||||
max_id = 0;
|
||||
valid_top_config = B_FALSE;
|
||||
|
||||
if (nvlist_lookup_uint64(tmp,
|
||||
ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
|
||||
verify(nvlist_add_uint64(config,
|
||||
ZPOOL_CONFIG_VDEV_CHILDREN,
|
||||
max_id) == 0);
|
||||
valid_top_config = B_TRUE;
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64_array(tmp,
|
||||
ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
|
||||
&holes) == 0) {
|
||||
verify(nvlist_add_uint64_array(config,
|
||||
ZPOOL_CONFIG_HOLE_ARRAY,
|
||||
hole_array, holes) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!config_seen) {
|
||||
/*
|
||||
* Copy the relevant pieces of data to the pool
|
||||
@ -522,6 +578,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
|
||||
verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
|
||||
&id) == 0);
|
||||
|
||||
if (id >= children) {
|
||||
nvlist_t **newchild;
|
||||
|
||||
@ -542,9 +599,74 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have information about all the top-levels then
|
||||
* clean up the nvlist which we've constructed. This
|
||||
* means removing any extraneous devices that are
|
||||
* beyond the valid range or adding devices to the end
|
||||
* of our array which appear to be missing.
|
||||
*/
|
||||
if (valid_top_config) {
|
||||
if (max_id < children) {
|
||||
for (c = max_id; c < children; c++)
|
||||
nvlist_free(child[c]);
|
||||
children = max_id;
|
||||
} else if (max_id > children) {
|
||||
nvlist_t **newchild;
|
||||
|
||||
newchild = zfs_alloc(hdl, (max_id) *
|
||||
sizeof (nvlist_t *));
|
||||
if (newchild == NULL)
|
||||
goto nomem;
|
||||
|
||||
for (c = 0; c < children; c++)
|
||||
newchild[c] = child[c];
|
||||
|
||||
free(child);
|
||||
child = newchild;
|
||||
children = max_id;
|
||||
}
|
||||
}
|
||||
|
||||
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
||||
&guid) == 0);
|
||||
|
||||
/*
|
||||
* The vdev namespace may contain holes as a result of
|
||||
* device removal. We must add them back into the vdev
|
||||
* tree before we process any missing devices.
|
||||
*/
|
||||
if (holes > 0) {
|
||||
ASSERT(valid_top_config);
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
nvlist_t *holey;
|
||||
|
||||
if (child[c] != NULL ||
|
||||
!vdev_is_hole(hole_array, holes, c))
|
||||
continue;
|
||||
|
||||
if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
|
||||
0) != 0)
|
||||
goto nomem;
|
||||
|
||||
/*
|
||||
* Holes in the namespace are treated as
|
||||
* "hole" top-level vdevs and have a
|
||||
* special flag set on them.
|
||||
*/
|
||||
if (nvlist_add_string(holey,
|
||||
ZPOOL_CONFIG_TYPE,
|
||||
VDEV_TYPE_HOLE) != 0 ||
|
||||
nvlist_add_uint64(holey,
|
||||
ZPOOL_CONFIG_ID, c) != 0 ||
|
||||
nvlist_add_uint64(holey,
|
||||
ZPOOL_CONFIG_GUID, 0ULL) != 0)
|
||||
goto nomem;
|
||||
child[c] = holey;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for any missing top-level vdevs. If this is the case,
|
||||
* create a faked up 'missing' vdev as a placeholder. We cannot
|
||||
@ -552,7 +674,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
* certain checks to make sure the vdev IDs match their location
|
||||
* in the configuration.
|
||||
*/
|
||||
for (c = 0; c < children; c++)
|
||||
for (c = 0; c < children; c++) {
|
||||
if (child[c] == NULL) {
|
||||
nvlist_t *missing;
|
||||
if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
|
||||
@ -570,6 +692,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
}
|
||||
child[c] = missing;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Put all of this pool's top-level vdevs into a root vdev.
|
||||
@ -636,8 +759,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((nvl = refresh_config(hdl, config)) == NULL)
|
||||
goto error;
|
||||
if ((nvl = refresh_config(hdl, config)) == NULL) {
|
||||
nvlist_free(config);
|
||||
config = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
nvlist_free(config);
|
||||
config = nvl;
|
||||
@ -777,6 +903,212 @@ zpool_read_label(int fd, nvlist_t **config)
|
||||
return (0);
|
||||
}
|
||||
|
||||
typedef struct rdsk_node {
|
||||
char *rn_name;
|
||||
int rn_dfd;
|
||||
libzfs_handle_t *rn_hdl;
|
||||
nvlist_t *rn_config;
|
||||
avl_tree_t *rn_avl;
|
||||
avl_node_t rn_node;
|
||||
boolean_t rn_nozpool;
|
||||
} rdsk_node_t;
|
||||
|
||||
static int
|
||||
slice_cache_compare(const void *arg1, const void *arg2)
|
||||
{
|
||||
const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
|
||||
const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
|
||||
char *nm1slice, *nm2slice;
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* slices zero and two are the most likely to provide results,
|
||||
* so put those first
|
||||
*/
|
||||
nm1slice = strstr(nm1, "s0");
|
||||
nm2slice = strstr(nm2, "s0");
|
||||
if (nm1slice && !nm2slice) {
|
||||
return (-1);
|
||||
}
|
||||
if (!nm1slice && nm2slice) {
|
||||
return (1);
|
||||
}
|
||||
nm1slice = strstr(nm1, "s2");
|
||||
nm2slice = strstr(nm2, "s2");
|
||||
if (nm1slice && !nm2slice) {
|
||||
return (-1);
|
||||
}
|
||||
if (!nm1slice && nm2slice) {
|
||||
return (1);
|
||||
}
|
||||
|
||||
rv = strcmp(nm1, nm2);
|
||||
if (rv == 0)
|
||||
return (0);
|
||||
return (rv > 0 ? 1 : -1);
|
||||
}
|
||||
|
||||
static void
|
||||
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
|
||||
diskaddr_t size, uint_t blksz)
|
||||
{
|
||||
rdsk_node_t tmpnode;
|
||||
rdsk_node_t *node;
|
||||
char sname[MAXNAMELEN];
|
||||
|
||||
tmpnode.rn_name = &sname[0];
|
||||
(void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
|
||||
diskname, partno);
|
||||
/*
|
||||
* protect against division by zero for disk labels that
|
||||
* contain a bogus sector size
|
||||
*/
|
||||
if (blksz == 0)
|
||||
blksz = DEV_BSIZE;
|
||||
/* too small to contain a zpool? */
|
||||
if ((size < (SPA_MINDEVSIZE / blksz)) &&
|
||||
(node = avl_find(r, &tmpnode, NULL)))
|
||||
node->rn_nozpool = B_TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
nozpool_all_slices(avl_tree_t *r, const char *sname)
|
||||
{
|
||||
char diskname[MAXNAMELEN];
|
||||
char *ptr;
|
||||
int i;
|
||||
|
||||
(void) strncpy(diskname, sname, MAXNAMELEN);
|
||||
if (((ptr = strrchr(diskname, 's')) == NULL) &&
|
||||
((ptr = strrchr(diskname, 'p')) == NULL))
|
||||
return;
|
||||
ptr[0] = 's';
|
||||
ptr[1] = '\0';
|
||||
for (i = 0; i < NDKMAP; i++)
|
||||
check_one_slice(r, diskname, i, 0, 1);
|
||||
ptr[0] = 'p';
|
||||
for (i = 0; i <= FD_NUMPART; i++)
|
||||
check_one_slice(r, diskname, i, 0, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
check_slices(avl_tree_t *r, int fd, const char *sname)
|
||||
{
|
||||
struct extvtoc vtoc;
|
||||
struct dk_gpt *gpt;
|
||||
char diskname[MAXNAMELEN];
|
||||
char *ptr;
|
||||
int i;
|
||||
|
||||
(void) strncpy(diskname, sname, MAXNAMELEN);
|
||||
if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
|
||||
return;
|
||||
ptr[1] = '\0';
|
||||
|
||||
if (read_extvtoc(fd, &vtoc) >= 0) {
|
||||
for (i = 0; i < NDKMAP; i++)
|
||||
check_one_slice(r, diskname, i,
|
||||
vtoc.v_part[i].p_size, vtoc.v_sectorsz);
|
||||
} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
|
||||
/*
|
||||
* on x86 we'll still have leftover links that point
|
||||
* to slices s[9-15], so use NDKMAP instead
|
||||
*/
|
||||
for (i = 0; i < NDKMAP; i++)
|
||||
check_one_slice(r, diskname, i,
|
||||
gpt->efi_parts[i].p_size, gpt->efi_lbasize);
|
||||
/* nodes p[1-4] are never used with EFI labels */
|
||||
ptr[0] = 'p';
|
||||
for (i = 1; i <= FD_NUMPART; i++)
|
||||
check_one_slice(r, diskname, i, 0, 1);
|
||||
efi_free(gpt);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
zpool_open_func(void *arg)
|
||||
{
|
||||
rdsk_node_t *rn = arg;
|
||||
struct stat64 statbuf;
|
||||
nvlist_t *config;
|
||||
int fd;
|
||||
|
||||
if (rn->rn_nozpool)
|
||||
return;
|
||||
if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
|
||||
/* symlink to a device that's no longer there */
|
||||
if (errno == ENOENT)
|
||||
nozpool_all_slices(rn->rn_avl, rn->rn_name);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Ignore failed stats. We only want regular
|
||||
* files, character devs and block devs.
|
||||
*/
|
||||
if (fstat64(fd, &statbuf) != 0 ||
|
||||
(!S_ISREG(statbuf.st_mode) &&
|
||||
!S_ISCHR(statbuf.st_mode) &&
|
||||
!S_ISBLK(statbuf.st_mode))) {
|
||||
(void) close(fd);
|
||||
return;
|
||||
}
|
||||
/* this file is too small to hold a zpool */
|
||||
if (S_ISREG(statbuf.st_mode) &&
|
||||
statbuf.st_size < SPA_MINDEVSIZE) {
|
||||
(void) close(fd);
|
||||
return;
|
||||
} else if (!S_ISREG(statbuf.st_mode)) {
|
||||
/*
|
||||
* Try to read the disk label first so we don't have to
|
||||
* open a bunch of minor nodes that can't have a zpool.
|
||||
*/
|
||||
check_slices(rn->rn_avl, fd, rn->rn_name);
|
||||
}
|
||||
|
||||
if ((zpool_read_label(fd, &config)) != 0) {
|
||||
(void) close(fd);
|
||||
(void) no_memory(rn->rn_hdl);
|
||||
return;
|
||||
}
|
||||
(void) close(fd);
|
||||
|
||||
|
||||
rn->rn_config = config;
|
||||
if (config != NULL) {
|
||||
assert(rn->rn_nozpool == B_FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a file descriptor, clear (zero) the label information. This function
|
||||
* is currently only used in the appliance stack as part of the ZFS sysevent
|
||||
* module.
|
||||
*/
|
||||
int
|
||||
zpool_clear_label(int fd)
|
||||
{
|
||||
struct stat64 statbuf;
|
||||
int l;
|
||||
vdev_label_t *label;
|
||||
uint64_t size;
|
||||
|
||||
if (fstat64(fd, &statbuf) == -1)
|
||||
return (0);
|
||||
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
|
||||
|
||||
if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
|
||||
return (-1);
|
||||
|
||||
for (l = 0; l < VDEV_LABELS; l++) {
|
||||
if (pwrite64(fd, label, sizeof (vdev_label_t),
|
||||
label_offset(size, l)) != sizeof (vdev_label_t))
|
||||
return (-1);
|
||||
}
|
||||
|
||||
free(label);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a list of directories to search, find all pools stored on disk. This
|
||||
* includes partial pools which are not available to import. If no args are
|
||||
@ -785,30 +1117,28 @@ zpool_read_label(int fd, nvlist_t **config)
|
||||
* to import a specific pool.
|
||||
*/
|
||||
static nvlist_t *
|
||||
zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
boolean_t active_ok, char *poolname, uint64_t guid)
|
||||
zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
|
||||
{
|
||||
int i;
|
||||
int i, dirs = iarg->paths;
|
||||
DIR *dirp = NULL;
|
||||
struct dirent64 *dp;
|
||||
char path[MAXPATHLEN];
|
||||
char *end;
|
||||
char *end, **dir = iarg->path;
|
||||
size_t pathleft;
|
||||
struct stat64 statbuf;
|
||||
nvlist_t *ret = NULL, *config;
|
||||
nvlist_t *ret = NULL;
|
||||
static char *default_dir = "/dev/dsk";
|
||||
int fd;
|
||||
pool_list_t pools = { 0 };
|
||||
pool_entry_t *pe, *penext;
|
||||
vdev_entry_t *ve, *venext;
|
||||
config_entry_t *ce, *cenext;
|
||||
name_entry_t *ne, *nenext;
|
||||
avl_tree_t slice_cache;
|
||||
rdsk_node_t *slice;
|
||||
void *cookie;
|
||||
|
||||
verify(poolname == NULL || guid == 0);
|
||||
|
||||
if (argc == 0) {
|
||||
argc = 1;
|
||||
argv = &default_dir;
|
||||
if (dirs == 0) {
|
||||
dirs = 1;
|
||||
dir = &default_dir;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -816,15 +1146,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
* possible device, organizing the information according to pool GUID
|
||||
* and toplevel GUID.
|
||||
*/
|
||||
for (i = 0; i < argc; i++) {
|
||||
for (i = 0; i < dirs; i++) {
|
||||
tpool_t *t;
|
||||
char *rdsk;
|
||||
int dfd;
|
||||
|
||||
/* use realpath to normalize the path */
|
||||
if (realpath(argv[i], path) == 0) {
|
||||
if (realpath(dir[i], path) == 0) {
|
||||
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
|
||||
dgettext(TEXT_DOMAIN, "cannot open '%s'"),
|
||||
argv[i]);
|
||||
dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
|
||||
goto error;
|
||||
}
|
||||
end = &path[strlen(path)];
|
||||
@ -851,6 +1181,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
goto error;
|
||||
}
|
||||
|
||||
avl_create(&slice_cache, slice_cache_compare,
|
||||
sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
|
||||
/*
|
||||
* This is not MT-safe, but we have no MT consumers of libzfs
|
||||
*/
|
||||
@ -860,46 +1192,53 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
(name[1] == 0 || (name[1] == '.' && name[2] == 0)))
|
||||
continue;
|
||||
|
||||
if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
|
||||
continue;
|
||||
slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
|
||||
slice->rn_name = zfs_strdup(hdl, name);
|
||||
slice->rn_avl = &slice_cache;
|
||||
slice->rn_dfd = dfd;
|
||||
slice->rn_hdl = hdl;
|
||||
slice->rn_nozpool = B_FALSE;
|
||||
avl_add(&slice_cache, slice);
|
||||
}
|
||||
/*
|
||||
* create a thread pool to do all of this in parallel;
|
||||
* rn_nozpool is not protected, so this is racy in that
|
||||
* multiple tasks could decide that the same slice can
|
||||
* not hold a zpool, which is benign. Also choose
|
||||
* double the number of processors; we hold a lot of
|
||||
* locks in the kernel, so going beyond this doesn't
|
||||
* buy us much.
|
||||
*/
|
||||
t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
|
||||
0, NULL);
|
||||
for (slice = avl_first(&slice_cache); slice;
|
||||
(slice = avl_walk(&slice_cache, slice,
|
||||
AVL_AFTER)))
|
||||
(void) tpool_dispatch(t, zpool_open_func, slice);
|
||||
tpool_wait(t);
|
||||
tpool_destroy(t);
|
||||
|
||||
/*
|
||||
* Ignore failed stats. We only want regular
|
||||
* files, character devs and block devs.
|
||||
*/
|
||||
if (fstat64(fd, &statbuf) != 0 ||
|
||||
(!S_ISREG(statbuf.st_mode) &&
|
||||
!S_ISCHR(statbuf.st_mode) &&
|
||||
!S_ISBLK(statbuf.st_mode))) {
|
||||
(void) close(fd);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((zpool_read_label(fd, &config)) != 0) {
|
||||
(void) close(fd);
|
||||
(void) no_memory(hdl);
|
||||
goto error;
|
||||
}
|
||||
|
||||
(void) close(fd);
|
||||
|
||||
if (config != NULL) {
|
||||
cookie = NULL;
|
||||
while ((slice = avl_destroy_nodes(&slice_cache,
|
||||
&cookie)) != NULL) {
|
||||
if (slice->rn_config != NULL) {
|
||||
nvlist_t *config = slice->rn_config;
|
||||
boolean_t matched = B_TRUE;
|
||||
|
||||
if (poolname != NULL) {
|
||||
if (iarg->poolname != NULL) {
|
||||
char *pname;
|
||||
|
||||
matched = nvlist_lookup_string(config,
|
||||
ZPOOL_CONFIG_POOL_NAME,
|
||||
&pname) == 0 &&
|
||||
strcmp(poolname, pname) == 0;
|
||||
} else if (guid != 0) {
|
||||
strcmp(iarg->poolname, pname) == 0;
|
||||
} else if (iarg->guid != 0) {
|
||||
uint64_t this_guid;
|
||||
|
||||
matched = nvlist_lookup_uint64(config,
|
||||
ZPOOL_CONFIG_POOL_GUID,
|
||||
&this_guid) == 0 &&
|
||||
guid == this_guid;
|
||||
iarg->guid == this_guid;
|
||||
}
|
||||
if (!matched) {
|
||||
nvlist_free(config);
|
||||
@ -907,17 +1246,20 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
continue;
|
||||
}
|
||||
/* use the non-raw path for the config */
|
||||
(void) strlcpy(end, name, pathleft);
|
||||
(void) strlcpy(end, slice->rn_name, pathleft);
|
||||
if (add_config(hdl, &pools, path, config) != 0)
|
||||
goto error;
|
||||
}
|
||||
free(slice->rn_name);
|
||||
free(slice);
|
||||
}
|
||||
avl_destroy(&slice_cache);
|
||||
|
||||
(void) closedir(dirp);
|
||||
dirp = NULL;
|
||||
}
|
||||
|
||||
ret = get_configs(hdl, &pools, active_ok);
|
||||
ret = get_configs(hdl, &pools, iarg->can_be_active);
|
||||
|
||||
error:
|
||||
for (pe = pools.pools; pe != NULL; pe = penext) {
|
||||
@ -951,27 +1293,12 @@ error:
|
||||
nvlist_t *
|
||||
zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
|
||||
{
|
||||
return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
|
||||
}
|
||||
importargs_t iarg = { 0 };
|
||||
|
||||
nvlist_t *
|
||||
zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
char *pool)
|
||||
{
|
||||
return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
|
||||
}
|
||||
iarg.paths = argc;
|
||||
iarg.path = argv;
|
||||
|
||||
nvlist_t *
|
||||
zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
|
||||
uint64_t guid)
|
||||
{
|
||||
return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
|
||||
{
|
||||
return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
|
||||
return (zpool_find_import_impl(hdl, &iarg));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1093,6 +1420,46 @@ zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
|
||||
return (pools);
|
||||
}
|
||||
|
||||
static int
|
||||
name_or_guid_exists(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
importargs_t *import = data;
|
||||
int found = 0;
|
||||
|
||||
if (import->poolname != NULL) {
|
||||
char *pool_name;
|
||||
|
||||
verify(nvlist_lookup_string(zhp->zpool_config,
|
||||
ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
|
||||
if (strcmp(pool_name, import->poolname) == 0)
|
||||
found = 1;
|
||||
} else {
|
||||
uint64_t pool_guid;
|
||||
|
||||
verify(nvlist_lookup_uint64(zhp->zpool_config,
|
||||
ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
|
||||
if (pool_guid == import->guid)
|
||||
found = 1;
|
||||
}
|
||||
|
||||
zpool_close(zhp);
|
||||
return (found);
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
|
||||
{
|
||||
verify(import->poolname == NULL || import->guid == 0);
|
||||
|
||||
if (import->unique)
|
||||
import->exists = zpool_iter(hdl, name_or_guid_exists, import);
|
||||
|
||||
if (import->cachefile != NULL)
|
||||
return (zpool_find_import_cached(hdl, import->cachefile,
|
||||
import->poolname, import->guid));
|
||||
|
||||
return (zpool_find_import_impl(hdl, import));
|
||||
}
|
||||
|
||||
boolean_t
|
||||
find_guid(nvlist_t *nv, uint64_t guid)
|
||||
|
@ -20,8 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -44,17 +43,14 @@
|
||||
*
|
||||
* zfs_is_shared_nfs()
|
||||
* zfs_is_shared_smb()
|
||||
* zfs_is_shared_iscsi()
|
||||
* zfs_share_proto()
|
||||
* zfs_shareall();
|
||||
* zfs_share_iscsi()
|
||||
* zfs_unshare_nfs()
|
||||
* zfs_unshare_smb()
|
||||
* zfs_unshareall_nfs()
|
||||
* zfs_unshareall_smb()
|
||||
* zfs_unshareall()
|
||||
* zfs_unshareall_bypath()
|
||||
* zfs_unshare_iscsi()
|
||||
*
|
||||
* The following functions are available for pool consumers, and will
|
||||
* mount/unmount and share/unshare all datasets within pool:
|
||||
@ -89,11 +85,6 @@ static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
|
||||
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
|
||||
zfs_share_proto_t);
|
||||
|
||||
static int (*iscsitgt_zfs_share)(const char *);
|
||||
static int (*iscsitgt_zfs_unshare)(const char *);
|
||||
static int (*iscsitgt_zfs_is_shared)(const char *);
|
||||
static int (*iscsitgt_svc_online)();
|
||||
|
||||
/*
|
||||
* The share protocols table must be in the same order as the zfs_share_prot_t
|
||||
* enum in libzfs_impl.h
|
||||
@ -125,29 +116,6 @@ zfs_share_proto_t share_all_proto[] = {
|
||||
PROTO_END
|
||||
};
|
||||
|
||||
#pragma init(zfs_iscsi_init)
|
||||
static void
|
||||
zfs_iscsi_init(void)
|
||||
{
|
||||
void *libiscsitgt;
|
||||
|
||||
if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1",
|
||||
RTLD_LAZY | RTLD_GLOBAL)) == NULL ||
|
||||
(iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
|
||||
"iscsitgt_zfs_share")) == NULL ||
|
||||
(iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
|
||||
"iscsitgt_zfs_unshare")) == NULL ||
|
||||
(iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
|
||||
"iscsitgt_zfs_is_shared")) == NULL ||
|
||||
(iscsitgt_svc_online = (int (*)(const char *))dlsym(libiscsitgt,
|
||||
"iscsitgt_svc_online")) == NULL) {
|
||||
iscsitgt_zfs_share = NULL;
|
||||
iscsitgt_zfs_unshare = NULL;
|
||||
iscsitgt_zfs_is_shared = NULL;
|
||||
iscsitgt_svc_online = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the sharetab for the given mountpoint and protocol, returning
|
||||
* a zfs_share_type_t value.
|
||||
@ -345,6 +313,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
|
||||
} else if (errno == EPERM) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"Insufficient privileges"));
|
||||
} else if (errno == ENOTSUP) {
|
||||
char buf[256];
|
||||
int spa_version;
|
||||
|
||||
VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
|
||||
(void) snprintf(buf, sizeof (buf),
|
||||
dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
|
||||
"file system on a version %d pool. Pool must be"
|
||||
" upgraded to mount this file system."),
|
||||
(u_longlong_t)zfs_prop_get_int(zhp,
|
||||
ZFS_PROP_VERSION), spa_version);
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
|
||||
} else {
|
||||
zfs_error_aux(hdl, strerror(errno));
|
||||
}
|
||||
@ -445,7 +425,7 @@ zfs_is_shared(zfs_handle_t *zhp)
|
||||
zfs_share_proto_t *curr_proto;
|
||||
|
||||
if (ZFS_IS_VOLUME(zhp))
|
||||
return (zfs_is_shared_iscsi(zhp));
|
||||
return (B_FALSE);
|
||||
|
||||
for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
|
||||
curr_proto++)
|
||||
@ -458,7 +438,7 @@ int
|
||||
zfs_share(zfs_handle_t *zhp)
|
||||
{
|
||||
if (ZFS_IS_VOLUME(zhp))
|
||||
return (zfs_share_iscsi(zhp));
|
||||
return (0);
|
||||
|
||||
return (zfs_share_proto(zhp, share_all_proto));
|
||||
}
|
||||
@ -467,7 +447,7 @@ int
|
||||
zfs_unshare(zfs_handle_t *zhp)
|
||||
{
|
||||
if (ZFS_IS_VOLUME(zhp))
|
||||
return (zfs_unshare_iscsi(zhp));
|
||||
return (0);
|
||||
|
||||
return (zfs_unshareall(zhp));
|
||||
}
|
||||
@ -999,81 +979,6 @@ remove_mountpoint(zfs_handle_t *zhp)
|
||||
}
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zfs_is_shared_iscsi(zfs_handle_t *zhp)
|
||||
{
|
||||
|
||||
/*
|
||||
* If iscsi deamon isn't running then we aren't shared
|
||||
*/
|
||||
if (iscsitgt_svc_online && iscsitgt_svc_online() == 1)
|
||||
return (B_FALSE);
|
||||
else
|
||||
return (iscsitgt_zfs_is_shared != NULL &&
|
||||
iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_share_iscsi(zfs_handle_t *zhp)
|
||||
{
|
||||
char shareopts[ZFS_MAXPROPLEN];
|
||||
const char *dataset = zhp->zfs_name;
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
|
||||
/*
|
||||
* Return success if there are no share options.
|
||||
*/
|
||||
if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
|
||||
sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
|
||||
strcmp(shareopts, "off") == 0)
|
||||
return (0);
|
||||
|
||||
if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0) {
|
||||
int error = EZFS_SHAREISCSIFAILED;
|
||||
|
||||
/*
|
||||
* If service isn't availabele and EPERM was
|
||||
* returned then use special error.
|
||||
*/
|
||||
if (iscsitgt_svc_online && errno == EPERM &&
|
||||
(iscsitgt_svc_online() != 0))
|
||||
error = EZFS_ISCSISVCUNAVAIL;
|
||||
|
||||
return (zfs_error_fmt(hdl, error,
|
||||
dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_unshare_iscsi(zfs_handle_t *zhp)
|
||||
{
|
||||
const char *dataset = zfs_get_name(zhp);
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
|
||||
/*
|
||||
* Return if the volume is not shared
|
||||
*/
|
||||
if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If this fails with ENODEV it indicates that zvol wasn't shared so
|
||||
* we should return success in that case.
|
||||
*/
|
||||
if (iscsitgt_zfs_unshare == NULL ||
|
||||
(iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV)) {
|
||||
if (errno == EPERM)
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"Insufficient privileges to unshare iscsi"));
|
||||
return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
|
||||
dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
typedef struct mount_cbdata {
|
||||
zfs_handle_t **cb_datasets;
|
||||
int cb_used;
|
||||
@ -1215,28 +1120,6 @@ out:
|
||||
return (ret);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
zvol_cb(const char *dataset, void *data)
|
||||
{
|
||||
libzfs_handle_t *hdl = data;
|
||||
zfs_handle_t *zhp;
|
||||
|
||||
/*
|
||||
* Ignore snapshots and ignore failures from non-existant datasets.
|
||||
*/
|
||||
if (strchr(dataset, '@') != NULL ||
|
||||
(zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
|
||||
return (0);
|
||||
|
||||
if (zfs_unshare_iscsi(zhp) != 0)
|
||||
return (-1);
|
||||
|
||||
zfs_close(zhp);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
mountpoint_compare(const void *a, const void *b)
|
||||
{
|
||||
@ -1246,6 +1129,8 @@ mountpoint_compare(const void *a, const void *b)
|
||||
return (strcmp(mountb, mounta));
|
||||
}
|
||||
|
||||
/* alias for 2002/240 */
|
||||
#pragma weak zpool_unmount_datasets = zpool_disable_datasets
|
||||
/*
|
||||
* Unshare and unmount all datasets within the given pool. We don't want to
|
||||
* rely on traversing the DSL to discover the filesystems within the pool,
|
||||
@ -1253,7 +1138,6 @@ mountpoint_compare(const void *a, const void *b)
|
||||
* arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and
|
||||
* gather all the filesystems that are currently mounted.
|
||||
*/
|
||||
#pragma weak zpool_unmount_datasets = zpool_disable_datasets
|
||||
int
|
||||
zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
|
||||
{
|
||||
@ -1267,12 +1151,6 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
|
||||
int ret = -1;
|
||||
int flags = (force ? MS_FORCE : 0);
|
||||
|
||||
/*
|
||||
* First unshare all zvols.
|
||||
*/
|
||||
if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
|
||||
return (-1);
|
||||
|
||||
namelen = strlen(zhp->zpool_name);
|
||||
|
||||
rewind(hdl->libzfs_mnttab);
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -138,7 +137,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
|
||||
if (find_vdev_problem(child[c], func))
|
||||
return (B_TRUE);
|
||||
} else {
|
||||
verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
|
||||
verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0);
|
||||
|
||||
if (func(vs->vs_state, vs->vs_aux,
|
||||
@ -173,7 +172,8 @@ check_status(nvlist_t *config, boolean_t isimport)
|
||||
{
|
||||
nvlist_t *nvroot;
|
||||
vdev_stat_t *vs;
|
||||
uint_t vsc;
|
||||
pool_scan_stat_t *ps = NULL;
|
||||
uint_t vsc, psc;
|
||||
uint64_t nerr;
|
||||
uint64_t version;
|
||||
uint64_t stateval;
|
||||
@ -184,15 +184,24 @@ check_status(nvlist_t *config, boolean_t isimport)
|
||||
&version) == 0);
|
||||
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvroot) == 0);
|
||||
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
|
||||
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &vsc) == 0);
|
||||
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
|
||||
&stateval) == 0);
|
||||
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
||||
|
||||
/*
|
||||
* Currently resilvering a vdev
|
||||
*/
|
||||
(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
|
||||
(uint64_t **)&ps, &psc);
|
||||
if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
|
||||
ps->pss_state == DSS_SCANNING)
|
||||
return (ZPOOL_STATUS_RESILVERING);
|
||||
|
||||
/*
|
||||
* Pool last accessed by another system.
|
||||
*/
|
||||
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
||||
if (hostid != 0 && (unsigned long)hostid != gethostid() &&
|
||||
stateval == POOL_STATE_ACTIVE)
|
||||
return (ZPOOL_STATUS_HOSTID_MISMATCH);
|
||||
@ -288,12 +297,6 @@ check_status(nvlist_t *config, boolean_t isimport)
|
||||
if (find_vdev_problem(nvroot, vdev_removed))
|
||||
return (ZPOOL_STATUS_REMOVED_DEV);
|
||||
|
||||
/*
|
||||
* Currently resilvering
|
||||
*/
|
||||
if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
|
||||
return (ZPOOL_STATUS_RESILVERING);
|
||||
|
||||
/*
|
||||
* Outdated, but usable, version
|
||||
*/
|
||||
@ -328,3 +331,68 @@ zpool_import_status(nvlist_t *config, char **msgid)
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static void
|
||||
dump_ddt_stat(const ddt_stat_t *dds, int h)
|
||||
{
|
||||
char refcnt[6];
|
||||
char blocks[6], lsize[6], psize[6], dsize[6];
|
||||
char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
|
||||
|
||||
if (dds == NULL || dds->dds_blocks == 0)
|
||||
return;
|
||||
|
||||
if (h == -1)
|
||||
(void) strcpy(refcnt, "Total");
|
||||
else
|
||||
zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
|
||||
|
||||
zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
|
||||
zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
|
||||
zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
|
||||
zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
|
||||
zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
|
||||
zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
|
||||
zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
|
||||
zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
|
||||
|
||||
(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
|
||||
refcnt,
|
||||
blocks, lsize, psize, dsize,
|
||||
ref_blocks, ref_lsize, ref_psize, ref_dsize);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print the DDT histogram and the column totals.
|
||||
*/
|
||||
void
|
||||
zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
|
||||
{
|
||||
int h;
|
||||
|
||||
(void) printf("\n");
|
||||
|
||||
(void) printf("bucket "
|
||||
" allocated "
|
||||
" referenced \n");
|
||||
(void) printf("______ "
|
||||
"______________________________ "
|
||||
"______________________________\n");
|
||||
|
||||
(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
|
||||
"refcnt",
|
||||
"blocks", "LSIZE", "PSIZE", "DSIZE",
|
||||
"blocks", "LSIZE", "PSIZE", "DSIZE");
|
||||
|
||||
(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
|
||||
"------",
|
||||
"------", "-----", "-----", "-----",
|
||||
"------", "-----", "-----", "-----");
|
||||
|
||||
for (h = 0; h < 64; h++)
|
||||
dump_ddt_stat(&ddh->ddh_stat[h], h);
|
||||
|
||||
dump_ddt_stat(dds_total, -1);
|
||||
|
||||
(void) printf("\n");
|
||||
}
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -94,8 +93,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
case EZFS_VOLTOOBIG:
|
||||
return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
|
||||
"this system"));
|
||||
case EZFS_VOLHASDATA:
|
||||
return (dgettext(TEXT_DOMAIN, "volume has data"));
|
||||
case EZFS_INVALIDNAME:
|
||||
return (dgettext(TEXT_DOMAIN, "invalid name"));
|
||||
case EZFS_BADRESTORE:
|
||||
@ -138,16 +135,12 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
|
||||
case EZFS_SHARESMBFAILED:
|
||||
return (dgettext(TEXT_DOMAIN, "smb add share failed"));
|
||||
case EZFS_ISCSISVCUNAVAIL:
|
||||
return (dgettext(TEXT_DOMAIN,
|
||||
"iscsitgt service need to be enabled by "
|
||||
"a privileged user"));
|
||||
case EZFS_DEVLINKS:
|
||||
return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
|
||||
case EZFS_PERM:
|
||||
return (dgettext(TEXT_DOMAIN, "permission denied"));
|
||||
case EZFS_NOSPC:
|
||||
return (dgettext(TEXT_DOMAIN, "out of space"));
|
||||
case EZFS_FAULT:
|
||||
return (dgettext(TEXT_DOMAIN, "bad address"));
|
||||
case EZFS_IO:
|
||||
return (dgettext(TEXT_DOMAIN, "I/O error"));
|
||||
case EZFS_INTR:
|
||||
@ -161,12 +154,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
|
||||
case EZFS_NOHISTORY:
|
||||
return (dgettext(TEXT_DOMAIN, "no history available"));
|
||||
case EZFS_UNSHAREISCSIFAILED:
|
||||
return (dgettext(TEXT_DOMAIN,
|
||||
"iscsitgtd failed request to unshare"));
|
||||
case EZFS_SHAREISCSIFAILED:
|
||||
return (dgettext(TEXT_DOMAIN,
|
||||
"iscsitgtd failed request to share"));
|
||||
case EZFS_POOLPROPS:
|
||||
return (dgettext(TEXT_DOMAIN, "failed to retrieve "
|
||||
"pool properties"));
|
||||
@ -218,6 +205,20 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
case EZFS_REFTAG_HOLD:
|
||||
return (dgettext(TEXT_DOMAIN, "tag already exists on this "
|
||||
"dataset"));
|
||||
case EZFS_TAGTOOLONG:
|
||||
return (dgettext(TEXT_DOMAIN, "tag too long"));
|
||||
case EZFS_PIPEFAILED:
|
||||
return (dgettext(TEXT_DOMAIN, "pipe create failed"));
|
||||
case EZFS_THREADCREATEFAILED:
|
||||
return (dgettext(TEXT_DOMAIN, "thread create failed"));
|
||||
case EZFS_POSTSPLIT_ONLINE:
|
||||
return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
|
||||
"into a new one"));
|
||||
case EZFS_SCRUBBING:
|
||||
return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
|
||||
"use 'zpool scrub -s' to cancel current scrub"));
|
||||
case EZFS_NO_SCRUB:
|
||||
return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
@ -306,6 +307,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
|
||||
zfs_verror(hdl, EZFS_IO, fmt, ap);
|
||||
return (-1);
|
||||
|
||||
case EFAULT:
|
||||
zfs_verror(hdl, EZFS_FAULT, fmt, ap);
|
||||
return (-1);
|
||||
|
||||
case EINTR:
|
||||
zfs_verror(hdl, EZFS_INTR, fmt, ap);
|
||||
return (-1);
|
||||
@ -378,7 +383,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
||||
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
|
||||
break;
|
||||
default:
|
||||
zfs_error_aux(hdl, strerror(errno));
|
||||
zfs_error_aux(hdl, strerror(error));
|
||||
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
|
||||
break;
|
||||
}
|
||||
@ -610,6 +615,7 @@ libzfs_fini(libzfs_handle_t *hdl)
|
||||
if (hdl->libzfs_log_str)
|
||||
(void) free(hdl->libzfs_log_str);
|
||||
zpool_free_handles(hdl);
|
||||
libzfs_fru_clear(hdl, B_TRUE);
|
||||
namespace_clear(hdl);
|
||||
libzfs_mnttab_fini(hdl);
|
||||
free(hdl);
|
||||
@ -686,7 +692,7 @@ int
|
||||
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
|
||||
{
|
||||
if (len == 0)
|
||||
len = 2048;
|
||||
len = 4*1024;
|
||||
zc->zc_nvlist_dst_size = len;
|
||||
if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
|
||||
zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
|
||||
@ -812,6 +818,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
"PROPERTY"));
|
||||
cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
|
||||
"VALUE"));
|
||||
cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN,
|
||||
"RECEIVED"));
|
||||
cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
|
||||
"SOURCE"));
|
||||
|
||||
@ -825,7 +833,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
* inheriting from the longest name. This is acceptable because in the
|
||||
* majority of cases 'SOURCE' is the last column displayed, and we don't
|
||||
* use the width anyway. Note that the 'VALUE' column can be oversized,
|
||||
* if the name of the property is much longer the any values we find.
|
||||
* if the name of the property is much longer than any values we find.
|
||||
*/
|
||||
for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
|
||||
/*
|
||||
@ -856,6 +864,11 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
|
||||
cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
|
||||
|
||||
/* 'RECEIVED' column. */
|
||||
if (pl != cbp->cb_proplist &&
|
||||
pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD])
|
||||
cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width;
|
||||
|
||||
/*
|
||||
* 'NAME' and 'SOURCE' columns
|
||||
*/
|
||||
@ -871,7 +884,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
/*
|
||||
* Now go through and print the headers.
|
||||
*/
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (i = 0; i < ZFS_GET_NCOLS; i++) {
|
||||
switch (cbp->cb_columns[i]) {
|
||||
case GET_COL_NAME:
|
||||
title = dgettext(TEXT_DOMAIN, "NAME");
|
||||
@ -882,6 +895,9 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
case GET_COL_VALUE:
|
||||
title = dgettext(TEXT_DOMAIN, "VALUE");
|
||||
break;
|
||||
case GET_COL_RECVD:
|
||||
title = dgettext(TEXT_DOMAIN, "RECEIVED");
|
||||
break;
|
||||
case GET_COL_SOURCE:
|
||||
title = dgettext(TEXT_DOMAIN, "SOURCE");
|
||||
break;
|
||||
@ -890,7 +906,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
}
|
||||
|
||||
if (title != NULL) {
|
||||
if (i == 3 || cbp->cb_columns[i + 1] == 0)
|
||||
if (i == (ZFS_GET_NCOLS - 1) ||
|
||||
cbp->cb_columns[i + 1] == GET_COL_NONE)
|
||||
(void) printf("%s", title);
|
||||
else
|
||||
(void) printf("%-*s ",
|
||||
@ -908,7 +925,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
void
|
||||
zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
|
||||
const char *propname, const char *value, zprop_source_t sourcetype,
|
||||
const char *source)
|
||||
const char *source, const char *recvd_value)
|
||||
{
|
||||
int i;
|
||||
const char *str;
|
||||
@ -923,7 +940,7 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
|
||||
if (cbp->cb_first)
|
||||
zprop_print_headers(cbp, cbp->cb_type);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (i = 0; i < ZFS_GET_NCOLS; i++) {
|
||||
switch (cbp->cb_columns[i]) {
|
||||
case GET_COL_NAME:
|
||||
str = name;
|
||||
@ -960,14 +977,21 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
|
||||
"inherited from %s", source);
|
||||
str = buf;
|
||||
break;
|
||||
case ZPROP_SRC_RECEIVED:
|
||||
str = "received";
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case GET_COL_RECVD:
|
||||
str = (recvd_value == NULL ? "-" : recvd_value);
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cbp->cb_columns[i + 1] == 0)
|
||||
if (cbp->cb_columns[i + 1] == GET_COL_NONE)
|
||||
(void) printf("%s", str);
|
||||
else if (cbp->cb_scripted)
|
||||
(void) printf("%s\t", str);
|
||||
@ -975,7 +999,6 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
|
||||
(void) printf("%-*s ",
|
||||
cbp->cb_colwidths[cbp->cb_columns[i]],
|
||||
str);
|
||||
|
||||
}
|
||||
|
||||
(void) printf("\n");
|
||||
@ -1037,7 +1060,7 @@ zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/* Rely on stroull() to process the numeric portion. */
|
||||
/* Rely on strtoull() to process the numeric portion. */
|
||||
errno = 0;
|
||||
*num = strtoull(value, &end, 10);
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -75,6 +75,7 @@ extern "C" {
|
||||
#include <sys/u8_textprep.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
#include <sys/sysevent/dev.h>
|
||||
#include <sys/sunddi.h>
|
||||
|
||||
/*
|
||||
* Debugging
|
||||
@ -105,21 +106,27 @@ extern void vpanic(const char *, __va_list);
|
||||
|
||||
#define fm_panic panic
|
||||
|
||||
extern int aok;
|
||||
|
||||
/* This definition is copied from assert.h. */
|
||||
#if defined(__STDC__)
|
||||
#if __STDC_VERSION__ - 0 >= 199901L
|
||||
#define verify(EX) (void)((EX) || \
|
||||
#define zverify(EX) (void)((EX) || (aok) || \
|
||||
(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
|
||||
#else
|
||||
#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
|
||||
#define zverify(EX) (void)((EX) || (aok) || \
|
||||
(__assert(#EX, __FILE__, __LINE__), 0))
|
||||
#endif /* __STDC_VERSION__ - 0 >= 199901L */
|
||||
#else
|
||||
#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
|
||||
#define zverify(EX) (void)((EX) || (aok) || \
|
||||
(_assert("EX", __FILE__, __LINE__), 0))
|
||||
#endif /* __STDC__ */
|
||||
|
||||
|
||||
#define VERIFY verify
|
||||
#define ASSERT assert
|
||||
#define VERIFY zverify
|
||||
#define ASSERT zverify
|
||||
#undef assert
|
||||
#define assert zverify
|
||||
|
||||
extern void __assert(const char *, const char *, int);
|
||||
|
||||
@ -130,7 +137,7 @@ extern void __assert(const char *, const char *, int);
|
||||
#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
|
||||
const TYPE __left = (TYPE)(LEFT); \
|
||||
const TYPE __right = (TYPE)(RIGHT); \
|
||||
if (!(__left OP __right)) { \
|
||||
if (!(__left OP __right) && (!aok)) { \
|
||||
char *__buf = alloca(256); \
|
||||
(void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
|
||||
#LEFT, #OP, #RIGHT, \
|
||||
@ -196,6 +203,18 @@ typedef struct kthread kthread_t;
|
||||
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
|
||||
zk_thread_create(func, arg)
|
||||
#define thread_exit() thr_exit(NULL)
|
||||
#define thread_join(t) panic("libzpool cannot join threads")
|
||||
|
||||
#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS)
|
||||
|
||||
/* in libzpool, p0 exists only to have its address taken */
|
||||
struct proc {
|
||||
uintptr_t this_is_never_used_dont_dereference_it;
|
||||
};
|
||||
|
||||
extern struct proc p0;
|
||||
|
||||
#define PS_NONE -1
|
||||
|
||||
extern kthread_t *zk_thread_create(void (*func)(), void *arg);
|
||||
|
||||
@ -318,20 +337,27 @@ typedef void (task_func_t)(void *);
|
||||
#define TASKQ_PREPOPULATE 0x0001
|
||||
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
|
||||
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
|
||||
#define TASKQ_THREADS_CPU_PCT 0x0008 /* Use dynamic thread scheduling */
|
||||
#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */
|
||||
#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */
|
||||
|
||||
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
|
||||
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
|
||||
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
|
||||
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
|
||||
#define TQ_FRONT 0x08 /* Queue in front */
|
||||
|
||||
extern taskq_t *system_taskq;
|
||||
|
||||
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
||||
#define taskq_create_proc(a, b, c, d, e, p, f) \
|
||||
(taskq_create(a, b, c, d, e, f))
|
||||
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
|
||||
(taskq_create(a, b, maxclsyspri, d, e, f))
|
||||
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
|
||||
extern void taskq_destroy(taskq_t *);
|
||||
extern void taskq_wait(taskq_t *);
|
||||
extern int taskq_member(taskq_t *, void *);
|
||||
extern void system_taskq_init(void);
|
||||
extern void system_taskq_fini(void);
|
||||
|
||||
#define XVA_MAPSIZE 3
|
||||
#define XVA_MAGIC 0x78766174
|
||||
@ -345,6 +371,7 @@ typedef struct vnode {
|
||||
char *v_path;
|
||||
} vnode_t;
|
||||
|
||||
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
|
||||
|
||||
typedef struct xoptattr {
|
||||
timestruc_t xoa_createtime; /* Create time of file */
|
||||
@ -360,6 +387,8 @@ typedef struct xoptattr {
|
||||
uint8_t xoa_opaque;
|
||||
uint8_t xoa_av_quarantined;
|
||||
uint8_t xoa_av_modified;
|
||||
uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ];
|
||||
uint8_t xoa_reparse;
|
||||
} xoptattr_t;
|
||||
|
||||
typedef struct vattr {
|
||||
@ -406,9 +435,11 @@ typedef struct vsecattr {
|
||||
|
||||
#define CRCREAT 0
|
||||
|
||||
extern int fop_getattr(vnode_t *vp, vattr_t *vap);
|
||||
|
||||
#define VOP_CLOSE(vp, f, c, o, cr, ct) 0
|
||||
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
|
||||
#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0)
|
||||
#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap));
|
||||
|
||||
#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
|
||||
|
||||
@ -433,13 +464,18 @@ extern vnode_t *rootdir;
|
||||
/*
|
||||
* Random stuff
|
||||
*/
|
||||
#define lbolt (gethrtime() >> 23)
|
||||
#define lbolt64 (gethrtime() >> 23)
|
||||
#define ddi_get_lbolt() (gethrtime() >> 23)
|
||||
#define ddi_get_lbolt64() (gethrtime() >> 23)
|
||||
#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
|
||||
|
||||
extern void delay(clock_t ticks);
|
||||
|
||||
#define gethrestime_sec() time(NULL)
|
||||
#define gethrestime(t) \
|
||||
do {\
|
||||
(t)->tv_sec = gethrestime_sec();\
|
||||
(t)->tv_nsec = 0;\
|
||||
} while (0);
|
||||
|
||||
#define max_ncpus 64
|
||||
|
||||
@ -490,6 +526,9 @@ typedef struct callb_cpr {
|
||||
#define zone_dataset_visible(x, y) (1)
|
||||
#define INGLOBALZONE(z) (1)
|
||||
|
||||
extern char *kmem_asprintf(const char *fmt, ...);
|
||||
#define strfree(str) kmem_free((str), strlen(str)+1)
|
||||
|
||||
/*
|
||||
* Hostname information
|
||||
*/
|
||||
@ -497,6 +536,9 @@ extern char hw_serial[]; /* for userland-emulated hostid access */
|
||||
extern int ddi_strtoul(const char *str, char **nptr, int base,
|
||||
unsigned long *result);
|
||||
|
||||
extern int ddi_strtoull(const char *str, char **nptr, int base,
|
||||
u_longlong_t *result);
|
||||
|
||||
/* ZFS Boot Related stuff. */
|
||||
|
||||
struct _buf {
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
* Emulation of kernel services in userland.
|
||||
*/
|
||||
|
||||
int aok;
|
||||
uint64_t physmem;
|
||||
vnode_t *rootdir = (vnode_t *)0xabcd1234;
|
||||
char hw_serial[HW_HOSTID_LEN];
|
||||
@ -50,6 +51,9 @@ struct utsname utsname = {
|
||||
"userland", "libzpool", "1", "1", "na"
|
||||
};
|
||||
|
||||
/* this only exists to have its address taken */
|
||||
struct proc p0;
|
||||
|
||||
/*
|
||||
* =========================================================================
|
||||
* threads
|
||||
@ -269,7 +273,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
|
||||
clock_t delta;
|
||||
|
||||
top:
|
||||
delta = abstime - lbolt;
|
||||
delta = abstime - ddi_get_lbolt();
|
||||
if (delta <= 0)
|
||||
return (-1);
|
||||
|
||||
@ -444,6 +448,24 @@ vn_close(vnode_t *vp)
|
||||
umem_free(vp, sizeof (vnode_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* At a minimum we need to update the size since vdev_reopen()
|
||||
* will no longer call vn_openat().
|
||||
*/
|
||||
int
|
||||
fop_getattr(vnode_t *vp, vattr_t *vap)
|
||||
{
|
||||
struct stat64 st;
|
||||
|
||||
if (fstat64(vp->v_fd, &st) == -1) {
|
||||
close(vp->v_fd);
|
||||
return (errno);
|
||||
}
|
||||
|
||||
vap->va_size = st.st_size;
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
|
||||
/*
|
||||
@ -754,6 +776,17 @@ ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
|
||||
{
|
||||
char *end;
|
||||
|
||||
*result = strtoull(str, &end, base);
|
||||
if (*result == 0)
|
||||
return (errno);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* =========================================================================
|
||||
* kernel emulation setup & teardown
|
||||
@ -779,7 +812,8 @@ kernel_init(int mode)
|
||||
dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
|
||||
(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
|
||||
|
||||
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
|
||||
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
|
||||
(mode & FWRITE) ? gethostid() : 0);
|
||||
|
||||
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
|
||||
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
|
||||
@ -794,6 +828,8 @@ kernel_fini(void)
|
||||
{
|
||||
spa_fini();
|
||||
|
||||
system_taskq_fini();
|
||||
|
||||
close(random_fd);
|
||||
close(urandom_fd);
|
||||
|
||||
@ -884,3 +920,27 @@ ksiddomain_rele(ksiddomain_t *ksid)
|
||||
spa_strfree(ksid->kd_name);
|
||||
umem_free(ksid, sizeof (ksiddomain_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Do not change the length of the returned string; it must be freed
|
||||
* with strfree().
|
||||
*/
|
||||
char *
|
||||
kmem_asprintf(const char *fmt, ...)
|
||||
{
|
||||
int size;
|
||||
va_list adx;
|
||||
char *buf;
|
||||
|
||||
va_start(adx, fmt);
|
||||
size = vsnprintf(NULL, 0, fmt, adx) + 1;
|
||||
va_end(adx);
|
||||
|
||||
buf = kmem_alloc(size, KM_SLEEP);
|
||||
|
||||
va_start(adx, fmt);
|
||||
size = vsnprintf(buf, size, fmt, adx);
|
||||
va_end(adx);
|
||||
|
||||
return (buf);
|
||||
}
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -49,6 +49,8 @@ struct taskq {
|
||||
int tq_nalloc;
|
||||
int tq_minalloc;
|
||||
int tq_maxalloc;
|
||||
kcondvar_t tq_maxalloc_cv;
|
||||
int tq_maxalloc_wait;
|
||||
task_t *tq_freelist;
|
||||
task_t tq_task;
|
||||
};
|
||||
@ -57,26 +59,36 @@ static task_t *
|
||||
task_alloc(taskq_t *tq, int tqflags)
|
||||
{
|
||||
task_t *t;
|
||||
int rv;
|
||||
|
||||
if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
|
||||
again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
|
||||
tq->tq_freelist = t->task_next;
|
||||
} else {
|
||||
mutex_exit(&tq->tq_lock);
|
||||
if (tq->tq_nalloc >= tq->tq_maxalloc) {
|
||||
if (!(tqflags & KM_SLEEP)) {
|
||||
mutex_enter(&tq->tq_lock);
|
||||
if (!(tqflags & KM_SLEEP))
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't want to exceed tq_maxalloc, but we can't
|
||||
* wait for other tasks to complete (and thus free up
|
||||
* task structures) without risking deadlock with
|
||||
* the caller. So, we just delay for one second
|
||||
* to throttle the allocation rate.
|
||||
* to throttle the allocation rate. If we have tasks
|
||||
* complete before one second timeout expires then
|
||||
* taskq_ent_free will signal us and we will
|
||||
* immediately retry the allocation.
|
||||
*/
|
||||
delay(hz);
|
||||
tq->tq_maxalloc_wait++;
|
||||
rv = cv_timedwait(&tq->tq_maxalloc_cv,
|
||||
&tq->tq_lock, ddi_get_lbolt() + hz);
|
||||
tq->tq_maxalloc_wait--;
|
||||
if (rv > 0)
|
||||
goto again; /* signaled */
|
||||
}
|
||||
mutex_exit(&tq->tq_lock);
|
||||
|
||||
t = kmem_alloc(sizeof (task_t), tqflags);
|
||||
|
||||
mutex_enter(&tq->tq_lock);
|
||||
if (t != NULL)
|
||||
tq->tq_nalloc++;
|
||||
@ -96,6 +108,9 @@ task_free(taskq_t *tq, task_t *t)
|
||||
kmem_free(t, sizeof (task_t));
|
||||
mutex_enter(&tq->tq_lock);
|
||||
}
|
||||
|
||||
if (tq->tq_maxalloc_wait)
|
||||
cv_signal(&tq->tq_maxalloc_cv);
|
||||
}
|
||||
|
||||
taskqid_t
|
||||
@ -114,8 +129,13 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
|
||||
mutex_exit(&tq->tq_lock);
|
||||
return (0);
|
||||
}
|
||||
t->task_next = &tq->tq_task;
|
||||
t->task_prev = tq->tq_task.task_prev;
|
||||
if (tqflags & TQ_FRONT) {
|
||||
t->task_next = tq->tq_task.task_next;
|
||||
t->task_prev = &tq->tq_task;
|
||||
} else {
|
||||
t->task_next = &tq->tq_task;
|
||||
t->task_prev = tq->tq_task.task_prev;
|
||||
}
|
||||
t->task_next->task_prev = t;
|
||||
t->task_prev->task_next = t;
|
||||
t->task_func = func;
|
||||
@ -191,6 +211,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
|
||||
mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
|
||||
tq->tq_flags = flags | TASKQ_ACTIVE;
|
||||
tq->tq_active = nthreads;
|
||||
tq->tq_nthreads = nthreads;
|
||||
@ -247,6 +268,7 @@ taskq_destroy(taskq_t *tq)
|
||||
mutex_destroy(&tq->tq_lock);
|
||||
cv_destroy(&tq->tq_dispatch_cv);
|
||||
cv_destroy(&tq->tq_wait_cv);
|
||||
cv_destroy(&tq->tq_maxalloc_cv);
|
||||
|
||||
kmem_free(tq, sizeof (taskq_t));
|
||||
}
|
||||
@ -272,3 +294,10 @@ system_taskq_init(void)
|
||||
system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
|
||||
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
|
||||
}
|
||||
|
||||
void
|
||||
system_taskq_fini(void)
|
||||
{
|
||||
taskq_destroy(system_taskq);
|
||||
system_taskq = NULL; /* defensive */
|
||||
}
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
@ -90,7 +89,7 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
|
||||
if (is_log)
|
||||
prefix = "log ";
|
||||
|
||||
if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
|
||||
if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) != 0)
|
||||
vs = &v0;
|
||||
|
||||
|
@ -19,13 +19,10 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
|
||||
/*
|
||||
* AVL - generic AVL tree implementation for kernel use
|
||||
*
|
||||
@ -243,7 +240,7 @@ avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
|
||||
* "void *" of the found tree node
|
||||
*/
|
||||
void *
|
||||
avl_find(avl_tree_t *tree, void *value, avl_index_t *where)
|
||||
avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
|
||||
{
|
||||
avl_node_t *node;
|
||||
avl_node_t *prev = NULL;
|
||||
|
@ -19,15 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _AVL_H
|
||||
#define _AVL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* This is a private header file. Applications should not directly include
|
||||
* this file.
|
||||
@ -163,7 +161,7 @@ extern void avl_create(avl_tree_t *tree,
|
||||
* node - node that has the value being looked for
|
||||
* where - position for use with avl_nearest() or avl_insert(), may be NULL
|
||||
*/
|
||||
extern void *avl_find(avl_tree_t *tree, void *node, avl_index_t *where);
|
||||
extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where);
|
||||
|
||||
/*
|
||||
* Insert a node into the tree.
|
||||
|
@ -19,15 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_NVPAIR_H
|
||||
#define _SYS_NVPAIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/va_list.h>
|
||||
@ -199,6 +197,7 @@ int nvlist_add_double(nvlist_t *, const char *, double);
|
||||
|
||||
int nvlist_remove(nvlist_t *, const char *, data_type_t);
|
||||
int nvlist_remove_all(nvlist_t *, const char *);
|
||||
int nvlist_remove_nvpair(nvlist_t *, nvpair_t *);
|
||||
|
||||
int nvlist_lookup_boolean(nvlist_t *, const char *);
|
||||
int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *);
|
||||
@ -237,9 +236,11 @@ int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **);
|
||||
int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *, nvpair_t **,
|
||||
int *, char **);
|
||||
boolean_t nvlist_exists(nvlist_t *, const char *);
|
||||
boolean_t nvlist_empty(nvlist_t *);
|
||||
|
||||
/* processing nvpair */
|
||||
nvpair_t *nvlist_next_nvpair(nvlist_t *, nvpair_t *);
|
||||
nvpair_t *nvlist_prev_nvpair(nvlist_t *, nvpair_t *);
|
||||
char *nvpair_name(nvpair_t *);
|
||||
data_type_t nvpair_type(nvpair_t *);
|
||||
int nvpair_type_is_array(nvpair_t *);
|
||||
|
@ -20,12 +20,10 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/stropts.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/isa_defs.h>
|
||||
@ -692,6 +690,18 @@ nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
int
|
||||
nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
|
||||
{
|
||||
if (nvl == NULL || nvp == NULL)
|
||||
return (EINVAL);
|
||||
|
||||
nvp_buf_unlink(nvl, nvp);
|
||||
nvpair_free(nvp);
|
||||
nvp_buf_free(nvl, nvp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function calculates the size of an nvpair value.
|
||||
*
|
||||
@ -1162,6 +1172,42 @@ nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp)
|
||||
return (curr != NULL ? &curr->nvi_nvp : NULL);
|
||||
}
|
||||
|
||||
nvpair_t *
|
||||
nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp)
|
||||
{
|
||||
nvpriv_t *priv;
|
||||
i_nvp_t *curr;
|
||||
|
||||
if (nvl == NULL ||
|
||||
(priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
|
||||
return (NULL);
|
||||
|
||||
curr = NVPAIR2I_NVP(nvp);
|
||||
|
||||
if (nvp == NULL)
|
||||
curr = priv->nvp_last;
|
||||
else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
|
||||
curr = curr->nvi_prev;
|
||||
else
|
||||
curr = NULL;
|
||||
|
||||
priv->nvp_curr = curr;
|
||||
|
||||
return (curr != NULL ? &curr->nvi_nvp : NULL);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
nvlist_empty(nvlist_t *nvl)
|
||||
{
|
||||
nvpriv_t *priv;
|
||||
|
||||
if (nvl == NULL ||
|
||||
(priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
|
||||
return (B_TRUE);
|
||||
|
||||
return (priv->nvp_list == NULL);
|
||||
}
|
||||
|
||||
char *
|
||||
nvpair_name(nvpair_t *nvp)
|
||||
{
|
||||
|
@ -20,10 +20,11 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
#ifndef _SYS_FS_ZFS_H
|
||||
#define _SYS_FS_ZFS_H
|
||||
|
||||
@ -86,12 +87,11 @@ typedef enum {
|
||||
ZFS_PROP_READONLY,
|
||||
ZFS_PROP_ZONED,
|
||||
ZFS_PROP_SNAPDIR,
|
||||
ZFS_PROP_ACLMODE,
|
||||
ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
|
||||
ZFS_PROP_ACLINHERIT,
|
||||
ZFS_PROP_CREATETXG, /* not exposed to the user */
|
||||
ZFS_PROP_NAME, /* not exposed to the user */
|
||||
ZFS_PROP_CANMOUNT,
|
||||
ZFS_PROP_SHAREISCSI,
|
||||
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
|
||||
ZFS_PROP_XATTR,
|
||||
ZFS_PROP_NUMCLONES, /* not exposed to the user */
|
||||
@ -116,6 +116,12 @@ typedef enum {
|
||||
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
|
||||
ZFS_PROP_DEFER_DESTROY,
|
||||
ZFS_PROP_USERREFS,
|
||||
ZFS_PROP_LOGBIAS,
|
||||
ZFS_PROP_UNIQUE, /* not exposed to the user */
|
||||
ZFS_PROP_OBJSETID, /* not exposed to the user */
|
||||
ZFS_PROP_DEDUP,
|
||||
ZFS_PROP_MLSLABEL,
|
||||
ZFS_PROP_SYNC,
|
||||
ZFS_NUM_PROPS
|
||||
} zfs_prop_t;
|
||||
|
||||
@ -138,8 +144,6 @@ extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS];
|
||||
typedef enum {
|
||||
ZPOOL_PROP_NAME,
|
||||
ZPOOL_PROP_SIZE,
|
||||
ZPOOL_PROP_USED,
|
||||
ZPOOL_PROP_AVAILABLE,
|
||||
ZPOOL_PROP_CAPACITY,
|
||||
ZPOOL_PROP_ALTROOT,
|
||||
ZPOOL_PROP_HEALTH,
|
||||
@ -152,6 +156,10 @@ typedef enum {
|
||||
ZPOOL_PROP_FAILUREMODE,
|
||||
ZPOOL_PROP_LISTSNAPS,
|
||||
ZPOOL_PROP_AUTOEXPAND,
|
||||
ZPOOL_PROP_DEDUPDITTO,
|
||||
ZPOOL_PROP_DEDUPRATIO,
|
||||
ZPOOL_PROP_FREE,
|
||||
ZPOOL_PROP_ALLOCATED,
|
||||
ZPOOL_NUM_PROPS
|
||||
} zpool_prop_t;
|
||||
|
||||
@ -166,10 +174,27 @@ typedef enum {
|
||||
ZPROP_SRC_DEFAULT = 0x2,
|
||||
ZPROP_SRC_TEMPORARY = 0x4,
|
||||
ZPROP_SRC_LOCAL = 0x8,
|
||||
ZPROP_SRC_INHERITED = 0x10
|
||||
ZPROP_SRC_INHERITED = 0x10,
|
||||
ZPROP_SRC_RECEIVED = 0x20
|
||||
} zprop_source_t;
|
||||
|
||||
#define ZPROP_SRC_ALL 0x1f
|
||||
#define ZPROP_SRC_ALL 0x3f
|
||||
|
||||
#define ZPROP_SOURCE_VAL_RECVD "$recvd"
|
||||
#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS"
|
||||
/*
|
||||
* Dataset flag implemented as a special entry in the props zap object
|
||||
* indicating that the dataset has received properties on or after
|
||||
* SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties
|
||||
* just as it did in earlier versions, and thereafter, local properties are
|
||||
* preserved.
|
||||
*/
|
||||
#define ZPROP_HAS_RECVD "$hasrecvd"
|
||||
|
||||
typedef enum {
|
||||
ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */
|
||||
ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */
|
||||
} zprop_errflags_t;
|
||||
|
||||
typedef int (*zprop_func)(int, void *);
|
||||
|
||||
@ -191,9 +216,10 @@ boolean_t zfs_prop_setonce(zfs_prop_t);
|
||||
const char *zfs_prop_to_name(zfs_prop_t);
|
||||
zfs_prop_t zfs_name_to_prop(const char *);
|
||||
boolean_t zfs_prop_user(const char *);
|
||||
boolean_t zfs_prop_userquota(const char *name);
|
||||
boolean_t zfs_prop_userquota(const char *);
|
||||
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
|
||||
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
|
||||
uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed);
|
||||
boolean_t zfs_prop_valid_for_type(int, zfs_type_t);
|
||||
|
||||
/*
|
||||
@ -206,6 +232,7 @@ uint64_t zpool_prop_default_numeric(zpool_prop_t);
|
||||
boolean_t zpool_prop_readonly(zpool_prop_t);
|
||||
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
|
||||
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
|
||||
uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
|
||||
|
||||
/*
|
||||
* Definitions for the Delegation.
|
||||
@ -236,6 +263,8 @@ typedef enum {
|
||||
#define ZFS_DELEG_PERM_GID "gid"
|
||||
#define ZFS_DELEG_PERM_GROUPS "groups"
|
||||
|
||||
#define ZFS_MLSLABEL_DEFAULT "none"
|
||||
|
||||
#define ZFS_SMB_ACL_SRC "src"
|
||||
#define ZFS_SMB_ACL_TARGET "target"
|
||||
|
||||
@ -245,6 +274,11 @@ typedef enum {
|
||||
ZFS_CANMOUNT_NOAUTO = 2
|
||||
} zfs_canmount_type_t;
|
||||
|
||||
typedef enum {
|
||||
ZFS_LOGBIAS_LATENCY = 0,
|
||||
ZFS_LOGBIAS_THROUGHPUT = 1
|
||||
} zfs_logbias_op_t;
|
||||
|
||||
typedef enum zfs_share_op {
|
||||
ZFS_SHARE_NFS = 0,
|
||||
ZFS_UNSHARE_NFS = 1,
|
||||
@ -265,6 +299,12 @@ typedef enum zfs_cache_type {
|
||||
ZFS_CACHE_ALL = 2
|
||||
} zfs_cache_type_t;
|
||||
|
||||
typedef enum {
|
||||
ZFS_SYNC_STANDARD = 0,
|
||||
ZFS_SYNC_ALWAYS = 1,
|
||||
ZFS_SYNC_DISABLED = 2
|
||||
} zfs_sync_type_t;
|
||||
|
||||
|
||||
/*
|
||||
* On-disk version number.
|
||||
@ -287,14 +327,22 @@ typedef enum zfs_cache_type {
|
||||
#define SPA_VERSION_16 16ULL
|
||||
#define SPA_VERSION_17 17ULL
|
||||
#define SPA_VERSION_18 18ULL
|
||||
#define SPA_VERSION_19 19ULL
|
||||
#define SPA_VERSION_20 20ULL
|
||||
#define SPA_VERSION_21 21ULL
|
||||
#define SPA_VERSION_22 22ULL
|
||||
#define SPA_VERSION_23 23ULL
|
||||
#define SPA_VERSION_24 24ULL
|
||||
#define SPA_VERSION_25 25ULL
|
||||
#define SPA_VERSION_26 26ULL
|
||||
/*
|
||||
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
|
||||
* format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
|
||||
* and do the appropriate changes. Also bump the version number in
|
||||
* usr/src/grub/capability.
|
||||
*/
|
||||
#define SPA_VERSION SPA_VERSION_18
|
||||
#define SPA_VERSION_STRING "18"
|
||||
#define SPA_VERSION SPA_VERSION_26
|
||||
#define SPA_VERSION_STRING "26"
|
||||
|
||||
/*
|
||||
* Symbolic names for the changes that caused a SPA_VERSION switch.
|
||||
@ -311,7 +359,7 @@ typedef enum zfs_cache_type {
|
||||
#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2
|
||||
#define SPA_VERSION_SPARES SPA_VERSION_3
|
||||
#define SPA_VERSION_RAIDZ2 SPA_VERSION_3
|
||||
#define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3
|
||||
#define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3
|
||||
#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3
|
||||
#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3
|
||||
#define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4
|
||||
@ -334,6 +382,15 @@ typedef enum zfs_cache_type {
|
||||
#define SPA_VERSION_STMF_PROP SPA_VERSION_16
|
||||
#define SPA_VERSION_RAIDZ3 SPA_VERSION_17
|
||||
#define SPA_VERSION_USERREFS SPA_VERSION_18
|
||||
#define SPA_VERSION_HOLES SPA_VERSION_19
|
||||
#define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20
|
||||
#define SPA_VERSION_DEDUP SPA_VERSION_21
|
||||
#define SPA_VERSION_RECVD_PROPS SPA_VERSION_22
|
||||
#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23
|
||||
#define SPA_VERSION_SA SPA_VERSION_24
|
||||
#define SPA_VERSION_SCAN SPA_VERSION_25
|
||||
#define SPA_VERSION_DIR_CLONES SPA_VERSION_26
|
||||
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
|
||||
|
||||
/*
|
||||
* ZPL version - rev'd whenever an incompatible on-disk format change
|
||||
@ -347,8 +404,9 @@ typedef enum zfs_cache_type {
|
||||
#define ZPL_VERSION_2 2ULL
|
||||
#define ZPL_VERSION_3 3ULL
|
||||
#define ZPL_VERSION_4 4ULL
|
||||
#define ZPL_VERSION ZPL_VERSION_4
|
||||
#define ZPL_VERSION_STRING "4"
|
||||
#define ZPL_VERSION_5 5ULL
|
||||
#define ZPL_VERSION ZPL_VERSION_5
|
||||
#define ZPL_VERSION_STRING "5"
|
||||
|
||||
#define ZPL_VERSION_INITIAL ZPL_VERSION_1
|
||||
#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2
|
||||
@ -356,6 +414,23 @@ typedef enum zfs_cache_type {
|
||||
#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3
|
||||
#define ZPL_VERSION_SYSATTR ZPL_VERSION_3
|
||||
#define ZPL_VERSION_USERSPACE ZPL_VERSION_4
|
||||
#define ZPL_VERSION_SA ZPL_VERSION_5
|
||||
|
||||
/* Rewind request information */
|
||||
#define ZPOOL_NO_REWIND 1 /* No policy - default behavior */
|
||||
#define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */
|
||||
#define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */
|
||||
#define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */
|
||||
#define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */
|
||||
#define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */
|
||||
#define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */
|
||||
|
||||
typedef struct zpool_rewind_policy {
|
||||
uint32_t zrp_request; /* rewind behavior requested */
|
||||
uint64_t zrp_maxmeta; /* max acceptable meta-data errors */
|
||||
uint64_t zrp_maxdata; /* max acceptable data errors */
|
||||
uint64_t zrp_txg; /* specific txg to load */
|
||||
} zpool_rewind_policy_t;
|
||||
|
||||
/*
|
||||
* The following are configuration names used in the nvlist describing a pool's
|
||||
@ -380,7 +455,8 @@ typedef enum zfs_cache_type {
|
||||
#define ZPOOL_CONFIG_ASHIFT "ashift"
|
||||
#define ZPOOL_CONFIG_ASIZE "asize"
|
||||
#define ZPOOL_CONFIG_DTL "DTL"
|
||||
#define ZPOOL_CONFIG_STATS "stats"
|
||||
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
|
||||
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
|
||||
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
|
||||
@ -393,6 +469,17 @@ typedef enum zfs_cache_type {
|
||||
#define ZPOOL_CONFIG_PHYS_PATH "phys_path"
|
||||
#define ZPOOL_CONFIG_IS_LOG "is_log"
|
||||
#define ZPOOL_CONFIG_L2CACHE "l2cache"
|
||||
#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array"
|
||||
#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children"
|
||||
#define ZPOOL_CONFIG_IS_HOLE "is_hole"
|
||||
#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram"
|
||||
#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats"
|
||||
#define ZPOOL_CONFIG_DDT_STATS "ddt_stats"
|
||||
#define ZPOOL_CONFIG_SPLIT "splitcfg"
|
||||
#define ZPOOL_CONFIG_ORIG_GUID "orig_guid"
|
||||
#define ZPOOL_CONFIG_SPLIT_GUID "split_guid"
|
||||
#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
|
||||
#define ZPOOL_CONFIG_REMOVING "removing"
|
||||
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
||||
@ -406,6 +493,19 @@ typedef enum zfs_cache_type {
|
||||
#define ZPOOL_CONFIG_DEGRADED "degraded"
|
||||
#define ZPOOL_CONFIG_REMOVED "removed"
|
||||
#define ZPOOL_CONFIG_FRU "fru"
|
||||
#define ZPOOL_CONFIG_AUX_STATE "aux_state"
|
||||
|
||||
/* Rewind policy parameters */
|
||||
#define ZPOOL_REWIND_POLICY "rewind-policy"
|
||||
#define ZPOOL_REWIND_REQUEST "rewind-request"
|
||||
#define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg"
|
||||
#define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh"
|
||||
#define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh"
|
||||
|
||||
/* Rewind data discovered */
|
||||
#define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts"
|
||||
#define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors"
|
||||
#define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind"
|
||||
|
||||
#define VDEV_TYPE_ROOT "root"
|
||||
#define VDEV_TYPE_MIRROR "mirror"
|
||||
@ -414,6 +514,7 @@ typedef enum zfs_cache_type {
|
||||
#define VDEV_TYPE_DISK "disk"
|
||||
#define VDEV_TYPE_FILE "file"
|
||||
#define VDEV_TYPE_MISSING "missing"
|
||||
#define VDEV_TYPE_HOLE "hole"
|
||||
#define VDEV_TYPE_SPARE "spare"
|
||||
#define VDEV_TYPE_LOG "log"
|
||||
#define VDEV_TYPE_L2CACHE "l2cache"
|
||||
@ -463,7 +564,9 @@ typedef enum vdev_aux {
|
||||
VDEV_AUX_SPARED, /* hot spare used in another pool */
|
||||
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
|
||||
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
|
||||
VDEV_AUX_BAD_LOG /* cannot read log chain(s) */
|
||||
VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
|
||||
VDEV_AUX_EXTERNAL, /* external diagnosis */
|
||||
VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
|
||||
} vdev_aux_t;
|
||||
|
||||
/*
|
||||
@ -484,14 +587,14 @@ typedef enum pool_state {
|
||||
} pool_state_t;
|
||||
|
||||
/*
|
||||
* Scrub types.
|
||||
* Scan Functions.
|
||||
*/
|
||||
typedef enum pool_scrub_type {
|
||||
POOL_SCRUB_NONE,
|
||||
POOL_SCRUB_RESILVER,
|
||||
POOL_SCRUB_EVERYTHING,
|
||||
POOL_SCRUB_TYPES
|
||||
} pool_scrub_type_t;
|
||||
typedef enum pool_scan_func {
|
||||
POOL_SCAN_NONE,
|
||||
POOL_SCAN_SCRUB,
|
||||
POOL_SCAN_RESILVER,
|
||||
POOL_SCAN_FUNCS
|
||||
} pool_scan_func_t;
|
||||
|
||||
/*
|
||||
* ZIO types. Needed to interpret vdev statistics below.
|
||||
@ -506,6 +609,36 @@ typedef enum zio_type {
|
||||
ZIO_TYPES
|
||||
} zio_type_t;
|
||||
|
||||
/*
|
||||
* Pool statistics. Note: all fields should be 64-bit because this
|
||||
* is passed between kernel and userland as an nvlist uint64 array.
|
||||
*/
|
||||
typedef struct pool_scan_stat {
|
||||
/* values stored on disk */
|
||||
uint64_t pss_func; /* pool_scan_func_t */
|
||||
uint64_t pss_state; /* dsl_scan_state_t */
|
||||
uint64_t pss_start_time; /* scan start time */
|
||||
uint64_t pss_end_time; /* scan end time */
|
||||
uint64_t pss_to_examine; /* total bytes to scan */
|
||||
uint64_t pss_examined; /* total examined bytes */
|
||||
uint64_t pss_to_process; /* total bytes to process */
|
||||
uint64_t pss_processed; /* total processed bytes */
|
||||
uint64_t pss_errors; /* scan errors */
|
||||
|
||||
/* values not stored on disk */
|
||||
uint64_t pss_pass_exam; /* examined bytes per scan pass */
|
||||
uint64_t pss_pass_start; /* start time of a scan pass */
|
||||
} pool_scan_stat_t;
|
||||
|
||||
typedef enum dsl_scan_state {
|
||||
DSS_NONE,
|
||||
DSS_SCANNING,
|
||||
DSS_FINISHED,
|
||||
DSS_CANCELED,
|
||||
DSS_NUM_STATES
|
||||
} dsl_scan_state_t;
|
||||
|
||||
|
||||
/*
|
||||
* Vdev statistics. Note: all fields should be 64-bit because this
|
||||
* is passed between kernel and userland as an nvlist uint64 array.
|
||||
@ -524,34 +657,49 @@ typedef struct vdev_stat {
|
||||
uint64_t vs_write_errors; /* write errors */
|
||||
uint64_t vs_checksum_errors; /* checksum errors */
|
||||
uint64_t vs_self_healed; /* self-healed bytes */
|
||||
uint64_t vs_scrub_type; /* pool_scrub_type_t */
|
||||
uint64_t vs_scrub_complete; /* completed? */
|
||||
uint64_t vs_scrub_examined; /* bytes examined; top */
|
||||
uint64_t vs_scrub_repaired; /* bytes repaired; leaf */
|
||||
uint64_t vs_scrub_errors; /* errors during scrub */
|
||||
uint64_t vs_scrub_start; /* UTC scrub start time */
|
||||
uint64_t vs_scrub_end; /* UTC scrub end time */
|
||||
uint64_t vs_scan_removing; /* removing? */
|
||||
uint64_t vs_scan_processed; /* scan processed bytes */
|
||||
} vdev_stat_t;
|
||||
|
||||
/*
|
||||
* DDT statistics. Note: all fields should be 64-bit because this
|
||||
* is passed between kernel and userland as an nvlist uint64 array.
|
||||
*/
|
||||
typedef struct ddt_object {
|
||||
uint64_t ddo_count; /* number of elments in ddt */
|
||||
uint64_t ddo_dspace; /* size of ddt on disk */
|
||||
uint64_t ddo_mspace; /* size of ddt in-core */
|
||||
} ddt_object_t;
|
||||
|
||||
typedef struct ddt_stat {
|
||||
uint64_t dds_blocks; /* blocks */
|
||||
uint64_t dds_lsize; /* logical size */
|
||||
uint64_t dds_psize; /* physical size */
|
||||
uint64_t dds_dsize; /* deflated allocated size */
|
||||
uint64_t dds_ref_blocks; /* referenced blocks */
|
||||
uint64_t dds_ref_lsize; /* referenced lsize * refcnt */
|
||||
uint64_t dds_ref_psize; /* referenced psize * refcnt */
|
||||
uint64_t dds_ref_dsize; /* referenced dsize * refcnt */
|
||||
} ddt_stat_t;
|
||||
|
||||
typedef struct ddt_histogram {
|
||||
ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */
|
||||
} ddt_histogram_t;
|
||||
|
||||
#define ZVOL_DRIVER "zvol"
|
||||
#define ZFS_DRIVER "zfs"
|
||||
#define ZFS_DEV "/dev/zfs"
|
||||
|
||||
/*
|
||||
* zvol paths. Irritatingly, the devfsadm interfaces want all these
|
||||
* paths without the /dev prefix, but for some things, we want the
|
||||
* /dev prefix. Below are the names without /dev.
|
||||
*/
|
||||
#define ZVOL_DEV_DIR "zvol/dsk"
|
||||
#define ZVOL_RDEV_DIR "zvol/rdsk"
|
||||
|
||||
/*
|
||||
* And here are the things we need with /dev, etc. in front of them.
|
||||
*/
|
||||
/* general zvol path */
|
||||
#define ZVOL_DIR "/dev/zvol"
|
||||
/* expansion */
|
||||
#define ZVOL_PSEUDO_DEV "/devices/pseudo/zfs@0:"
|
||||
#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR "/"
|
||||
/* for dump and swap */
|
||||
#define ZVOL_FULL_DEV_DIR ZVOL_DIR "/dsk/"
|
||||
#define ZVOL_FULL_RDEV_DIR ZVOL_DIR "/rdsk/"
|
||||
|
||||
#define ZVOL_PROP_NAME "name"
|
||||
#define ZVOL_DEFAULT_BLOCKSIZE 8192
|
||||
|
||||
/*
|
||||
* /dev/zfs ioctl numbers.
|
||||
@ -566,7 +714,7 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_POOL_CONFIGS,
|
||||
ZFS_IOC_POOL_STATS,
|
||||
ZFS_IOC_POOL_TRYIMPORT,
|
||||
ZFS_IOC_POOL_SCRUB,
|
||||
ZFS_IOC_POOL_SCAN,
|
||||
ZFS_IOC_POOL_FREEZE,
|
||||
ZFS_IOC_POOL_UPGRADE,
|
||||
ZFS_IOC_POOL_GET_HISTORY,
|
||||
@ -582,8 +730,6 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_DATASET_LIST_NEXT,
|
||||
ZFS_IOC_SNAPSHOT_LIST_NEXT,
|
||||
ZFS_IOC_SET_PROP,
|
||||
ZFS_IOC_CREATE_MINOR,
|
||||
ZFS_IOC_REMOVE_MINOR,
|
||||
ZFS_IOC_CREATE,
|
||||
ZFS_IOC_DESTROY,
|
||||
ZFS_IOC_ROLLBACK,
|
||||
@ -604,7 +750,6 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_POOL_GET_PROPS,
|
||||
ZFS_IOC_SET_FSACL,
|
||||
ZFS_IOC_GET_FSACL,
|
||||
ZFS_IOC_ISCSI_PERM_CHECK,
|
||||
ZFS_IOC_SHARE,
|
||||
ZFS_IOC_INHERIT_PROP,
|
||||
ZFS_IOC_SMB_ACL,
|
||||
@ -613,17 +758,21 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_USERSPACE_UPGRADE,
|
||||
ZFS_IOC_HOLD,
|
||||
ZFS_IOC_RELEASE,
|
||||
ZFS_IOC_GET_HOLDS
|
||||
ZFS_IOC_GET_HOLDS,
|
||||
ZFS_IOC_OBJSET_RECVD_PROPS,
|
||||
ZFS_IOC_VDEV_SPLIT
|
||||
} zfs_ioc_t;
|
||||
|
||||
/*
|
||||
* Internal SPA load state. Used by FMA diagnosis engine.
|
||||
*/
|
||||
typedef enum {
|
||||
SPA_LOAD_NONE, /* no load in progress */
|
||||
SPA_LOAD_OPEN, /* normal open */
|
||||
SPA_LOAD_IMPORT, /* import in progress */
|
||||
SPA_LOAD_TRYIMPORT /* tryimport in progress */
|
||||
SPA_LOAD_NONE, /* no load in progress */
|
||||
SPA_LOAD_OPEN, /* normal open */
|
||||
SPA_LOAD_IMPORT, /* import in progress */
|
||||
SPA_LOAD_TRYIMPORT, /* tryimport in progress */
|
||||
SPA_LOAD_RECOVER, /* recovery requested */
|
||||
SPA_LOAD_ERROR /* load failed */
|
||||
} spa_load_state_t;
|
||||
|
||||
/*
|
||||
@ -686,7 +835,7 @@ typedef enum {
|
||||
/*
|
||||
* Note: This is encoded on-disk, so new events must be added to the
|
||||
* end, and unused events can not be removed. Be sure to edit
|
||||
* zpool_main.c: hist_event_table[].
|
||||
* libzfs_pool.c: hist_event_table[].
|
||||
*/
|
||||
typedef enum history_internal_events {
|
||||
LOG_NO_EVENT = 0,
|
||||
@ -703,7 +852,7 @@ typedef enum history_internal_events {
|
||||
LOG_POOL_VDEV_OFFLINE,
|
||||
LOG_POOL_UPGRADE,
|
||||
LOG_POOL_CLEAR,
|
||||
LOG_POOL_SCRUB,
|
||||
LOG_POOL_SCAN,
|
||||
LOG_POOL_PROPSET,
|
||||
LOG_DS_CREATE,
|
||||
LOG_DS_CLONE,
|
||||
@ -726,9 +875,10 @@ typedef enum history_internal_events {
|
||||
LOG_DS_UPGRADE,
|
||||
LOG_DS_REFQUOTA,
|
||||
LOG_DS_REFRESERV,
|
||||
LOG_POOL_SCRUB_DONE,
|
||||
LOG_POOL_SCAN_DONE,
|
||||
LOG_DS_USER_HOLD,
|
||||
LOG_DS_USER_RELEASE,
|
||||
LOG_POOL_SPLIT,
|
||||
LOG_END
|
||||
} history_internal_events_t;
|
||||
|
||||
|
@ -19,15 +19,12 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_COMUTIL_H
|
||||
#define _ZFS_COMUTIL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
@ -35,7 +32,12 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern boolean_t zfs_allocatable_devs(nvlist_t *nv);
|
||||
extern boolean_t zfs_allocatable_devs(nvlist_t *);
|
||||
extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
|
||||
|
||||
extern int zfs_zpl_version_map(int spa_version);
|
||||
extern int zfs_spa_version_map(int zpl_version);
|
||||
extern const char *zfs_history_event_names[LOG_END];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
53
module/zcommon/include/zfs_fletcher.h
Normal file
53
module/zcommon/include/zfs_fletcher.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_FLETCHER_H
|
||||
#define _ZFS_FLETCHER_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* fletcher checksum functions
|
||||
*/
|
||||
|
||||
void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
|
||||
void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
|
||||
void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
|
||||
void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *);
|
||||
void fletcher_4_incremental_native(const void *, uint64_t,
|
||||
zio_cksum_t *);
|
||||
void fletcher_4_incremental_byteswap(const void *, uint64_t,
|
||||
zio_cksum_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFS_FLETCHER_H */
|
@ -19,15 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_PROP_H
|
||||
#define _ZFS_PROP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
@ -79,6 +77,7 @@ typedef struct {
|
||||
/* "zfs get" help message */
|
||||
const zprop_index_t *pd_table; /* for index properties, a table */
|
||||
/* defining the possible values */
|
||||
size_t pd_table_size; /* number of entries in pd_table[] */
|
||||
} zprop_desc_t;
|
||||
|
||||
/*
|
||||
@ -99,16 +98,16 @@ zprop_desc_t *zpool_prop_get_table(void);
|
||||
/*
|
||||
* Common routines to initialize property tables
|
||||
*/
|
||||
void register_impl(int, const char *, zprop_type_t, uint64_t,
|
||||
void zprop_register_impl(int, const char *, zprop_type_t, uint64_t,
|
||||
const char *, zprop_attr_t, int, const char *, const char *,
|
||||
boolean_t, boolean_t, const zprop_index_t *);
|
||||
void register_string(int, const char *, const char *, zprop_attr_t attr,
|
||||
int, const char *, const char *);
|
||||
void register_number(int, const char *, uint64_t, zprop_attr_t, int,
|
||||
void zprop_register_string(int, const char *, const char *,
|
||||
zprop_attr_t attr, int, const char *, const char *);
|
||||
void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int,
|
||||
const char *, const char *);
|
||||
void register_index(int, const char *, uint64_t, zprop_attr_t, int,
|
||||
void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int,
|
||||
const char *, const char *, const zprop_index_t *);
|
||||
void register_hidden(int, const char *, zprop_type_t, zprop_attr_t,
|
||||
void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t,
|
||||
int, const char *);
|
||||
|
||||
/*
|
||||
@ -118,6 +117,7 @@ int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t);
|
||||
int zprop_name_to_prop(const char *, zfs_type_t);
|
||||
int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t);
|
||||
int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t);
|
||||
uint64_t zprop_random_value(int, uint64_t, zfs_type_t);
|
||||
const char *zprop_values(int, zfs_type_t);
|
||||
size_t zprop_width(int, boolean_t *, zfs_type_t);
|
||||
boolean_t zprop_valid_for_type(int, zfs_type_t);
|
||||
|
@ -19,12 +19,9 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* This file is intended for functions that ought to be common between user
|
||||
* land (libzfs) and the kernel. When many common routines need to be shared
|
||||
@ -33,11 +30,15 @@
|
||||
|
||||
#if defined(_KERNEL)
|
||||
#include <sys/systm.h>
|
||||
#else
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/int_limits.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include "zfs_comutil.h"
|
||||
|
||||
/*
|
||||
* Are there allocatable vdevs?
|
||||
@ -63,3 +64,139 @@ zfs_allocatable_devs(nvlist_t *nv)
|
||||
}
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp)
|
||||
{
|
||||
nvlist_t *policy;
|
||||
nvpair_t *elem;
|
||||
char *nm;
|
||||
|
||||
/* Defaults */
|
||||
zrpp->zrp_request = ZPOOL_NO_REWIND;
|
||||
zrpp->zrp_maxmeta = 0;
|
||||
zrpp->zrp_maxdata = UINT64_MAX;
|
||||
zrpp->zrp_txg = UINT64_MAX;
|
||||
|
||||
if (nvl == NULL)
|
||||
return;
|
||||
|
||||
elem = NULL;
|
||||
while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
|
||||
nm = nvpair_name(elem);
|
||||
if (strcmp(nm, ZPOOL_REWIND_POLICY) == 0) {
|
||||
if (nvpair_value_nvlist(elem, &policy) == 0)
|
||||
zpool_get_rewind_policy(policy, zrpp);
|
||||
return;
|
||||
} else if (strcmp(nm, ZPOOL_REWIND_REQUEST) == 0) {
|
||||
if (nvpair_value_uint32(elem, &zrpp->zrp_request) == 0)
|
||||
if (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES)
|
||||
zrpp->zrp_request = ZPOOL_NO_REWIND;
|
||||
} else if (strcmp(nm, ZPOOL_REWIND_REQUEST_TXG) == 0) {
|
||||
(void) nvpair_value_uint64(elem, &zrpp->zrp_txg);
|
||||
} else if (strcmp(nm, ZPOOL_REWIND_META_THRESH) == 0) {
|
||||
(void) nvpair_value_uint64(elem, &zrpp->zrp_maxmeta);
|
||||
} else if (strcmp(nm, ZPOOL_REWIND_DATA_THRESH) == 0) {
|
||||
(void) nvpair_value_uint64(elem, &zrpp->zrp_maxdata);
|
||||
}
|
||||
}
|
||||
if (zrpp->zrp_request == 0)
|
||||
zrpp->zrp_request = ZPOOL_NO_REWIND;
|
||||
}
|
||||
|
||||
typedef struct zfs_version_spa_map {
|
||||
int version_zpl;
|
||||
int version_spa;
|
||||
} zfs_version_spa_map_t;
|
||||
|
||||
/*
|
||||
* Keep this table in monotonically increasing version number order.
|
||||
*/
|
||||
static zfs_version_spa_map_t zfs_version_table[] = {
|
||||
{ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL},
|
||||
{ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL},
|
||||
{ZPL_VERSION_FUID, SPA_VERSION_FUID},
|
||||
{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
|
||||
{ZPL_VERSION_SA, SPA_VERSION_SA},
|
||||
{0, 0}
|
||||
};
|
||||
|
||||
/*
|
||||
* Return the max zpl version for a corresponding spa version
|
||||
* -1 is returned if no mapping exists.
|
||||
*/
|
||||
int
|
||||
zfs_zpl_version_map(int spa_version)
|
||||
{
|
||||
int i;
|
||||
int version = -1;
|
||||
|
||||
for (i = 0; zfs_version_table[i].version_spa; i++) {
|
||||
if (spa_version >= zfs_version_table[i].version_spa)
|
||||
version = zfs_version_table[i].version_zpl;
|
||||
}
|
||||
|
||||
return (version);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the min spa version for a corresponding spa version
|
||||
* -1 is returned if no mapping exists.
|
||||
*/
|
||||
int
|
||||
zfs_spa_version_map(int zpl_version)
|
||||
{
|
||||
int i;
|
||||
int version = -1;
|
||||
|
||||
for (i = 0; zfs_version_table[i].version_zpl; i++) {
|
||||
if (zfs_version_table[i].version_zpl >= zpl_version)
|
||||
return (zfs_version_table[i].version_spa);
|
||||
}
|
||||
|
||||
return (version);
|
||||
}
|
||||
|
||||
const char *zfs_history_event_names[LOG_END] = {
|
||||
"invalid event",
|
||||
"pool create",
|
||||
"vdev add",
|
||||
"pool remove",
|
||||
"pool destroy",
|
||||
"pool export",
|
||||
"pool import",
|
||||
"vdev attach",
|
||||
"vdev replace",
|
||||
"vdev detach",
|
||||
"vdev online",
|
||||
"vdev offline",
|
||||
"vdev upgrade",
|
||||
"pool clear",
|
||||
"pool scrub",
|
||||
"pool property set",
|
||||
"create",
|
||||
"clone",
|
||||
"destroy",
|
||||
"destroy_begin_sync",
|
||||
"inherit",
|
||||
"property set",
|
||||
"quota set",
|
||||
"permission update",
|
||||
"permission remove",
|
||||
"permission who remove",
|
||||
"promote",
|
||||
"receive",
|
||||
"rename",
|
||||
"reservation set",
|
||||
"replay_inc_sync",
|
||||
"replay_full_sync",
|
||||
"rollback",
|
||||
"snapshot",
|
||||
"filesystem version upgrade",
|
||||
"refquota set",
|
||||
"refreservation set",
|
||||
"pool scrub done",
|
||||
"user hold",
|
||||
"user release",
|
||||
"pool split",
|
||||
};
|
||||
|
@ -128,6 +128,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
void
|
@ -19,10 +19,11 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
#include <sys/zio.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/u8_textprep.h>
|
||||
@ -69,6 +70,16 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t dedup_table[] = {
|
||||
{ "on", ZIO_CHECKSUM_ON },
|
||||
{ "off", ZIO_CHECKSUM_OFF },
|
||||
{ "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY },
|
||||
{ "sha256", ZIO_CHECKSUM_SHA256 },
|
||||
{ "sha256,verify",
|
||||
ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t compress_table[] = {
|
||||
{ "on", ZIO_COMPRESS_ON },
|
||||
{ "off", ZIO_COMPRESS_OFF },
|
||||
@ -83,6 +94,7 @@ zfs_prop_init(void)
|
||||
{ "gzip-7", ZIO_COMPRESS_GZIP_7 },
|
||||
{ "gzip-8", ZIO_COMPRESS_GZIP_8 },
|
||||
{ "gzip-9", ZIO_COMPRESS_GZIP_9 },
|
||||
{ "zle", ZIO_COMPRESS_ZLE },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -92,13 +104,6 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t acl_mode_table[] = {
|
||||
{ "discard", ZFS_ACL_DISCARD },
|
||||
{ "groupmask", ZFS_ACL_GROUPMASK },
|
||||
{ "passthrough", ZFS_ACL_PASSTHROUGH },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t acl_inherit_table[] = {
|
||||
{ "discard", ZFS_ACL_DISCARD },
|
||||
{ "noallow", ZFS_ACL_NOALLOW },
|
||||
@ -142,6 +147,7 @@ zfs_prop_init(void)
|
||||
{ "2", 2 },
|
||||
{ "3", 3 },
|
||||
{ "4", 4 },
|
||||
{ "5", 5 },
|
||||
{ "current", ZPL_VERSION },
|
||||
{ NULL }
|
||||
};
|
||||
@ -152,6 +158,12 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t logbias_table[] = {
|
||||
{ "latency", ZFS_LOGBIAS_LATENCY },
|
||||
{ "throughput", ZFS_LOGBIAS_THROUGHPUT },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t canmount_table[] = {
|
||||
{ "off", ZFS_CANMOUNT_OFF },
|
||||
{ "on", ZFS_CANMOUNT_ON },
|
||||
@ -166,170 +178,208 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t sync_table[] = {
|
||||
{ "standard", ZFS_SYNC_STANDARD },
|
||||
{ "always", ZFS_SYNC_ALWAYS },
|
||||
{ "disabled", ZFS_SYNC_DISABLED },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/* inherit index properties */
|
||||
register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT,
|
||||
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"standard | always | disabled", "SYNC",
|
||||
sync_table);
|
||||
zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
|
||||
ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
|
||||
ZFS_TYPE_VOLUME,
|
||||
"on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM",
|
||||
checksum_table);
|
||||
register_index(ZFS_PROP_COMPRESSION, "compression",
|
||||
zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"on | off | verify | sha256[,verify]", "DEDUP",
|
||||
dedup_table);
|
||||
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
|
||||
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"on | off | lzjb | gzip | gzip-[1-9]", "COMPRESS", compress_table);
|
||||
register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
|
||||
"on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS",
|
||||
compress_table);
|
||||
zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"hidden | visible", "SNAPDIR", snapdir_table);
|
||||
register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_GROUPMASK,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"discard | groupmask | passthrough", "ACLMODE", acl_mode_table);
|
||||
register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ZFS_ACL_RESTRICTED,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
|
||||
ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"discard | noallow | restricted | passthrough | passthrough-x",
|
||||
"ACLINHERIT", acl_inherit_table);
|
||||
register_index(ZFS_PROP_COPIES, "copies", 1,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"1 | 2 | 3", "COPIES", copies_table);
|
||||
register_index(ZFS_PROP_PRIMARYCACHE, "primarycache",
|
||||
zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache",
|
||||
ZFS_CACHE_ALL, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
|
||||
"all | none | metadata", "PRIMARYCACHE", cache_table);
|
||||
register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache",
|
||||
zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache",
|
||||
ZFS_CACHE_ALL, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
|
||||
"all | none | metadata", "SECONDARYCACHE", cache_table);
|
||||
zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"latency | throughput", "LOGBIAS", logbias_table);
|
||||
|
||||
/* inherit index (boolean) properties */
|
||||
register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table);
|
||||
register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES",
|
||||
boolean_table);
|
||||
register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC",
|
||||
boolean_table);
|
||||
register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID",
|
||||
boolean_table);
|
||||
register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY",
|
||||
boolean_table);
|
||||
register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table);
|
||||
register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR",
|
||||
boolean_table);
|
||||
register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN",
|
||||
boolean_table);
|
||||
register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
|
||||
zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND",
|
||||
boolean_table);
|
||||
|
||||
/* default index properties */
|
||||
register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
|
||||
zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
|
||||
"1 | 2 | 3 | 4 | current", "VERSION", version_table);
|
||||
register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
|
||||
zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
|
||||
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
|
||||
"CANMOUNT", canmount_table);
|
||||
|
||||
/* readonly index (boolean) properties */
|
||||
register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
|
||||
zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
|
||||
register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
|
||||
zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
|
||||
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
|
||||
boolean_table);
|
||||
|
||||
/* set once index properties */
|
||||
register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
|
||||
zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
|
||||
PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
|
||||
"none | formC | formD | formKC | formKD", "NORMALIZATION",
|
||||
normalize_table);
|
||||
register_index(ZFS_PROP_CASE, "casesensitivity", ZFS_CASE_SENSITIVE,
|
||||
PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
|
||||
zprop_register_index(ZFS_PROP_CASE, "casesensitivity",
|
||||
ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM |
|
||||
ZFS_TYPE_SNAPSHOT,
|
||||
"sensitive | insensitive | mixed", "CASE", case_table);
|
||||
|
||||
/* set once index (boolean) properties */
|
||||
register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
|
||||
zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
|
||||
"on | off", "UTF8ONLY", boolean_table);
|
||||
|
||||
/* string properties */
|
||||
register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
|
||||
zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN");
|
||||
register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "<path> | legacy | none", "MOUNTPOINT");
|
||||
register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", "SHARENFS");
|
||||
register_string(ZFS_PROP_SHAREISCSI, "shareiscsi", "off", PROP_INHERIT,
|
||||
ZFS_TYPE_DATASET, "on | off | type=<type>", "SHAREISCSI");
|
||||
register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
|
||||
zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/",
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
|
||||
"MOUNTPOINT");
|
||||
zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off",
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options",
|
||||
"SHARENFS");
|
||||
zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
|
||||
ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE");
|
||||
register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "on | off | sharemgr(1M) options", "SHARESMB");
|
||||
zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off",
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"on | off | sharemgr(1M) options", "SHARESMB");
|
||||
zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel",
|
||||
ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET,
|
||||
"<sensitivity label>", "MLSLABEL");
|
||||
|
||||
/* readonly number properties */
|
||||
register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
|
||||
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
|
||||
ZFS_TYPE_DATASET, "<size>", "USED");
|
||||
register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
|
||||
zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL");
|
||||
register_number(ZFS_PROP_REFERENCED, "referenced", 0, PROP_READONLY,
|
||||
ZFS_TYPE_DATASET, "<size>", "REFER");
|
||||
register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
|
||||
zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER");
|
||||
zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
"<1.00x or higher if compressed>", "RATIO");
|
||||
register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", 8192,
|
||||
PROP_ONETIME,
|
||||
zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
|
||||
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
|
||||
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");
|
||||
register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDSNAP");
|
||||
register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDDS");
|
||||
register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDCHILD");
|
||||
register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
|
||||
zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
|
||||
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
|
||||
"USEDSNAP");
|
||||
zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0,
|
||||
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
|
||||
"USEDDS");
|
||||
zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0,
|
||||
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
|
||||
"USEDCHILD");
|
||||
zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
|
||||
PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
|
||||
register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
|
||||
zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
|
||||
ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
|
||||
|
||||
/* default number properties */
|
||||
register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
|
||||
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA");
|
||||
register_number(ZFS_PROP_RESERVATION, "reservation", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size> | none", "RESERV");
|
||||
register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
|
||||
zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0,
|
||||
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"<size> | none", "RESERV");
|
||||
zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_VOLUME, "<size>", "VOLSIZE");
|
||||
register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
|
||||
zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA");
|
||||
register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
|
||||
zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
|
||||
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"<size> | none", "REFRESERV");
|
||||
|
||||
/* inherit number properties */
|
||||
register_number(ZFS_PROP_RECORDSIZE, "recordsize", SPA_MAXBLOCKSIZE,
|
||||
PROP_INHERIT,
|
||||
zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize",
|
||||
SPA_MAXBLOCKSIZE, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE");
|
||||
|
||||
/* hidden properties */
|
||||
register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, NULL);
|
||||
register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_SNAPSHOT, NULL);
|
||||
register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG");
|
||||
zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES");
|
||||
zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "NAME");
|
||||
register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", PROP_TYPE_STRING,
|
||||
PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
|
||||
register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
|
||||
zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions",
|
||||
PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
|
||||
zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
|
||||
PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME,
|
||||
"STMF_SBD_LU");
|
||||
register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY,
|
||||
ZFS_TYPE_DATASET, "GUID");
|
||||
register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
|
||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, NULL);
|
||||
zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "GUID");
|
||||
zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
|
||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
"USERACCOUNTING");
|
||||
zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE");
|
||||
zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
|
||||
|
||||
/*
|
||||
* Property to be removed once libbe is integrated
|
||||
*/
|
||||
zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
|
||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
|
||||
"PRIV_PROP");
|
||||
|
||||
/* oddball properties */
|
||||
register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
|
||||
NULL, PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
"<date>", "CREATION", B_FALSE, B_TRUE, NULL);
|
||||
}
|
||||
|
||||
@ -337,6 +387,11 @@ boolean_t
|
||||
zfs_prop_delegatable(zfs_prop_t prop)
|
||||
{
|
||||
zprop_desc_t *pd = &zfs_prop_table[prop];
|
||||
|
||||
/* The mlslabel property is never delegatable. */
|
||||
if (prop == ZFS_PROP_MLSLABEL)
|
||||
return (B_FALSE);
|
||||
|
||||
return (pd->pd_attr != PROP_READONLY);
|
||||
}
|
||||
|
||||
@ -421,6 +476,12 @@ zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string)
|
||||
return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET));
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_prop_random_value(zfs_prop_t prop, uint64_t seed)
|
||||
{
|
||||
return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET));
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns TRUE if the property applies to any of the given dataset types.
|
||||
*/
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -64,48 +64,55 @@ zpool_prop_init(void)
|
||||
};
|
||||
|
||||
/* string properties */
|
||||
register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
|
||||
zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "<path>", "ALTROOT");
|
||||
register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT,
|
||||
zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "<filesystem>", "BOOTFS");
|
||||
register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
|
||||
zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
|
||||
|
||||
/* readonly number properties */
|
||||
register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
|
||||
zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "SIZE");
|
||||
register_number(ZPOOL_PROP_USED, "used", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "USED");
|
||||
register_number(ZPOOL_PROP_AVAILABLE, "available", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "AVAIL");
|
||||
register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
|
||||
zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "FREE");
|
||||
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
|
||||
zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "CAP");
|
||||
register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
|
||||
zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<guid>", "GUID");
|
||||
register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY,
|
||||
zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<state>", "HEALTH");
|
||||
zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0,
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>",
|
||||
"DEDUP");
|
||||
|
||||
/* default number properties */
|
||||
register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
|
||||
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
|
||||
zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
|
||||
|
||||
/* default index (boolean) properties */
|
||||
register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "on | off", "DELEGATION", boolean_table);
|
||||
register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
|
||||
register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "on | off", "LISTSNAPS", boolean_table);
|
||||
register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
|
||||
zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION",
|
||||
boolean_table);
|
||||
zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
|
||||
zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS",
|
||||
boolean_table);
|
||||
zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
|
||||
|
||||
/* default index properties */
|
||||
register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
|
||||
zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
|
||||
ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL,
|
||||
"wait | continue | panic", "FAILMODE", failuremode_table);
|
||||
|
||||
/* hidden properties */
|
||||
register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "NAME");
|
||||
}
|
||||
|
||||
@ -166,6 +173,12 @@ zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index,
|
||||
return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL));
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zpool_prop_random_value(zpool_prop_t prop, uint64_t seed)
|
||||
{
|
||||
return (zprop_random_value(prop, seed, ZFS_TYPE_POOL));
|
||||
}
|
||||
|
||||
#ifndef _KERNEL
|
||||
|
||||
const char *
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -65,7 +65,7 @@ zprop_get_numprops(zfs_type_t type)
|
||||
}
|
||||
|
||||
void
|
||||
register_impl(int prop, const char *name, zprop_type_t type,
|
||||
zprop_register_impl(int prop, const char *name, zprop_type_t type,
|
||||
uint64_t numdefault, const char *strdefault, zprop_attr_t attr,
|
||||
int objset_types, const char *values, const char *colname,
|
||||
boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl)
|
||||
@ -76,6 +76,8 @@ register_impl(int prop, const char *name, zprop_type_t type,
|
||||
pd = &prop_tbl[prop];
|
||||
|
||||
ASSERT(pd->pd_name == NULL || pd->pd_name == name);
|
||||
ASSERT(name != NULL);
|
||||
ASSERT(colname != NULL);
|
||||
|
||||
pd->pd_name = name;
|
||||
pd->pd_propnum = prop;
|
||||
@ -89,40 +91,44 @@ register_impl(int prop, const char *name, zprop_type_t type,
|
||||
pd->pd_rightalign = rightalign;
|
||||
pd->pd_visible = visible;
|
||||
pd->pd_table = idx_tbl;
|
||||
pd->pd_table_size = 0;
|
||||
while (idx_tbl && (idx_tbl++)->pi_name != NULL)
|
||||
pd->pd_table_size++;
|
||||
}
|
||||
|
||||
void
|
||||
register_string(int prop, const char *name, const char *def,
|
||||
zprop_register_string(int prop, const char *name, const char *def,
|
||||
zprop_attr_t attr, int objset_types, const char *values,
|
||||
const char *colname)
|
||||
{
|
||||
register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr,
|
||||
zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr,
|
||||
objset_types, values, colname, B_FALSE, B_TRUE, NULL);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
register_number(int prop, const char *name, uint64_t def, zprop_attr_t attr,
|
||||
int objset_types, const char *values, const char *colname)
|
||||
zprop_register_number(int prop, const char *name, uint64_t def,
|
||||
zprop_attr_t attr, int objset_types, const char *values,
|
||||
const char *colname)
|
||||
{
|
||||
register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr,
|
||||
zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr,
|
||||
objset_types, values, colname, B_TRUE, B_TRUE, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
register_index(int prop, const char *name, uint64_t def, zprop_attr_t attr,
|
||||
int objset_types, const char *values, const char *colname,
|
||||
const zprop_index_t *idx_tbl)
|
||||
zprop_register_index(int prop, const char *name, uint64_t def,
|
||||
zprop_attr_t attr, int objset_types, const char *values,
|
||||
const char *colname, const zprop_index_t *idx_tbl)
|
||||
{
|
||||
register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
|
||||
zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
|
||||
objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl);
|
||||
}
|
||||
|
||||
void
|
||||
register_hidden(int prop, const char *name, zprop_type_t type,
|
||||
zprop_register_hidden(int prop, const char *name, zprop_type_t type,
|
||||
zprop_attr_t attr, int objset_types, const char *colname)
|
||||
{
|
||||
register_impl(prop, name, type, 0, NULL, attr,
|
||||
zprop_register_impl(prop, name, type, 0, NULL, attr,
|
||||
objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
|
||||
}
|
||||
|
||||
@ -307,6 +313,25 @@ zprop_index_to_string(int prop, uint64_t index, const char **string,
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a random valid property value. Used by ztest.
|
||||
*/
|
||||
uint64_t
|
||||
zprop_random_value(int prop, uint64_t seed, zfs_type_t type)
|
||||
{
|
||||
zprop_desc_t *prop_tbl;
|
||||
const zprop_index_t *idx_tbl;
|
||||
|
||||
ASSERT((uint_t)prop < zprop_get_numprops(type));
|
||||
prop_tbl = zprop_get_proptable(type);
|
||||
idx_tbl = prop_tbl[prop].pd_table;
|
||||
|
||||
if (idx_tbl == NULL)
|
||||
return (seed);
|
||||
|
||||
return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value);
|
||||
}
|
||||
|
||||
const char *
|
||||
zprop_values(int prop, zfs_type_t type)
|
||||
{
|
||||
|
676
module/zfs/arc.c
676
module/zfs/arc.c
File diff suppressed because it is too large
Load Diff
@ -19,331 +19,51 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
static int
|
||||
bplist_hold(bplist_t *bpl)
|
||||
|
||||
void
|
||||
bplist_create(bplist_t *bpl)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&bpl->bpl_lock));
|
||||
if (bpl->bpl_dbuf == NULL) {
|
||||
int err = dmu_bonus_hold(bpl->bpl_mos,
|
||||
bpl->bpl_object, bpl, &bpl->bpl_dbuf);
|
||||
if (err)
|
||||
return (err);
|
||||
bpl->bpl_phys = bpl->bpl_dbuf->db_data;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
|
||||
{
|
||||
int size;
|
||||
|
||||
size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
|
||||
BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
|
||||
|
||||
return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
|
||||
DMU_OT_BPLIST_HDR, size, tx));
|
||||
mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&bpl->bpl_list, sizeof (bplist_entry_t),
|
||||
offsetof(bplist_entry_t, bpe_node));
|
||||
}
|
||||
|
||||
void
|
||||
bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
|
||||
bplist_destroy(bplist_t *bpl)
|
||||
{
|
||||
VERIFY(dmu_object_free(mos, object, tx) == 0);
|
||||
}
|
||||
|
||||
int
|
||||
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
int err;
|
||||
|
||||
err = dmu_object_info(mos, object, &doi);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
|
||||
ASSERT(bpl->bpl_dbuf == NULL);
|
||||
ASSERT(bpl->bpl_phys == NULL);
|
||||
ASSERT(bpl->bpl_cached_dbuf == NULL);
|
||||
ASSERT(bpl->bpl_queue == NULL);
|
||||
ASSERT(object != 0);
|
||||
ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
|
||||
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
|
||||
|
||||
bpl->bpl_mos = mos;
|
||||
bpl->bpl_object = object;
|
||||
bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
|
||||
bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
|
||||
bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
|
||||
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (0);
|
||||
list_destroy(&bpl->bpl_list);
|
||||
mutex_destroy(&bpl->bpl_lock);
|
||||
}
|
||||
|
||||
void
|
||||
bplist_close(bplist_t *bpl)
|
||||
bplist_append(bplist_t *bpl, const blkptr_t *bp)
|
||||
{
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
|
||||
ASSERT(bpl->bpl_queue == NULL);
|
||||
|
||||
if (bpl->bpl_cached_dbuf) {
|
||||
dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
|
||||
bpl->bpl_cached_dbuf = NULL;
|
||||
}
|
||||
if (bpl->bpl_dbuf) {
|
||||
dmu_buf_rele(bpl->bpl_dbuf, bpl);
|
||||
bpl->bpl_dbuf = NULL;
|
||||
bpl->bpl_phys = NULL;
|
||||
}
|
||||
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
bplist_empty(bplist_t *bpl)
|
||||
{
|
||||
boolean_t rv;
|
||||
|
||||
if (bpl->bpl_object == 0)
|
||||
return (B_TRUE);
|
||||
bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_SLEEP);
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
VERIFY(0 == bplist_hold(bpl)); /* XXX */
|
||||
rv = (bpl->bpl_phys->bpl_entries == 0);
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
|
||||
return (rv);
|
||||
}
|
||||
|
||||
static int
|
||||
bplist_cache(bplist_t *bpl, uint64_t blkid)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (bpl->bpl_cached_dbuf == NULL ||
|
||||
bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
|
||||
if (bpl->bpl_cached_dbuf != NULL)
|
||||
dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
|
||||
err = dmu_buf_hold(bpl->bpl_mos,
|
||||
bpl->bpl_object, blkid << bpl->bpl_blockshift,
|
||||
bpl, &bpl->bpl_cached_dbuf);
|
||||
ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
|
||||
1ULL << bpl->bpl_blockshift);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
|
||||
{
|
||||
uint64_t blk, off;
|
||||
blkptr_t *bparray;
|
||||
int err;
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
|
||||
err = bplist_hold(bpl);
|
||||
if (err) {
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
if (*itorp >= bpl->bpl_phys->bpl_entries) {
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
blk = *itorp >> bpl->bpl_bpshift;
|
||||
off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
|
||||
|
||||
err = bplist_cache(bpl, blk);
|
||||
if (err) {
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
bparray = bpl->bpl_cached_dbuf->db_data;
|
||||
*bp = bparray[off];
|
||||
(*itorp)++;
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t blk, off;
|
||||
blkptr_t *bparray;
|
||||
int err;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
err = bplist_hold(bpl);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
|
||||
off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
|
||||
|
||||
err = bplist_cache(bpl, blk);
|
||||
if (err) {
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
|
||||
bparray = bpl->bpl_cached_dbuf->db_data;
|
||||
bparray[off] = *bp;
|
||||
|
||||
/* We never need the fill count. */
|
||||
bparray[off].blk_fill = 0;
|
||||
|
||||
/* The bplist will compress better if we can leave off the checksum */
|
||||
bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
|
||||
|
||||
dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
|
||||
bpl->bpl_phys->bpl_entries++;
|
||||
bpl->bpl_phys->bpl_bytes +=
|
||||
bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
|
||||
if (bpl->bpl_havecomp) {
|
||||
bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
|
||||
bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
|
||||
}
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Deferred entry; will be written later by bplist_sync().
|
||||
*/
|
||||
void
|
||||
bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
|
||||
{
|
||||
bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
bpq->bpq_blk = *bp;
|
||||
bpq->bpq_next = bpl->bpl_queue;
|
||||
bpl->bpl_queue = bpq;
|
||||
bpe->bpe_blk = *bp;
|
||||
list_insert_tail(&bpl->bpl_list, bpe);
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
}
|
||||
|
||||
void
|
||||
bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
|
||||
bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
bplist_q_t *bpq;
|
||||
bplist_entry_t *bpe;
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
while ((bpq = bpl->bpl_queue) != NULL) {
|
||||
bpl->bpl_queue = bpq->bpq_next;
|
||||
while (bpe = list_head(&bpl->bpl_list)) {
|
||||
list_remove(&bpl->bpl_list, bpe);
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
|
||||
kmem_free(bpq, sizeof (*bpq));
|
||||
func(arg, &bpe->bpe_blk, tx);
|
||||
kmem_free(bpe, sizeof (*bpe));
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
}
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
}
|
||||
|
||||
void
|
||||
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
|
||||
{
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
ASSERT3P(bpl->bpl_queue, ==, NULL);
|
||||
VERIFY(0 == bplist_hold(bpl));
|
||||
dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
|
||||
VERIFY(0 == dmu_free_range(bpl->bpl_mos,
|
||||
bpl->bpl_object, 0, -1ULL, tx));
|
||||
bpl->bpl_phys->bpl_entries = 0;
|
||||
bpl->bpl_phys->bpl_bytes = 0;
|
||||
if (bpl->bpl_havecomp) {
|
||||
bpl->bpl_phys->bpl_comp = 0;
|
||||
bpl->bpl_phys->bpl_uncomp = 0;
|
||||
}
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
}
|
||||
|
||||
int
|
||||
bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
int err;
|
||||
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
|
||||
err = bplist_hold(bpl);
|
||||
if (err) {
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
*usedp = bpl->bpl_phys->bpl_bytes;
|
||||
if (bpl->bpl_havecomp) {
|
||||
*compp = bpl->bpl_phys->bpl_comp;
|
||||
*uncompp = bpl->bpl_phys->bpl_uncomp;
|
||||
}
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
|
||||
if (!bpl->bpl_havecomp) {
|
||||
uint64_t itor = 0, comp = 0, uncomp = 0;
|
||||
blkptr_t bp;
|
||||
|
||||
while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
|
||||
comp += BP_GET_PSIZE(&bp);
|
||||
uncomp += BP_GET_UCSIZE(&bp);
|
||||
}
|
||||
if (err == ENOENT)
|
||||
err = 0;
|
||||
*compp = comp;
|
||||
*uncompp = uncomp;
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return (in *dasizep) the amount of space on the deadlist which is:
|
||||
* mintxg < blk_birth <= maxtxg
|
||||
*/
|
||||
int
|
||||
bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
|
||||
uint64_t *dasizep)
|
||||
{
|
||||
uint64_t size = 0;
|
||||
uint64_t itor = 0;
|
||||
blkptr_t bp;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* As an optimization, if they want the whole txg range, just
|
||||
* get bpl_bytes rather than iterating over the bps.
|
||||
*/
|
||||
if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
|
||||
mutex_enter(&bpl->bpl_lock);
|
||||
err = bplist_hold(bpl);
|
||||
if (err == 0)
|
||||
*dasizep = bpl->bpl_phys->bpl_bytes;
|
||||
mutex_exit(&bpl->bpl_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
|
||||
if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
|
||||
size +=
|
||||
bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp);
|
||||
}
|
||||
}
|
||||
if (err == ENOENT)
|
||||
err = 0;
|
||||
*dasizep = size;
|
||||
return (err);
|
||||
}
|
||||
|
462
module/zfs/bpobj.c
Normal file
462
module/zfs/bpobj.c
Normal file
@ -0,0 +1,462 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/bpobj.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
uint64_t
|
||||
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
|
||||
{
|
||||
int size;
|
||||
|
||||
if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
|
||||
size = BPOBJ_SIZE_V0;
|
||||
else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
|
||||
size = BPOBJ_SIZE_V1;
|
||||
else
|
||||
size = sizeof (bpobj_phys_t);
|
||||
|
||||
return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
|
||||
DMU_OT_BPOBJ_HDR, size, tx));
|
||||
}
|
||||
|
||||
void
|
||||
bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
|
||||
{
|
||||
int64_t i;
|
||||
bpobj_t bpo;
|
||||
dmu_object_info_t doi;
|
||||
int epb;
|
||||
dmu_buf_t *dbuf = NULL;
|
||||
|
||||
VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
|
||||
|
||||
mutex_enter(&bpo.bpo_lock);
|
||||
|
||||
if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
|
||||
goto out;
|
||||
|
||||
VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
|
||||
epb = doi.doi_data_block_size / sizeof (uint64_t);
|
||||
|
||||
for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
|
||||
uint64_t *objarray;
|
||||
uint64_t offset, blkoff;
|
||||
|
||||
offset = i * sizeof (uint64_t);
|
||||
blkoff = P2PHASE(i, epb);
|
||||
|
||||
if (dbuf == NULL || dbuf->db_offset > offset) {
|
||||
if (dbuf)
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
VERIFY3U(0, ==, dmu_buf_hold(os,
|
||||
bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
|
||||
}
|
||||
|
||||
ASSERT3U(offset, >=, dbuf->db_offset);
|
||||
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
|
||||
|
||||
objarray = dbuf->db_data;
|
||||
bpobj_free(os, objarray[blkoff], tx);
|
||||
}
|
||||
if (dbuf) {
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
dbuf = NULL;
|
||||
}
|
||||
VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));
|
||||
|
||||
out:
|
||||
mutex_exit(&bpo.bpo_lock);
|
||||
bpobj_close(&bpo);
|
||||
|
||||
VERIFY3U(0, ==, dmu_object_free(os, obj, tx));
|
||||
}
|
||||
|
||||
int
|
||||
bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
int err;
|
||||
|
||||
err = dmu_object_info(os, object, &doi);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
bzero(bpo, sizeof (*bpo));
|
||||
mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
ASSERT(bpo->bpo_dbuf == NULL);
|
||||
ASSERT(bpo->bpo_phys == NULL);
|
||||
ASSERT(object != 0);
|
||||
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
|
||||
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
|
||||
|
||||
bpo->bpo_os = os;
|
||||
bpo->bpo_object = object;
|
||||
bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
|
||||
bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
|
||||
bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
|
||||
|
||||
err = dmu_bonus_hold(bpo->bpo_os,
|
||||
bpo->bpo_object, bpo, &bpo->bpo_dbuf);
|
||||
if (err)
|
||||
return (err);
|
||||
bpo->bpo_phys = bpo->bpo_dbuf->db_data;
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
bpobj_close(bpobj_t *bpo)
|
||||
{
|
||||
/* Lame workaround for closing a bpobj that was never opened. */
|
||||
if (bpo->bpo_object == 0)
|
||||
return;
|
||||
|
||||
dmu_buf_rele(bpo->bpo_dbuf, bpo);
|
||||
if (bpo->bpo_cached_dbuf != NULL)
|
||||
dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
|
||||
bpo->bpo_dbuf = NULL;
|
||||
bpo->bpo_phys = NULL;
|
||||
bpo->bpo_cached_dbuf = NULL;
|
||||
|
||||
mutex_destroy(&bpo->bpo_lock);
|
||||
}
|
||||
|
||||
static int
|
||||
bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
|
||||
boolean_t free)
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
int epb;
|
||||
int64_t i;
|
||||
int err = 0;
|
||||
dmu_buf_t *dbuf = NULL;
|
||||
|
||||
mutex_enter(&bpo->bpo_lock);
|
||||
|
||||
if (free)
|
||||
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
|
||||
|
||||
for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
|
||||
blkptr_t *bparray;
|
||||
blkptr_t *bp;
|
||||
uint64_t offset, blkoff;
|
||||
|
||||
offset = i * sizeof (blkptr_t);
|
||||
blkoff = P2PHASE(i, bpo->bpo_epb);
|
||||
|
||||
if (dbuf == NULL || dbuf->db_offset > offset) {
|
||||
if (dbuf)
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
|
||||
FTAG, &dbuf, 0);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT3U(offset, >=, dbuf->db_offset);
|
||||
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
|
||||
|
||||
bparray = dbuf->db_data;
|
||||
bp = &bparray[blkoff];
|
||||
err = func(arg, bp, tx);
|
||||
if (err)
|
||||
break;
|
||||
if (free) {
|
||||
bpo->bpo_phys->bpo_bytes -=
|
||||
bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
|
||||
ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
|
||||
if (bpo->bpo_havecomp) {
|
||||
bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp);
|
||||
bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
|
||||
}
|
||||
bpo->bpo_phys->bpo_num_blkptrs--;
|
||||
ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
|
||||
}
|
||||
}
|
||||
if (dbuf) {
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
dbuf = NULL;
|
||||
}
|
||||
if (free) {
|
||||
i++;
|
||||
VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
|
||||
i * sizeof (blkptr_t), -1ULL, tx));
|
||||
}
|
||||
if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
|
||||
goto out;
|
||||
|
||||
ASSERT(bpo->bpo_havecomp);
|
||||
err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
|
||||
if (err)
|
||||
return (err);
|
||||
epb = doi.doi_data_block_size / sizeof (uint64_t);
|
||||
|
||||
for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
|
||||
uint64_t *objarray;
|
||||
uint64_t offset, blkoff;
|
||||
bpobj_t sublist;
|
||||
uint64_t used_before, comp_before, uncomp_before;
|
||||
uint64_t used_after, comp_after, uncomp_after;
|
||||
|
||||
offset = i * sizeof (uint64_t);
|
||||
blkoff = P2PHASE(i, epb);
|
||||
|
||||
if (dbuf == NULL || dbuf->db_offset > offset) {
|
||||
if (dbuf)
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
err = dmu_buf_hold(bpo->bpo_os,
|
||||
bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT3U(offset, >=, dbuf->db_offset);
|
||||
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
|
||||
|
||||
objarray = dbuf->db_data;
|
||||
err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
|
||||
if (err)
|
||||
break;
|
||||
if (free) {
|
||||
err = bpobj_space(&sublist,
|
||||
&used_before, &comp_before, &uncomp_before);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
|
||||
if (free) {
|
||||
VERIFY3U(0, ==, bpobj_space(&sublist,
|
||||
&used_after, &comp_after, &uncomp_after));
|
||||
bpo->bpo_phys->bpo_bytes -= used_before - used_after;
|
||||
ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
|
||||
bpo->bpo_phys->bpo_comp -= comp_before - used_after;
|
||||
bpo->bpo_phys->bpo_uncomp -=
|
||||
uncomp_before - uncomp_after;
|
||||
}
|
||||
|
||||
bpobj_close(&sublist);
|
||||
if (err)
|
||||
break;
|
||||
if (free) {
|
||||
err = dmu_object_free(bpo->bpo_os,
|
||||
objarray[blkoff], tx);
|
||||
if (err)
|
||||
break;
|
||||
bpo->bpo_phys->bpo_num_subobjs--;
|
||||
ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
|
||||
}
|
||||
}
|
||||
if (dbuf) {
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
dbuf = NULL;
|
||||
}
|
||||
if (free) {
|
||||
VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
|
||||
bpo->bpo_phys->bpo_subobjs,
|
||||
(i + 1) * sizeof (uint64_t), -1ULL, tx));
|
||||
}
|
||||
|
||||
out:
|
||||
/* If there are no entries, there should be no bytes. */
|
||||
ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 ||
|
||||
(bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
|
||||
bpo->bpo_phys->bpo_bytes == 0);
|
||||
|
||||
mutex_exit(&bpo->bpo_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate and remove the entries. If func returns nonzero, iteration
|
||||
* will stop and that entry will not be removed.
|
||||
*/
|
||||
int
|
||||
bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate the entries. If func returns nonzero, iteration will stop.
|
||||
*/
|
||||
int
|
||||
bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
|
||||
}
|
||||
|
||||
void
|
||||
bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
|
||||
{
|
||||
bpobj_t subbpo;
|
||||
uint64_t used, comp, uncomp;
|
||||
|
||||
ASSERT(bpo->bpo_havesubobj);
|
||||
ASSERT(bpo->bpo_havecomp);
|
||||
|
||||
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
|
||||
VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
|
||||
bpobj_close(&subbpo);
|
||||
|
||||
if (used == 0) {
|
||||
/* No point in having an empty subobj. */
|
||||
bpobj_free(bpo->bpo_os, subobj, tx);
|
||||
return;
|
||||
}
|
||||
|
||||
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
|
||||
if (bpo->bpo_phys->bpo_subobjs == 0) {
|
||||
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
|
||||
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
|
||||
}
|
||||
|
||||
mutex_enter(&bpo->bpo_lock);
|
||||
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
|
||||
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
|
||||
sizeof (subobj), &subobj, tx);
|
||||
bpo->bpo_phys->bpo_num_subobjs++;
|
||||
bpo->bpo_phys->bpo_bytes += used;
|
||||
bpo->bpo_phys->bpo_comp += comp;
|
||||
bpo->bpo_phys->bpo_uncomp += uncomp;
|
||||
mutex_exit(&bpo->bpo_lock);
|
||||
}
|
||||
|
||||
void
|
||||
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
blkptr_t stored_bp = *bp;
|
||||
uint64_t offset;
|
||||
int blkoff;
|
||||
blkptr_t *bparray;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
|
||||
/* We never need the fill count. */
|
||||
stored_bp.blk_fill = 0;
|
||||
|
||||
/* The bpobj will compress better if we can leave off the checksum */
|
||||
if (!BP_GET_DEDUP(bp))
|
||||
bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
|
||||
|
||||
mutex_enter(&bpo->bpo_lock);
|
||||
|
||||
offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
|
||||
blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);
|
||||
|
||||
if (bpo->bpo_cached_dbuf == NULL ||
|
||||
offset < bpo->bpo_cached_dbuf->db_offset ||
|
||||
offset >= bpo->bpo_cached_dbuf->db_offset +
|
||||
bpo->bpo_cached_dbuf->db_size) {
|
||||
if (bpo->bpo_cached_dbuf)
|
||||
dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
|
||||
VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
|
||||
offset, bpo, &bpo->bpo_cached_dbuf, 0));
|
||||
}
|
||||
|
||||
dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
|
||||
bparray = bpo->bpo_cached_dbuf->db_data;
|
||||
bparray[blkoff] = stored_bp;
|
||||
|
||||
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
|
||||
bpo->bpo_phys->bpo_num_blkptrs++;
|
||||
bpo->bpo_phys->bpo_bytes +=
|
||||
bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
|
||||
if (bpo->bpo_havecomp) {
|
||||
bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
|
||||
bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
|
||||
}
|
||||
mutex_exit(&bpo->bpo_lock);
|
||||
}
|
||||
|
||||
struct space_range_arg {
|
||||
spa_t *spa;
|
||||
uint64_t mintxg;
|
||||
uint64_t maxtxg;
|
||||
uint64_t used;
|
||||
uint64_t comp;
|
||||
uint64_t uncomp;
|
||||
};
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
struct space_range_arg *sra = arg;
|
||||
|
||||
if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
|
||||
sra->used += bp_get_dsize_sync(sra->spa, bp);
|
||||
sra->comp += BP_GET_PSIZE(bp);
|
||||
sra->uncomp += BP_GET_UCSIZE(bp);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
mutex_enter(&bpo->bpo_lock);
|
||||
|
||||
*usedp = bpo->bpo_phys->bpo_bytes;
|
||||
if (bpo->bpo_havecomp) {
|
||||
*compp = bpo->bpo_phys->bpo_comp;
|
||||
*uncompp = bpo->bpo_phys->bpo_uncomp;
|
||||
mutex_exit(&bpo->bpo_lock);
|
||||
return (0);
|
||||
} else {
|
||||
mutex_exit(&bpo->bpo_lock);
|
||||
return (bpobj_space_range(bpo, 0, UINT64_MAX,
|
||||
usedp, compp, uncompp));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the amount of space in the bpobj which is:
|
||||
* mintxg < blk_birth <= maxtxg
|
||||
*/
|
||||
int
|
||||
bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
struct space_range_arg sra = { 0 };
|
||||
int err;
|
||||
|
||||
/*
|
||||
* As an optimization, if they want the whole txg range, just
|
||||
* get bpo_bytes rather than iterating over the bps.
|
||||
*/
|
||||
if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
|
||||
return (bpobj_space(bpo, usedp, compp, uncompp));
|
||||
|
||||
sra.spa = dmu_objset_spa(bpo->bpo_os);
|
||||
sra.mintxg = mintxg;
|
||||
sra.maxtxg = maxtxg;
|
||||
|
||||
err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
|
||||
*usedp = sra.used;
|
||||
*compp = sra.comp;
|
||||
*uncompp = sra.uncomp;
|
||||
return (err);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
1140
module/zfs/ddt.c
Normal file
1140
module/zfs/ddt.c
Normal file
File diff suppressed because it is too large
Load Diff
157
module/zfs/ddt_zap.c
Normal file
157
module/zfs/ddt_zap.c
Normal file
@ -0,0 +1,157 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <util/sscanf.h>
|
||||
|
||||
int ddt_zap_leaf_blockshift = 12;
|
||||
int ddt_zap_indirect_blockshift = 12;
|
||||
|
||||
static int
|
||||
ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
|
||||
{
|
||||
zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;
|
||||
|
||||
if (prehash)
|
||||
flags |= ZAP_FLAG_PRE_HASHED_KEY;
|
||||
|
||||
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
|
||||
ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
|
||||
return (*objectp == 0 ? ENOTSUP : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
||||
{
|
||||
return (zap_destroy(os, object, tx));
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
|
||||
{
|
||||
uchar_t cbuf[sizeof (dde->dde_phys) + 1];
|
||||
uint64_t one, csize;
|
||||
int error;
|
||||
|
||||
error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
|
||||
DDT_KEY_WORDS, &one, &csize);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
ASSERT(one == 1);
|
||||
ASSERT(csize <= sizeof (cbuf));
|
||||
|
||||
error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key,
|
||||
DDT_KEY_WORDS, 1, csize, cbuf);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
|
||||
{
|
||||
(void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
|
||||
DDT_KEY_WORDS);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
|
||||
{
|
||||
uchar_t cbuf[sizeof (dde->dde_phys) + 1];
|
||||
uint64_t csize;
|
||||
|
||||
csize = ddt_compress(dde->dde_phys, cbuf,
|
||||
sizeof (dde->dde_phys), sizeof (cbuf));
|
||||
|
||||
return (zap_update_uint64(os, object, (uint64_t *)&dde->dde_key,
|
||||
DDT_KEY_WORDS, 1, csize, cbuf, tx));
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
|
||||
{
|
||||
return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key,
|
||||
DDT_KEY_WORDS, tx));
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
int error;
|
||||
|
||||
zap_cursor_init_serialized(&zc, os, object, *walk);
|
||||
if ((error = zap_cursor_retrieve(&zc, &za)) == 0) {
|
||||
uchar_t cbuf[sizeof (dde->dde_phys) + 1];
|
||||
uint64_t csize = za.za_num_integers;
|
||||
ASSERT(za.za_integer_length == 1);
|
||||
error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name,
|
||||
DDT_KEY_WORDS, 1, csize, cbuf);
|
||||
ASSERT(error == 0);
|
||||
if (error == 0) {
|
||||
ddt_decompress(cbuf, dde->dde_phys, csize,
|
||||
sizeof (dde->dde_phys));
|
||||
dde->dde_key = *(ddt_key_t *)za.za_name;
|
||||
}
|
||||
zap_cursor_advance(&zc);
|
||||
*walk = zap_cursor_serialize(&zc);
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
ddt_zap_count(objset_t *os, uint64_t object)
|
||||
{
|
||||
uint64_t count = 0;
|
||||
|
||||
VERIFY(zap_count(os, object, &count) == 0);
|
||||
|
||||
return (count);
|
||||
}
|
||||
|
||||
const ddt_ops_t ddt_zap_ops = {
|
||||
"zap",
|
||||
ddt_zap_create,
|
||||
ddt_zap_destroy,
|
||||
ddt_zap_lookup,
|
||||
ddt_zap_prefetch,
|
||||
ddt_zap_update,
|
||||
ddt_zap_remove,
|
||||
ddt_zap_walk,
|
||||
ddt_zap_count,
|
||||
};
|
717
module/zfs/dmu.c
717
module/zfs/dmu.c
File diff suppressed because it is too large
Load Diff
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
@ -32,16 +31,15 @@ uint64_t
|
||||
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
|
||||
{
|
||||
objset_impl_t *osi = os->os;
|
||||
uint64_t object;
|
||||
uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
|
||||
(osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
|
||||
(os->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
|
||||
dnode_t *dn = NULL;
|
||||
int restarted = B_FALSE;
|
||||
|
||||
mutex_enter(&osi->os_obj_lock);
|
||||
mutex_enter(&os->os_obj_lock);
|
||||
for (;;) {
|
||||
object = osi->os_obj_next;
|
||||
object = os->os_obj_next;
|
||||
/*
|
||||
* Each time we polish off an L2 bp worth of dnodes
|
||||
* (2^13 objects), move to another L2 bp that's still
|
||||
@ -51,14 +49,14 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
|
||||
*/
|
||||
if (P2PHASE(object, L2_dnode_count) == 0) {
|
||||
uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
|
||||
int error = dnode_next_offset(osi->os_meta_dnode,
|
||||
int error = dnode_next_offset(os->os_meta_dnode,
|
||||
DNODE_FIND_HOLE,
|
||||
&offset, 2, DNODES_PER_BLOCK >> 2, 0);
|
||||
restarted = B_TRUE;
|
||||
if (error == 0)
|
||||
object = offset >> DNODE_SHIFT;
|
||||
}
|
||||
osi->os_obj_next = ++object;
|
||||
os->os_obj_next = ++object;
|
||||
|
||||
/*
|
||||
* XXX We should check for an i/o error here and return
|
||||
@ -66,19 +64,19 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
|
||||
* dmu_tx_assign(), but there is currently no mechanism
|
||||
* to do so.
|
||||
*/
|
||||
(void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE,
|
||||
(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
|
||||
FTAG, &dn);
|
||||
if (dn)
|
||||
break;
|
||||
|
||||
if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
|
||||
osi->os_obj_next = object - 1;
|
||||
os->os_obj_next = object - 1;
|
||||
}
|
||||
|
||||
dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
mutex_exit(&osi->os_obj_lock);
|
||||
mutex_exit(&os->os_obj_lock);
|
||||
|
||||
dmu_tx_add_new_object(tx, os, object);
|
||||
return (object);
|
||||
@ -94,7 +92,7 @@ dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
|
||||
return (EBADF);
|
||||
|
||||
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
|
||||
err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
|
||||
@ -116,7 +114,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
if (object == DMU_META_DNODE_OBJECT)
|
||||
return (EBADF);
|
||||
|
||||
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
|
||||
err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
|
||||
FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
@ -128,7 +126,11 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
return (0);
|
||||
}
|
||||
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
if (bonustype == DMU_OT_SA) {
|
||||
nblkptr = 1;
|
||||
} else {
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are losing blkptrs or changing the block size this must
|
||||
@ -166,7 +168,7 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
||||
|
||||
ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
|
||||
|
||||
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
|
||||
err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
|
||||
FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
@ -185,7 +187,7 @@ dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
|
||||
uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
|
||||
int error;
|
||||
|
||||
error = dnode_next_offset(os->os->os_meta_dnode,
|
||||
error = dnode_next_offset(os->os_meta_dnode,
|
||||
(hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
|
||||
|
||||
*objectp = offset >> DNODE_SHIFT;
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -33,16 +32,10 @@
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/sa_impl.h>
|
||||
#include <sys/callb.h>
|
||||
|
||||
#define SET_BOOKMARK(zb, objset, object, level, blkid) \
|
||||
{ \
|
||||
(zb)->zb_objset = objset; \
|
||||
(zb)->zb_object = object; \
|
||||
(zb)->zb_level = level; \
|
||||
(zb)->zb_blkid = blkid; \
|
||||
}
|
||||
|
||||
struct prefetch_data {
|
||||
kmutex_t pd_mtx;
|
||||
kcondvar_t pd_cv;
|
||||
@ -68,27 +61,28 @@ static int traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *buf, uint64_t objset, uint64_t object);
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
static int
|
||||
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
||||
{
|
||||
struct traverse_data *td = arg;
|
||||
zbookmark_t zb;
|
||||
|
||||
if (bp->blk_birth == 0)
|
||||
return;
|
||||
return (0);
|
||||
|
||||
if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
|
||||
return;
|
||||
return (0);
|
||||
|
||||
zb.zb_objset = td->td_objset;
|
||||
zb.zb_object = 0;
|
||||
zb.zb_level = -1;
|
||||
zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
|
||||
VERIFY(0 == td->td_func(td->td_spa, bp, &zb, NULL, td->td_arg));
|
||||
SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
|
||||
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
|
||||
|
||||
(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
static int
|
||||
traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
|
||||
{
|
||||
struct traverse_data *td = arg;
|
||||
@ -99,17 +93,18 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
|
||||
zbookmark_t zb;
|
||||
|
||||
if (bp->blk_birth == 0)
|
||||
return;
|
||||
return (0);
|
||||
|
||||
if (claim_txg == 0 || bp->blk_birth < claim_txg)
|
||||
return;
|
||||
return (0);
|
||||
|
||||
zb.zb_objset = td->td_objset;
|
||||
zb.zb_object = lr->lr_foid;
|
||||
zb.zb_level = BP_GET_LEVEL(bp);
|
||||
zb.zb_blkid = lr->lr_offset / BP_GET_LSIZE(bp);
|
||||
VERIFY(0 == td->td_func(td->td_spa, bp, &zb, NULL, td->td_arg));
|
||||
SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, ZB_ZIL_LEVEL,
|
||||
lr->lr_offset / BP_GET_LSIZE(bp));
|
||||
|
||||
(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL,
|
||||
td->td_arg);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -120,7 +115,7 @@ traverse_zil(struct traverse_data *td, zil_header_t *zh)
|
||||
|
||||
/*
|
||||
* We only want to visit blocks that have been claimed but not yet
|
||||
* replayed (or, in read-only mode, blocks that *would* be claimed).
|
||||
* replayed; plus, in read-only mode, blocks that are already stable.
|
||||
*/
|
||||
if (claim_txg == 0 && spa_writeable(td->td_spa))
|
||||
return;
|
||||
@ -138,12 +133,14 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
|
||||
{
|
||||
zbookmark_t czb;
|
||||
int err = 0;
|
||||
int err = 0, lasterr = 0;
|
||||
arc_buf_t *buf = NULL;
|
||||
struct prefetch_data *pd = td->td_pfd;
|
||||
boolean_t hard = td->td_flags & TRAVERSE_HARD;
|
||||
|
||||
if (bp->blk_birth == 0) {
|
||||
err = td->td_func(td->td_spa, NULL, zb, dnp, td->td_arg);
|
||||
err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
|
||||
td->td_arg);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -163,7 +160,8 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
}
|
||||
|
||||
if (td->td_flags & TRAVERSE_PRE) {
|
||||
err = td->td_func(td->td_spa, bp, zb, dnp, td->td_arg);
|
||||
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
|
||||
td->td_arg);
|
||||
if (err)
|
||||
return (err);
|
||||
}
|
||||
@ -174,7 +172,7 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
blkptr_t *cbp;
|
||||
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||
|
||||
err = arc_read(NULL, td->td_spa, bp, pbuf,
|
||||
err = dsl_read(NULL, td->td_spa, bp, pbuf,
|
||||
arc_getbuf_func, &buf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
||||
if (err)
|
||||
@ -187,15 +185,18 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
zb->zb_level - 1,
|
||||
zb->zb_blkid * epb + i);
|
||||
err = traverse_visitbp(td, dnp, buf, cbp, &czb);
|
||||
if (err)
|
||||
break;
|
||||
if (err) {
|
||||
if (!hard)
|
||||
break;
|
||||
lasterr = err;
|
||||
}
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
int i;
|
||||
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||
|
||||
err = arc_read(NULL, td->td_spa, bp, pbuf,
|
||||
err = dsl_read(NULL, td->td_spa, bp, pbuf,
|
||||
arc_getbuf_func, &buf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
||||
if (err)
|
||||
@ -203,18 +204,21 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
|
||||
/* recursively visitbp() blocks below this */
|
||||
dnp = buf->b_data;
|
||||
for (i = 0; i < epb && err == 0; i++, dnp++) {
|
||||
for (i = 0; i < epb; i++, dnp++) {
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
zb->zb_blkid * epb + i);
|
||||
if (err)
|
||||
break;
|
||||
if (err) {
|
||||
if (!hard)
|
||||
break;
|
||||
lasterr = err;
|
||||
}
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
objset_phys_t *osp;
|
||||
dnode_phys_t *dnp;
|
||||
|
||||
err = arc_read_nolock(NULL, td->td_spa, bp,
|
||||
err = dsl_read_nolock(NULL, td->td_spa, bp,
|
||||
arc_getbuf_func, &buf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
|
||||
if (err)
|
||||
@ -224,12 +228,21 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
traverse_zil(td, &osp->os_zil_header);
|
||||
|
||||
dnp = &osp->os_meta_dnode;
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset, 0);
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
DMU_META_DNODE_OBJECT);
|
||||
if (err && hard) {
|
||||
lasterr = err;
|
||||
err = 0;
|
||||
}
|
||||
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
dnp = &osp->os_userused_dnode;
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
DMU_USERUSED_OBJECT);
|
||||
}
|
||||
if (err && hard) {
|
||||
lasterr = err;
|
||||
err = 0;
|
||||
}
|
||||
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
dnp = &osp->os_groupused_dnode;
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
@ -240,33 +253,52 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
if (buf)
|
||||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
|
||||
if (err == 0 && (td->td_flags & TRAVERSE_POST))
|
||||
err = td->td_func(td->td_spa, bp, zb, dnp, td->td_arg);
|
||||
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
|
||||
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
|
||||
td->td_arg);
|
||||
}
|
||||
|
||||
return (err);
|
||||
return (err != 0 ? err : lasterr);
|
||||
}
|
||||
|
||||
static int
|
||||
traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *buf, uint64_t objset, uint64_t object)
|
||||
{
|
||||
int j, err = 0;
|
||||
int j, err = 0, lasterr = 0;
|
||||
zbookmark_t czb;
|
||||
boolean_t hard = (td->td_flags & TRAVERSE_HARD);
|
||||
|
||||
for (j = 0; j < dnp->dn_nblkptr; j++) {
|
||||
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
|
||||
err = traverse_visitbp(td, dnp, buf,
|
||||
(blkptr_t *)&dnp->dn_blkptr[j], &czb);
|
||||
if (err)
|
||||
break;
|
||||
if (err) {
|
||||
if (!hard)
|
||||
break;
|
||||
lasterr = err;
|
||||
}
|
||||
}
|
||||
return (err);
|
||||
|
||||
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
||||
SET_BOOKMARK(&czb, objset,
|
||||
object, 0, DMU_SPILL_BLKID);
|
||||
err = traverse_visitbp(td, dnp, buf,
|
||||
(blkptr_t *)&dnp->dn_spill, &czb);
|
||||
if (err) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
}
|
||||
}
|
||||
return (err != 0 ? err : lasterr);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
||||
const dnode_phys_t *dnp, void *arg)
|
||||
traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
|
||||
void *arg)
|
||||
{
|
||||
struct prefetch_data *pfd = arg;
|
||||
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
||||
@ -276,7 +308,8 @@ traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
||||
return (EINTR);
|
||||
|
||||
if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
|
||||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0))
|
||||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
|
||||
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
|
||||
return (0);
|
||||
|
||||
mutex_enter(&pfd->pd_mtx);
|
||||
@ -286,7 +319,7 @@ traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
||||
cv_broadcast(&pfd->pd_cv);
|
||||
mutex_exit(&pfd->pd_mtx);
|
||||
|
||||
(void) arc_read_nolock(NULL, spa, bp, NULL, NULL,
|
||||
(void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL,
|
||||
ZIO_PRIORITY_ASYNC_READ,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
|
||||
&aflags, zb);
|
||||
@ -305,7 +338,8 @@ traverse_prefetch_thread(void *arg)
|
||||
td.td_arg = td_main->td_pfd;
|
||||
td.td_pfd = NULL;
|
||||
|
||||
SET_BOOKMARK(&czb, td.td_objset, 0, -1, 0);
|
||||
SET_BOOKMARK(&czb, td.td_objset,
|
||||
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
||||
(void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb);
|
||||
|
||||
mutex_enter(&td_main->td_pfd->pd_mtx);
|
||||
@ -346,7 +380,8 @@ traverse_impl(spa_t *spa, uint64_t objset, blkptr_t *rootbp,
|
||||
&td, TQ_NOQUEUE))
|
||||
pd.pd_exited = B_TRUE;
|
||||
|
||||
SET_BOOKMARK(&czb, objset, 0, -1, 0);
|
||||
SET_BOOKMARK(&czb, objset,
|
||||
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
||||
err = traverse_visitbp(&td, NULL, NULL, rootbp, &czb);
|
||||
|
||||
mutex_enter(&pd.pd_mtx);
|
||||
@ -378,43 +413,59 @@ traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
|
||||
* NB: pool must not be changing on-disk (eg, from zdb or sync context).
|
||||
*/
|
||||
int
|
||||
traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg)
|
||||
traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
|
||||
blkptr_cb_t func, void *arg)
|
||||
{
|
||||
int err;
|
||||
int err, lasterr = 0;
|
||||
uint64_t obj;
|
||||
dsl_pool_t *dp = spa_get_dsl(spa);
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
boolean_t hard = (flags & TRAVERSE_HARD);
|
||||
|
||||
/* visit the MOS */
|
||||
err = traverse_impl(spa, 0, spa_get_rootblkptr(spa),
|
||||
0, TRAVERSE_PRE, func, arg);
|
||||
txg_start, flags, func, arg);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
/* visit each dataset */
|
||||
for (obj = 1; err == 0; err = dmu_object_next(mos, &obj, FALSE, 0)) {
|
||||
for (obj = 1; err == 0 || (err != ESRCH && hard);
|
||||
err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
|
||||
dmu_object_info_t doi;
|
||||
|
||||
err = dmu_object_info(mos, obj, &doi);
|
||||
if (err)
|
||||
return (err);
|
||||
if (err) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (doi.doi_type == DMU_OT_DSL_DATASET) {
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t txg = txg_start;
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
if (err)
|
||||
return (err);
|
||||
err = traverse_dataset(ds,
|
||||
ds->ds_phys->ds_prev_snap_txg, TRAVERSE_PRE,
|
||||
func, arg);
|
||||
if (err) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
continue;
|
||||
}
|
||||
if (ds->ds_phys->ds_prev_snap_txg > txg)
|
||||
txg = ds->ds_phys->ds_prev_snap_txg;
|
||||
err = traverse_dataset(ds, txg, flags, func, arg);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
if (err)
|
||||
return (err);
|
||||
if (err) {
|
||||
if (!hard)
|
||||
return (err);
|
||||
lasterr = err;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (err == ESRCH)
|
||||
err = 0;
|
||||
return (err);
|
||||
return (err != 0 ? err : lasterr);
|
||||
}
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
@ -33,7 +32,10 @@
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zap_impl.h> /* for fzap_default_block_shift */
|
||||
#include <sys/spa.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/sa_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/varargs.h>
|
||||
|
||||
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
|
||||
uint64_t arg1, uint64_t arg2);
|
||||
@ -48,6 +50,8 @@ dmu_tx_create_dd(dsl_dir_t *dd)
|
||||
tx->tx_pool = dd->dd_pool;
|
||||
list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
|
||||
offsetof(dmu_tx_hold_t, txh_node));
|
||||
list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
|
||||
offsetof(dmu_tx_callback_t, dcb_node));
|
||||
#ifdef ZFS_DEBUG
|
||||
refcount_create(&tx->tx_space_written);
|
||||
refcount_create(&tx->tx_space_freed);
|
||||
@ -58,9 +62,9 @@ dmu_tx_create_dd(dsl_dir_t *dd)
|
||||
dmu_tx_t *
|
||||
dmu_tx_create(objset_t *os)
|
||||
{
|
||||
dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir);
|
||||
dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
|
||||
tx->tx_objset = os;
|
||||
tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset);
|
||||
tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
|
||||
return (tx);
|
||||
}
|
||||
|
||||
@ -98,7 +102,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
|
||||
int err;
|
||||
|
||||
if (object != DMU_NEW_OBJECT) {
|
||||
err = dnode_hold(os->os, object, tx, &dn);
|
||||
err = dnode_hold(os, object, tx, &dn);
|
||||
if (err) {
|
||||
tx->tx_err = err;
|
||||
return (NULL);
|
||||
@ -161,38 +165,47 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
|
||||
}
|
||||
|
||||
static void
|
||||
dmu_tx_count_indirects(dmu_tx_hold_t *txh, dmu_buf_impl_t *db,
|
||||
boolean_t freeable, dmu_buf_impl_t **history)
|
||||
dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db,
|
||||
int level, uint64_t blkid, boolean_t freeable, uint64_t *history)
|
||||
{
|
||||
int i = db->db_level + 1;
|
||||
dnode_t *dn = db->db_dnode;
|
||||
objset_t *os = dn->dn_objset;
|
||||
dsl_dataset_t *ds = os->os_dsl_dataset;
|
||||
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
dmu_buf_impl_t *parent = NULL;
|
||||
blkptr_t *bp = NULL;
|
||||
uint64_t space;
|
||||
|
||||
if (i >= dn->dn_nlevels)
|
||||
if (level >= dn->dn_nlevels || history[level] == blkid)
|
||||
return;
|
||||
|
||||
db = db->db_parent;
|
||||
if (db == NULL) {
|
||||
uint64_t lvls = dn->dn_nlevels - i;
|
||||
history[level] = blkid;
|
||||
|
||||
txh->txh_space_towrite += lvls << dn->dn_indblkshift;
|
||||
return;
|
||||
space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift);
|
||||
|
||||
if (db == NULL || db == dn->dn_dbuf) {
|
||||
ASSERT(level != 0);
|
||||
db = NULL;
|
||||
} else {
|
||||
ASSERT(db->db_dnode == dn);
|
||||
ASSERT(db->db_level == level);
|
||||
ASSERT(db->db.db_size == space);
|
||||
ASSERT(db->db_blkid == blkid);
|
||||
bp = db->db_blkptr;
|
||||
parent = db->db_parent;
|
||||
}
|
||||
|
||||
if (db != history[i]) {
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
uint64_t space = 1ULL << dn->dn_indblkshift;
|
||||
freeable = (bp && (freeable ||
|
||||
dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));
|
||||
|
||||
freeable = (db->db_blkptr && (freeable ||
|
||||
dsl_dataset_block_freeable(ds, db->db_blkptr->blk_birth)));
|
||||
if (freeable)
|
||||
txh->txh_space_tooverwrite += space;
|
||||
else
|
||||
txh->txh_space_towrite += space;
|
||||
if (db->db_blkptr)
|
||||
txh->txh_space_tounref += space;
|
||||
history[i] = db;
|
||||
dmu_tx_count_indirects(txh, db, freeable, history);
|
||||
}
|
||||
if (freeable)
|
||||
txh->txh_space_tooverwrite += space;
|
||||
else
|
||||
txh->txh_space_towrite += space;
|
||||
if (bp)
|
||||
txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp);
|
||||
|
||||
dmu_tx_count_twig(txh, dn, parent, level + 1,
|
||||
blkid >> epbs, freeable, history);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
@ -213,7 +226,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
max_ibs = DN_MAX_INDBLKSHIFT;
|
||||
|
||||
if (dn) {
|
||||
dmu_buf_impl_t *last[DN_MAX_LEVELS];
|
||||
uint64_t history[DN_MAX_LEVELS];
|
||||
int nlvls = dn->dn_nlevels;
|
||||
int delta;
|
||||
|
||||
@ -289,29 +302,24 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
* If this write is not off the end of the file
|
||||
* we need to account for overwrites/unref.
|
||||
*/
|
||||
if (start <= dn->dn_maxblkid)
|
||||
bzero(last, sizeof (dmu_buf_impl_t *) * DN_MAX_LEVELS);
|
||||
if (start <= dn->dn_maxblkid) {
|
||||
for (int l = 0; l < DN_MAX_LEVELS; l++)
|
||||
history[l] = -1ULL;
|
||||
}
|
||||
while (start <= dn->dn_maxblkid) {
|
||||
spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
dmu_buf_impl_t *db;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
db = dbuf_hold_level(dn, 0, start, FTAG);
|
||||
err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
if (db->db_blkptr && dsl_dataset_block_freeable(ds,
|
||||
db->db_blkptr->blk_birth)) {
|
||||
dprintf_bp(db->db_blkptr, "can free old%s", "");
|
||||
txh->txh_space_tooverwrite += dn->dn_datablksz;
|
||||
txh->txh_space_tounref += dn->dn_datablksz;
|
||||
dmu_tx_count_indirects(txh, db, TRUE, last);
|
||||
} else {
|
||||
txh->txh_space_towrite += dn->dn_datablksz;
|
||||
if (db->db_blkptr)
|
||||
txh->txh_space_tounref +=
|
||||
bp_get_dasize(spa, db->db_blkptr);
|
||||
dmu_tx_count_indirects(txh, db, FALSE, last);
|
||||
|
||||
if (err) {
|
||||
txh->txh_tx->tx_err = err;
|
||||
return;
|
||||
}
|
||||
|
||||
dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
|
||||
history);
|
||||
dbuf_rele(db, FTAG);
|
||||
if (++start > end) {
|
||||
/*
|
||||
@ -376,13 +384,13 @@ static void
|
||||
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
|
||||
{
|
||||
dnode_t *dn = txh->txh_dnode;
|
||||
dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
|
||||
dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode;
|
||||
uint64_t space = mdn->dn_datablksz +
|
||||
((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
|
||||
|
||||
if (dn && dn->dn_dbuf->db_blkptr &&
|
||||
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
dn->dn_dbuf->db_blkptr->blk_birth)) {
|
||||
dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
|
||||
txh->txh_space_tooverwrite += space;
|
||||
txh->txh_space_tounref += space;
|
||||
} else {
|
||||
@ -427,7 +435,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
* The struct_rwlock protects us against dn_nlevels
|
||||
* changing, in case (against all odds) we manage to dirty &
|
||||
* sync out the changes after we check for being dirty.
|
||||
* Also, dbuf_hold_level() wants us to have the struct_rwlock.
|
||||
* Also, dbuf_hold_impl() wants us to have the struct_rwlock.
|
||||
*/
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
@ -457,9 +465,9 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
blkptr_t *bp = dn->dn_phys->dn_blkptr;
|
||||
ASSERT3U(blkid + i, <, dn->dn_nblkptr);
|
||||
bp += blkid + i;
|
||||
if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
|
||||
if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
|
||||
dprintf_bp(bp, "can free old%s", "");
|
||||
space += bp_get_dasize(spa, bp);
|
||||
space += bp_get_dsize(spa, bp);
|
||||
}
|
||||
unref += BP_GET_ASIZE(bp);
|
||||
}
|
||||
@ -515,14 +523,22 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
blkoff = P2PHASE(blkid, epb);
|
||||
tochk = MIN(epb - blkoff, nblks);
|
||||
|
||||
dbuf = dbuf_hold_level(dn, 1, blkid >> epbs, FTAG);
|
||||
|
||||
txh->txh_memory_tohold += dbuf->db.db_size;
|
||||
if (txh->txh_memory_tohold > DMU_MAX_ACCESS) {
|
||||
txh->txh_tx->tx_err = E2BIG;
|
||||
dbuf_rele(dbuf, FTAG);
|
||||
err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
|
||||
if (err) {
|
||||
txh->txh_tx->tx_err = err;
|
||||
break;
|
||||
}
|
||||
|
||||
txh->txh_memory_tohold += dbuf->db.db_size;
|
||||
|
||||
/*
|
||||
* We don't check memory_tohold against DMU_MAX_ACCESS because
|
||||
* memory_tohold is an over-estimation (especially the >L1
|
||||
* indirect blocks), so it could fail. Callers should have
|
||||
* already verified that they will not be holding too much
|
||||
* memory.
|
||||
*/
|
||||
|
||||
err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
|
||||
if (err != 0) {
|
||||
txh->txh_tx->tx_err = err;
|
||||
@ -534,9 +550,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
bp += blkoff;
|
||||
|
||||
for (i = 0; i < tochk; i++) {
|
||||
if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) {
|
||||
if (dsl_dataset_block_freeable(ds, &bp[i],
|
||||
bp[i].blk_birth)) {
|
||||
dprintf_bp(&bp[i], "can free old%s", "");
|
||||
space += bp_get_dasize(spa, &bp[i]);
|
||||
space += bp_get_dsize(spa, &bp[i]);
|
||||
}
|
||||
unref += BP_GET_ASIZE(bp);
|
||||
}
|
||||
@ -581,6 +598,8 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
|
||||
if (len != DMU_OBJECT_END)
|
||||
dmu_tx_count_write(txh, off+len, 1);
|
||||
|
||||
dmu_tx_count_dnode(txh);
|
||||
|
||||
if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
|
||||
return;
|
||||
if (len == DMU_OBJECT_END)
|
||||
@ -623,7 +642,6 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
|
||||
}
|
||||
}
|
||||
|
||||
dmu_tx_count_dnode(txh);
|
||||
dmu_tx_count_free(txh, off, len);
|
||||
}
|
||||
|
||||
@ -673,6 +691,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
* the size will change between now and the dbuf dirty call.
|
||||
*/
|
||||
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
&dn->dn_phys->dn_blkptr[0],
|
||||
dn->dn_phys->dn_blkptr[0].blk_birth)) {
|
||||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
} else {
|
||||
@ -688,7 +707,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
* access the name in this fat-zap so that we'll check
|
||||
* for i/o errors to the leaf blocks, etc.
|
||||
*/
|
||||
err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name,
|
||||
err = zap_lookup(dn->dn_objset, dn->dn_object, name,
|
||||
8, 0, NULL);
|
||||
if (err == EIO) {
|
||||
tx->tx_err = err;
|
||||
@ -696,7 +715,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
}
|
||||
}
|
||||
|
||||
err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
|
||||
err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
|
||||
&txh->txh_space_towrite, &txh->txh_space_tooverwrite);
|
||||
|
||||
/*
|
||||
@ -771,7 +790,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
|
||||
dnode_t *dn = db->db_dnode;
|
||||
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
|
||||
ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
|
||||
ASSERT3U(dn->dn_object, ==, db->db.db_object);
|
||||
|
||||
if (tx->tx_anyobj)
|
||||
@ -808,10 +827,11 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
|
||||
match_offset = TRUE;
|
||||
/*
|
||||
* We will let this hold work for the bonus
|
||||
* buffer so that we don't need to hold it
|
||||
* when creating a new object.
|
||||
* or spill buffer so that we don't need to
|
||||
* hold it when creating a new object.
|
||||
*/
|
||||
if (blkid == DB_BONUS_BLKID)
|
||||
if (blkid == DMU_BONUS_BLKID ||
|
||||
blkid == DMU_SPILL_BLKID)
|
||||
match_offset = TRUE;
|
||||
/*
|
||||
* They might have to increase nlevels,
|
||||
@ -832,8 +852,12 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
|
||||
txh->txh_arg2 == DMU_OBJECT_END))
|
||||
match_offset = TRUE;
|
||||
break;
|
||||
case THT_SPILL:
|
||||
if (blkid == DMU_SPILL_BLKID)
|
||||
match_offset = TRUE;
|
||||
break;
|
||||
case THT_BONUS:
|
||||
if (blkid == DB_BONUS_BLKID)
|
||||
if (blkid == DMU_BONUS_BLKID)
|
||||
match_offset = TRUE;
|
||||
break;
|
||||
case THT_ZAP:
|
||||
@ -931,7 +955,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
|
||||
* assume that we won't be able to free or overwrite anything.
|
||||
*/
|
||||
if (tx->tx_objset &&
|
||||
dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
|
||||
dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
|
||||
tx->tx_lastsnap_txg) {
|
||||
towrite += tooverwrite;
|
||||
tooverwrite = tofree = 0;
|
||||
@ -1112,8 +1136,13 @@ dmu_tx_commit(dmu_tx_t *tx)
|
||||
if (tx->tx_tempreserve_cookie)
|
||||
dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
|
||||
|
||||
if (!list_is_empty(&tx->tx_callbacks))
|
||||
txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);
|
||||
|
||||
if (tx->tx_anyobj == FALSE)
|
||||
txg_rele_to_sync(&tx->tx_txgh);
|
||||
|
||||
list_destroy(&tx->tx_callbacks);
|
||||
list_destroy(&tx->tx_holds);
|
||||
#ifdef ZFS_DEBUG
|
||||
dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
|
||||
@ -1142,6 +1171,14 @@ dmu_tx_abort(dmu_tx_t *tx)
|
||||
if (dn != NULL)
|
||||
dnode_rele(dn, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call any registered callbacks with an error code.
|
||||
*/
|
||||
if (!list_is_empty(&tx->tx_callbacks))
|
||||
dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);
|
||||
|
||||
list_destroy(&tx->tx_callbacks);
|
||||
list_destroy(&tx->tx_holds);
|
||||
#ifdef ZFS_DEBUG
|
||||
refcount_destroy_many(&tx->tx_space_written,
|
||||
@ -1158,3 +1195,169 @@ dmu_tx_get_txg(dmu_tx_t *tx)
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
return (tx->tx_txg);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
|
||||
{
|
||||
dmu_tx_callback_t *dcb;
|
||||
|
||||
dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);
|
||||
|
||||
dcb->dcb_func = func;
|
||||
dcb->dcb_data = data;
|
||||
|
||||
list_insert_tail(&tx->tx_callbacks, dcb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call all the commit callbacks on a list, with a given error code.
|
||||
*/
|
||||
void
|
||||
dmu_tx_do_callbacks(list_t *cb_list, int error)
|
||||
{
|
||||
dmu_tx_callback_t *dcb;
|
||||
|
||||
while (dcb = list_head(cb_list)) {
|
||||
list_remove(cb_list, dcb);
|
||||
dcb->dcb_func(dcb->dcb_data, error);
|
||||
kmem_free(dcb, sizeof (dmu_tx_callback_t));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Interface to hold a bunch of attributes.
|
||||
* used for creating new files.
|
||||
* attrsize is the total size of all attributes
|
||||
* to be added during object creation
|
||||
*
|
||||
* For updating/adding a single attribute dmu_tx_hold_sa() should be used.
|
||||
*/
|
||||
|
||||
/*
|
||||
* hold necessary attribute name for attribute registration.
|
||||
* should be a very rare case where this is needed. If it does
|
||||
* happen it would only happen on the first write to the file system.
|
||||
*/
|
||||
static void
|
||||
dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!sa->sa_need_attr_registration)
|
||||
return;
|
||||
|
||||
for (i = 0; i != sa->sa_num_attrs; i++) {
|
||||
if (!sa->sa_attr_table[i].sa_registered) {
|
||||
if (sa->sa_reg_attr_obj)
|
||||
dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
|
||||
B_TRUE, sa->sa_attr_table[i].sa_name);
|
||||
else
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT,
|
||||
B_TRUE, sa->sa_attr_table[i].sa_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_tx_hold_t *txh;
|
||||
blkptr_t *bp;
|
||||
|
||||
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
|
||||
THT_SPILL, 0, 0);
|
||||
|
||||
dn = txh->txh_dnode;
|
||||
|
||||
if (dn == NULL)
|
||||
return;
|
||||
|
||||
/* If blkptr doesn't exist then add space to towrite */
|
||||
bp = &dn->dn_phys->dn_spill;
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tounref = 0;
|
||||
} else {
|
||||
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
bp, bp->blk_birth))
|
||||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
if (bp->blk_birth)
|
||||
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
|
||||
{
|
||||
sa_os_t *sa = tx->tx_objset->os_sa;
|
||||
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
|
||||
if (tx->tx_objset->os_sa->sa_master_obj == 0)
|
||||
return;
|
||||
|
||||
if (tx->tx_objset->os_sa->sa_layout_attr_obj)
|
||||
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
|
||||
else {
|
||||
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
|
||||
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
|
||||
}
|
||||
|
||||
dmu_tx_sa_registration_hold(sa, tx);
|
||||
|
||||
if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
|
||||
return;
|
||||
|
||||
(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
|
||||
THT_SPILL, 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold SA attribute
|
||||
*
|
||||
* dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size)
|
||||
*
|
||||
* variable_size is the total size of all variable sized attributes
|
||||
* passed to this function. It is not the total size of all
|
||||
* variable size attributes that *may* exist on this object.
|
||||
*/
|
||||
void
|
||||
dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
|
||||
{
|
||||
uint64_t object;
|
||||
sa_os_t *sa = tx->tx_objset->os_sa;
|
||||
|
||||
ASSERT(hdl != NULL);
|
||||
|
||||
object = sa_handle_object(hdl);
|
||||
|
||||
dmu_tx_hold_bonus(tx, object);
|
||||
|
||||
if (tx->tx_objset->os_sa->sa_master_obj == 0)
|
||||
return;
|
||||
|
||||
if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 ||
|
||||
tx->tx_objset->os_sa->sa_layout_attr_obj == 0) {
|
||||
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
|
||||
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
|
||||
}
|
||||
|
||||
dmu_tx_sa_registration_hold(sa, tx);
|
||||
|
||||
if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj)
|
||||
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
|
||||
|
||||
if (sa->sa_force_spill || may_grow || hdl->sa_spill ||
|
||||
((dmu_buf_impl_t *)hdl->sa_bonus)->db_dnode->dn_have_spill) {
|
||||
ASSERT(tx->tx_txg == 0);
|
||||
dmu_tx_hold_spill(tx, object);
|
||||
}
|
||||
}
|
||||
|
@ -19,18 +19,17 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_zfetch.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/kstat.h>
|
||||
|
||||
/*
|
||||
* I'm against tune-ables, but these should probably exist as tweakable globals
|
||||
@ -59,6 +58,41 @@ static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
|
||||
static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
|
||||
static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
|
||||
|
||||
typedef struct zfetch_stats {
|
||||
kstat_named_t zfetchstat_hits;
|
||||
kstat_named_t zfetchstat_misses;
|
||||
kstat_named_t zfetchstat_colinear_hits;
|
||||
kstat_named_t zfetchstat_colinear_misses;
|
||||
kstat_named_t zfetchstat_stride_hits;
|
||||
kstat_named_t zfetchstat_stride_misses;
|
||||
kstat_named_t zfetchstat_reclaim_successes;
|
||||
kstat_named_t zfetchstat_reclaim_failures;
|
||||
kstat_named_t zfetchstat_stream_resets;
|
||||
kstat_named_t zfetchstat_stream_noresets;
|
||||
kstat_named_t zfetchstat_bogus_streams;
|
||||
} zfetch_stats_t;
|
||||
|
||||
static zfetch_stats_t zfetch_stats = {
|
||||
{ "hits", KSTAT_DATA_UINT64 },
|
||||
{ "misses", KSTAT_DATA_UINT64 },
|
||||
{ "colinear_hits", KSTAT_DATA_UINT64 },
|
||||
{ "colinear_misses", KSTAT_DATA_UINT64 },
|
||||
{ "stride_hits", KSTAT_DATA_UINT64 },
|
||||
{ "stride_misses", KSTAT_DATA_UINT64 },
|
||||
{ "reclaim_successes", KSTAT_DATA_UINT64 },
|
||||
{ "reclaim_failures", KSTAT_DATA_UINT64 },
|
||||
{ "streams_resets", KSTAT_DATA_UINT64 },
|
||||
{ "streams_noresets", KSTAT_DATA_UINT64 },
|
||||
{ "bogus_streams", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
#define ZFETCHSTAT_INCR(stat, val) \
|
||||
atomic_add_64(&zfetch_stats.stat.value.ui64, (val));
|
||||
|
||||
#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1);
|
||||
|
||||
kstat_t *zfetch_ksp;
|
||||
|
||||
/*
|
||||
* Given a zfetch structure and a zstream structure, determine whether the
|
||||
* blocks to be read are part of a co-linear pair of existing prefetch
|
||||
@ -192,7 +226,30 @@ dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
|
||||
break;
|
||||
}
|
||||
zs->zst_ph_offset = prefetch_tail;
|
||||
zs->zst_last = lbolt;
|
||||
zs->zst_last = ddi_get_lbolt();
|
||||
}
|
||||
|
||||
void
|
||||
zfetch_init(void)
|
||||
{
|
||||
|
||||
zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
|
||||
KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
|
||||
KSTAT_FLAG_VIRTUAL);
|
||||
|
||||
if (zfetch_ksp != NULL) {
|
||||
zfetch_ksp->ks_data = &zfetch_stats;
|
||||
kstat_install(zfetch_ksp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
zfetch_fini(void)
|
||||
{
|
||||
if (zfetch_ksp != NULL) {
|
||||
kstat_delete(zfetch_ksp);
|
||||
zfetch_ksp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -265,7 +322,7 @@ dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
|
||||
}
|
||||
|
||||
/*
|
||||
* given a zfetch and a zsearch structure, see if there is an associated zstream
|
||||
* given a zfetch and a zstream structure, see if there is an associated zstream
|
||||
* for this block read. If so, it starts a prefetch for the stream it
|
||||
* located and returns true, otherwise it returns false
|
||||
*/
|
||||
@ -297,6 +354,7 @@ top:
|
||||
*/
|
||||
if (zs->zst_len == 0) {
|
||||
/* bogus stream */
|
||||
ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -306,9 +364,14 @@ top:
|
||||
*/
|
||||
if (zh->zst_offset >= zs->zst_offset &&
|
||||
zh->zst_offset < zs->zst_offset + zs->zst_len) {
|
||||
/* already fetched */
|
||||
rc = 1;
|
||||
goto out;
|
||||
if (prefetched) {
|
||||
/* already fetched */
|
||||
ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
|
||||
rc = 1;
|
||||
goto out;
|
||||
} else {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -413,6 +476,7 @@ top:
|
||||
if (reset) {
|
||||
zstream_t *remove = zs;
|
||||
|
||||
ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
|
||||
rc = 0;
|
||||
mutex_exit(&zs->zst_lock);
|
||||
rw_exit(&zf->zf_rwlock);
|
||||
@ -431,6 +495,7 @@ top:
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
|
||||
rc = 1;
|
||||
dmu_zfetch_dofetch(zf, zs);
|
||||
mutex_exit(&zs->zst_lock);
|
||||
@ -487,13 +552,12 @@ dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
|
||||
zs_next = list_next(&zf->zf_stream, zs_walk);
|
||||
|
||||
if (dmu_zfetch_streams_equal(zs_walk, zs)) {
|
||||
return (0);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
list_insert_head(&zf->zf_stream, zs);
|
||||
zf->zf_stream_cnt++;
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
@ -513,7 +577,7 @@ dmu_zfetch_stream_reclaim(zfetch_t *zf)
|
||||
for (zs = list_head(&zf->zf_stream); zs;
|
||||
zs = list_next(&zf->zf_stream, zs)) {
|
||||
|
||||
if (((lbolt - zs->zst_last) / hz) > zfetch_min_sec_reap)
|
||||
if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
|
||||
break;
|
||||
}
|
||||
|
||||
@ -597,8 +661,15 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
|
||||
P2ALIGN(offset, blksz)) >> blkshft;
|
||||
|
||||
fetched = dmu_zfetch_find(zf, &zst, prefetched);
|
||||
if (!fetched) {
|
||||
fetched = dmu_zfetch_colinear(zf, &zst);
|
||||
if (fetched) {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_hits);
|
||||
} else {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_misses);
|
||||
if (fetched = dmu_zfetch_colinear(zf, &zst)) {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
|
||||
} else {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
|
||||
}
|
||||
}
|
||||
|
||||
if (!fetched) {
|
||||
@ -608,11 +679,14 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
|
||||
* we still couldn't find a stream, drop the lock, and allocate
|
||||
* one if possible. Otherwise, give up and go home.
|
||||
*/
|
||||
if (newstream == NULL) {
|
||||
if (newstream) {
|
||||
ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
|
||||
} else {
|
||||
uint64_t maxblocks;
|
||||
uint32_t max_streams;
|
||||
uint32_t cur_streams;
|
||||
|
||||
ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
|
||||
cur_streams = zf->zf_stream_cnt;
|
||||
maxblocks = zf->zf_dnode->dn_maxblkid;
|
||||
|
||||
@ -625,7 +699,6 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
|
||||
if (cur_streams >= max_streams) {
|
||||
return;
|
||||
}
|
||||
|
||||
newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
|
||||
}
|
||||
|
||||
@ -635,7 +708,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
|
||||
newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
|
||||
newstream->zst_cap = zst.zst_len;
|
||||
newstream->zst_direction = ZFETCH_FORWARD;
|
||||
newstream->zst_last = lbolt;
|
||||
newstream->zst_last = ddi_get_lbolt();
|
||||
|
||||
mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -210,6 +209,11 @@ dnode_byteswap(dnode_phys_t *dnp)
|
||||
ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
|
||||
dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
|
||||
}
|
||||
|
||||
/* Swap SPILL block if we have one */
|
||||
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
|
||||
byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
@ -258,6 +262,27 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
||||
void
|
||||
dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
|
||||
dnode_setdirty(dn, tx);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
dn->dn_bonustype = newtype;
|
||||
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
||||
void
|
||||
dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
|
||||
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
|
||||
dnode_setdirty(dn, tx);
|
||||
dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
|
||||
dn->dn_have_spill = B_FALSE;
|
||||
}
|
||||
|
||||
static void
|
||||
dnode_setdblksz(dnode_t *dn, int size)
|
||||
{
|
||||
@ -272,7 +297,7 @@ dnode_setdblksz(dnode_t *dn, int size)
|
||||
}
|
||||
|
||||
static dnode_t *
|
||||
dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
uint64_t object)
|
||||
{
|
||||
dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
|
||||
@ -294,6 +319,8 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
dn->dn_bonustype = dnp->dn_bonustype;
|
||||
dn->dn_bonuslen = dnp->dn_bonuslen;
|
||||
dn->dn_maxblkid = dnp->dn_maxblkid;
|
||||
dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
|
||||
dn->dn_id_flags = 0;
|
||||
|
||||
dmu_zfetch_init(&dn->dn_zfetch, dn);
|
||||
|
||||
@ -309,7 +336,7 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
static void
|
||||
dnode_destroy(dnode_t *dn)
|
||||
{
|
||||
objset_impl_t *os = dn->dn_objset;
|
||||
objset_t *os = dn->dn_objset;
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
int i;
|
||||
@ -321,7 +348,7 @@ dnode_destroy(dnode_t *dn)
|
||||
}
|
||||
ASSERT(NULL == list_head(&dn->dn_dbufs));
|
||||
#endif
|
||||
ASSERT(dn->dn_oldphys == NULL);
|
||||
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
list_remove(&os->os_dnodes, dn);
|
||||
@ -368,6 +395,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
ASSERT(ot != DMU_OT_NONE);
|
||||
ASSERT3U(ot, <, DMU_OT_NUMTYPES);
|
||||
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
|
||||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
|
||||
(bonustype != DMU_OT_NONE && bonuslen != 0));
|
||||
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
|
||||
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
|
||||
@ -383,6 +411,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
ASSERT3U(dn->dn_next_nlevels[i], ==, 0);
|
||||
ASSERT3U(dn->dn_next_indblkshift[i], ==, 0);
|
||||
ASSERT3U(dn->dn_next_bonuslen[i], ==, 0);
|
||||
ASSERT3U(dn->dn_next_bonustype[i], ==, 0);
|
||||
ASSERT3U(dn->dn_rm_spillblk[i], ==, 0);
|
||||
ASSERT3U(dn->dn_next_blksz[i], ==, 0);
|
||||
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
|
||||
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
|
||||
@ -393,7 +423,11 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
dnode_setdblksz(dn, blocksize);
|
||||
dn->dn_indblkshift = ibs;
|
||||
dn->dn_nlevels = 1;
|
||||
dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
|
||||
dn->dn_nblkptr = 1;
|
||||
else
|
||||
dn->dn_nblkptr = 1 +
|
||||
((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
dn->dn_bonustype = bonustype;
|
||||
dn->dn_bonuslen = bonuslen;
|
||||
dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
|
||||
@ -407,10 +441,12 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
}
|
||||
|
||||
dn->dn_allocated_txg = tx->tx_txg;
|
||||
dn->dn_id_flags = 0;
|
||||
|
||||
dnode_setdirty(dn, tx);
|
||||
dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
|
||||
dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
|
||||
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
|
||||
dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
|
||||
}
|
||||
|
||||
@ -426,13 +462,16 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
|
||||
(bonustype != DMU_OT_NONE && bonuslen != 0));
|
||||
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
|
||||
(bonustype == DMU_OT_SA && bonuslen == 0));
|
||||
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
|
||||
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
|
||||
|
||||
/* clean up any unreferenced dbufs */
|
||||
dnode_evict_dbufs(dn);
|
||||
|
||||
dn->dn_id_flags = 0;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
dnode_setdirty(dn, tx);
|
||||
if (dn->dn_datablksz != blocksize) {
|
||||
@ -445,9 +484,19 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
}
|
||||
if (dn->dn_bonuslen != bonuslen)
|
||||
dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
|
||||
if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
|
||||
nblkptr = 1;
|
||||
else
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
if (dn->dn_bonustype != bonustype)
|
||||
dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
|
||||
if (dn->dn_nblkptr != nblkptr)
|
||||
dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
|
||||
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
|
||||
dbuf_rm_spill(dn, tx);
|
||||
dnode_rm_spill(dn, tx);
|
||||
}
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
/* change type */
|
||||
@ -488,7 +537,7 @@ dnode_special_close(dnode_t *dn)
|
||||
}
|
||||
|
||||
dnode_t *
|
||||
dnode_special_open(objset_impl_t *os, dnode_phys_t *dnp, uint64_t object)
|
||||
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object)
|
||||
{
|
||||
dnode_t *dn = dnode_create(os, dnp, NULL, object);
|
||||
DNODE_VERIFY(dn);
|
||||
@ -535,7 +584,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *arg)
|
||||
* succeeds even for free dnodes.
|
||||
*/
|
||||
int
|
||||
dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
|
||||
dnode_hold_impl(objset_t *os, uint64_t object, int flag,
|
||||
void *tag, dnode_t **dnp)
|
||||
{
|
||||
int epb, idx, err;
|
||||
@ -548,9 +597,14 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
|
||||
|
||||
/*
|
||||
* If you are holding the spa config lock as writer, you shouldn't
|
||||
* be asking the DMU to do *anything*.
|
||||
* be asking the DMU to do *anything* unless it's the root pool
|
||||
* which may require us to read from the root filesystem while
|
||||
* holding some (not all) of the locks as writer.
|
||||
*/
|
||||
ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0);
|
||||
ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 ||
|
||||
(spa_is_root(os->os_spa) &&
|
||||
spa_config_held(os->os_spa, SCL_STATE, RW_WRITER) &&
|
||||
!spa_config_held(os->os_spa, SCL_ZIO, RW_WRITER)));
|
||||
|
||||
if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
|
||||
dn = (object == DMU_USERUSED_OBJECT) ?
|
||||
@ -627,7 +681,7 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
|
||||
if (dn->dn_free_txg ||
|
||||
((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
|
||||
((flag & DNODE_MUST_BE_FREE) &&
|
||||
(type != DMU_OT_NONE || dn->dn_oldphys))) {
|
||||
(type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
dbuf_rele(db, FTAG);
|
||||
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
|
||||
@ -650,7 +704,7 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
|
||||
* Return held dnode if the object is allocated, NULL if not.
|
||||
*/
|
||||
int
|
||||
dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp)
|
||||
dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
|
||||
{
|
||||
return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
|
||||
}
|
||||
@ -689,7 +743,7 @@ dnode_rele(dnode_t *dn, void *tag)
|
||||
void
|
||||
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
|
||||
{
|
||||
objset_impl_t *os = dn->dn_objset;
|
||||
objset_t *os = dn->dn_objset;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
|
||||
if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||
@ -706,6 +760,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Determine old uid/gid when necessary
|
||||
*/
|
||||
dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
|
||||
/*
|
||||
@ -720,6 +779,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
|
||||
ASSERT(dn->dn_datablksz != 0);
|
||||
ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0);
|
||||
ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0);
|
||||
ASSERT3U(dn->dn_next_bonustype[txg&TXG_MASK], ==, 0);
|
||||
|
||||
dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
|
||||
dn->dn_object, txg);
|
||||
@ -814,7 +874,8 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
||||
for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
|
||||
db_next = list_next(&dn->dn_dbufs, db);
|
||||
|
||||
if (db->db_blkid != 0 && db->db_blkid != DB_BONUS_BLKID) {
|
||||
if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID &&
|
||||
db->db_blkid != DMU_SPILL_BLKID) {
|
||||
mutex_exit(&dn->dn_dbufs_mtx);
|
||||
goto fail;
|
||||
}
|
||||
@ -858,7 +919,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
|
||||
int epbs, new_nlevels;
|
||||
uint64_t sz;
|
||||
|
||||
ASSERT(blkid != DB_BONUS_BLKID);
|
||||
ASSERT(blkid != DMU_BONUS_BLKID);
|
||||
|
||||
ASSERT(have_read ?
|
||||
RW_READ_HELD(&dn->dn_struct_rwlock) :
|
||||
@ -905,6 +966,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
|
||||
|
||||
/* dirty the left indirects */
|
||||
db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
|
||||
ASSERT(db != NULL);
|
||||
new = dbuf_dirty(db, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
|
||||
@ -915,7 +977,8 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
|
||||
for (dr = list_head(list); dr; dr = dr_next) {
|
||||
dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
|
||||
if (dr->dr_dbuf->db_level != new_nlevels-1 &&
|
||||
dr->dr_dbuf->db_blkid != DB_BONUS_BLKID) {
|
||||
dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
|
||||
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
|
||||
ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
|
||||
list_remove(&dn->dn_dirty_records[txgoff], dr);
|
||||
list_insert_tail(&new->dt.di.dr_children, dr);
|
||||
@ -1170,6 +1233,20 @@ out:
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
dnode_spill_freed(dnode_t *dn)
|
||||
{
|
||||
int i;
|
||||
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
for (i = 0; i < TXG_SIZE; i++) {
|
||||
if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK)
|
||||
break;
|
||||
}
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
return (i < TXG_SIZE);
|
||||
}
|
||||
|
||||
/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */
|
||||
uint64_t
|
||||
dnode_block_freed(dnode_t *dn, uint64_t blkid)
|
||||
@ -1178,7 +1255,7 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
|
||||
void *dp = spa_get_dsl(dn->dn_objset->os_spa);
|
||||
int i;
|
||||
|
||||
if (blkid == DB_BONUS_BLKID)
|
||||
if (blkid == DMU_BONUS_BLKID)
|
||||
return (FALSE);
|
||||
|
||||
/*
|
||||
@ -1191,6 +1268,9 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
|
||||
if (dn->dn_free_txg)
|
||||
return (TRUE);
|
||||
|
||||
if (blkid == DMU_SPILL_BLKID)
|
||||
return (dnode_spill_freed(dn));
|
||||
|
||||
range_tofind.fr_blkid = blkid;
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
for (i = 0; i < TXG_SIZE; i++) {
|
||||
@ -1248,7 +1328,7 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
|
||||
void
|
||||
dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
|
||||
{
|
||||
objset_impl_t *os = dn->dn_objset;
|
||||
objset_t *os = dn->dn_objset;
|
||||
dsl_dataset_t *ds = os->os_dsl_dataset;
|
||||
|
||||
if (space > 0)
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -120,7 +119,7 @@ free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
|
||||
bytesfreed += dsl_dataset_block_kill(ds, bp, dn->dn_zio, tx);
|
||||
bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
|
||||
ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
|
||||
bzero(bp, sizeof (blkptr_t));
|
||||
blocks_freed += 1;
|
||||
@ -228,7 +227,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
||||
if (db->db_state != DB_CACHED)
|
||||
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
|
||||
|
||||
arc_release(db->db_buf, db);
|
||||
dbuf_release_bp(db);
|
||||
bp = (blkptr_t *)db->db.db_data;
|
||||
|
||||
epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
@ -424,6 +423,9 @@ dnode_undirty_dbufs(list_t *list)
|
||||
dmu_buf_impl_t *db = dr->dr_dbuf;
|
||||
uint64_t txg = dr->dr_txg;
|
||||
|
||||
if (db->db_level != 0)
|
||||
dnode_undirty_dbufs(&dr->dt.di.dr_children);
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
/* XXX - use dbuf_undirty()? */
|
||||
list_remove(list, dr);
|
||||
@ -431,16 +433,12 @@ dnode_undirty_dbufs(list_t *list)
|
||||
db->db_last_dirty = NULL;
|
||||
db->db_dirtycnt -= 1;
|
||||
if (db->db_level == 0) {
|
||||
ASSERT(db->db_blkid == DB_BONUS_BLKID ||
|
||||
ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
|
||||
dr->dt.dl.dr_data == db->db_buf);
|
||||
dbuf_unoverride(dr);
|
||||
mutex_exit(&db->db_mtx);
|
||||
} else {
|
||||
mutex_exit(&db->db_mtx);
|
||||
dnode_undirty_dbufs(&dr->dt.di.dr_children);
|
||||
}
|
||||
kmem_free(dr, sizeof (dbuf_dirty_record_t));
|
||||
dbuf_rele(db, (void *)(uintptr_t)txg);
|
||||
dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
|
||||
}
|
||||
}
|
||||
|
||||
@ -491,6 +489,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
||||
dn->dn_maxblkid = 0;
|
||||
dn->dn_allocated_txg = 0;
|
||||
dn->dn_free_txg = 0;
|
||||
dn->dn_have_spill = B_FALSE;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
|
||||
@ -513,6 +512,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
int txgoff = tx->tx_txg & TXG_MASK;
|
||||
list_t *list = &dn->dn_dirty_records[txgoff];
|
||||
static const dnode_phys_t zerodn = { 0 };
|
||||
boolean_t kill_spill = B_FALSE;
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
|
||||
@ -524,10 +524,12 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
|
||||
if (dmu_objset_userused_enabled(dn->dn_objset) &&
|
||||
!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||
ASSERT(dn->dn_oldphys == NULL);
|
||||
dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t));
|
||||
*dn->dn_oldphys = *dn->dn_phys; /* struct assignment */
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
|
||||
dn->dn_oldflags = dn->dn_phys->dn_flags;
|
||||
dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
|
||||
} else {
|
||||
/* Once we account for it, we should always account for it. */
|
||||
ASSERT(!(dn->dn_phys->dn_flags &
|
||||
@ -558,6 +560,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
SPA_MINBLOCKSIZE) == 0);
|
||||
ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
|
||||
dn->dn_maxblkid == 0 || list_head(list) != NULL ||
|
||||
avl_last(&dn->dn_ranges[txgoff]) ||
|
||||
dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
|
||||
dnp->dn_datablkszsec);
|
||||
dnp->dn_datablkszsec =
|
||||
@ -574,6 +577,24 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
dn->dn_next_bonuslen[txgoff] = 0;
|
||||
}
|
||||
|
||||
if (dn->dn_next_bonustype[txgoff]) {
|
||||
ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
|
||||
dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
|
||||
dn->dn_next_bonustype[txgoff] = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We will either remove a spill block when a file is being removed
|
||||
* or we have been asked to remove it.
|
||||
*/
|
||||
if (dn->dn_rm_spillblk[txgoff] ||
|
||||
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
|
||||
dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
|
||||
if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
|
||||
kill_spill = B_TRUE;
|
||||
dn->dn_rm_spillblk[txgoff] = 0;
|
||||
}
|
||||
|
||||
if (dn->dn_next_indblkshift[txgoff]) {
|
||||
ASSERT(dnp->dn_nlevels == 1);
|
||||
dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
|
||||
@ -590,6 +611,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
if (kill_spill) {
|
||||
(void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
}
|
||||
|
||||
/* process all the "freed" ranges in the file */
|
||||
while (rp = avl_last(&dn->dn_ranges[txgoff])) {
|
||||
dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);
|
||||
|
File diff suppressed because it is too large
Load Diff
474
module/zfs/dsl_deadlist.c
Normal file
474
module/zfs/dsl_deadlist.c
Normal file
@ -0,0 +1,474 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
|
||||
static int
|
||||
dsl_deadlist_compare(const void *arg1, const void *arg2)
|
||||
{
|
||||
const dsl_deadlist_entry_t *dle1 = arg1;
|
||||
const dsl_deadlist_entry_t *dle2 = arg2;
|
||||
|
||||
if (dle1->dle_mintxg < dle2->dle_mintxg)
|
||||
return (-1);
|
||||
else if (dle1->dle_mintxg > dle2->dle_mintxg)
|
||||
return (+1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_deadlist_load_tree(dsl_deadlist_t *dl)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
|
||||
ASSERT(!dl->dl_oldfmt);
|
||||
if (dl->dl_havetree)
|
||||
return;
|
||||
|
||||
avl_create(&dl->dl_tree, dsl_deadlist_compare,
|
||||
sizeof (dsl_deadlist_entry_t),
|
||||
offsetof(dsl_deadlist_entry_t, dle_node));
|
||||
for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
|
||||
dle->dle_mintxg = strtonum(za.za_name, NULL);
|
||||
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os,
|
||||
za.za_first_integer));
|
||||
avl_add(&dl->dl_tree, dle);
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
dl->dl_havetree = B_TRUE;
|
||||
}
|
||||
|
||||
void
|
||||
dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object)
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
|
||||
mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
dl->dl_os = os;
|
||||
dl->dl_object = object;
|
||||
VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
|
||||
dmu_object_info_from_db(dl->dl_dbuf, &doi);
|
||||
if (doi.doi_type == DMU_OT_BPOBJ) {
|
||||
dmu_buf_rele(dl->dl_dbuf, dl);
|
||||
dl->dl_dbuf = NULL;
|
||||
dl->dl_oldfmt = B_TRUE;
|
||||
VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object));
|
||||
return;
|
||||
}
|
||||
|
||||
dl->dl_oldfmt = B_FALSE;
|
||||
dl->dl_phys = dl->dl_dbuf->db_data;
|
||||
dl->dl_havetree = B_FALSE;
|
||||
}
|
||||
|
||||
void
|
||||
dsl_deadlist_close(dsl_deadlist_t *dl)
|
||||
{
|
||||
void *cookie = NULL;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
|
||||
if (dl->dl_oldfmt) {
|
||||
dl->dl_oldfmt = B_FALSE;
|
||||
bpobj_close(&dl->dl_bpobj);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dl->dl_havetree) {
|
||||
while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie))
|
||||
!= NULL) {
|
||||
bpobj_close(&dle->dle_bpobj);
|
||||
kmem_free(dle, sizeof (*dle));
|
||||
}
|
||||
avl_destroy(&dl->dl_tree);
|
||||
}
|
||||
dmu_buf_rele(dl->dl_dbuf, dl);
|
||||
mutex_destroy(&dl->dl_lock);
|
||||
dl->dl_dbuf = NULL;
|
||||
dl->dl_phys = NULL;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx)
|
||||
{
|
||||
if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
|
||||
return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx));
|
||||
return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR,
|
||||
sizeof (dsl_deadlist_phys_t), tx));
|
||||
}
|
||||
|
||||
void
|
||||
dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
|
||||
VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi));
|
||||
if (doi.doi_type == DMU_OT_BPOBJ) {
|
||||
bpobj_free(os, dlobj, tx);
|
||||
return;
|
||||
}
|
||||
|
||||
for (zap_cursor_init(&zc, os, dlobj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc))
|
||||
bpobj_free(os, za.za_first_integer, tx);
|
||||
zap_cursor_fini(&zc);
|
||||
VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx));
|
||||
}
|
||||
|
||||
void
|
||||
dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
avl_index_t where;
|
||||
|
||||
if (dl->dl_oldfmt) {
|
||||
bpobj_enqueue(&dl->dl_bpobj, bp, tx);
|
||||
return;
|
||||
}
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
|
||||
dmu_buf_will_dirty(dl->dl_dbuf, tx);
|
||||
mutex_enter(&dl->dl_lock);
|
||||
dl->dl_phys->dl_used +=
|
||||
bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
|
||||
dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
|
||||
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
|
||||
mutex_exit(&dl->dl_lock);
|
||||
|
||||
dle_tofind.dle_mintxg = bp->blk_birth;
|
||||
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
|
||||
if (dle == NULL)
|
||||
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
|
||||
else
|
||||
dle = AVL_PREV(&dl->dl_tree, dle);
|
||||
bpobj_enqueue(&dle->dle_bpobj, bp, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert new key in deadlist, which must be > all current entries.
|
||||
* mintxg is not inclusive.
|
||||
*/
|
||||
void
|
||||
dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t obj;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
|
||||
if (dl->dl_oldfmt)
|
||||
return;
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
|
||||
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
|
||||
dle->dle_mintxg = mintxg;
|
||||
obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
|
||||
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
|
||||
avl_add(&dl->dl_tree, dle);
|
||||
|
||||
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
|
||||
mintxg, obj, tx));
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove this key, merging its entries into the previous key.
|
||||
*/
|
||||
void
|
||||
dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
dsl_deadlist_entry_t *dle, *dle_prev;
|
||||
|
||||
if (dl->dl_oldfmt)
|
||||
return;
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
|
||||
dle_tofind.dle_mintxg = mintxg;
|
||||
dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
|
||||
dle_prev = AVL_PREV(&dl->dl_tree, dle);
|
||||
|
||||
bpobj_enqueue_subobj(&dle_prev->dle_bpobj,
|
||||
dle->dle_bpobj.bpo_object, tx);
|
||||
|
||||
avl_remove(&dl->dl_tree, dle);
|
||||
bpobj_close(&dle->dle_bpobj);
|
||||
kmem_free(dle, sizeof (*dle));
|
||||
|
||||
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk ds's snapshots to regenerate generate ZAP & AVL.
|
||||
*/
|
||||
static void
|
||||
dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj,
|
||||
uint64_t mrs_obj, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_t dl;
|
||||
dsl_pool_t *dp = dmu_objset_pool(os);
|
||||
|
||||
dsl_deadlist_open(&dl, os, dlobj);
|
||||
if (dl.dl_oldfmt) {
|
||||
dsl_deadlist_close(&dl);
|
||||
return;
|
||||
}
|
||||
|
||||
while (mrs_obj != 0) {
|
||||
dsl_dataset_t *ds;
|
||||
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
|
||||
dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx);
|
||||
mrs_obj = ds->ds_phys->ds_prev_snap_obj;
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
dsl_deadlist_close(&dl);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
|
||||
uint64_t mrs_obj, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_entry_t *dle;
|
||||
uint64_t newobj;
|
||||
|
||||
newobj = dsl_deadlist_alloc(dl->dl_os, tx);
|
||||
|
||||
if (dl->dl_oldfmt) {
|
||||
dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx);
|
||||
return (newobj);
|
||||
}
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
|
||||
for (dle = avl_first(&dl->dl_tree); dle;
|
||||
dle = AVL_NEXT(&dl->dl_tree, dle)) {
|
||||
uint64_t obj;
|
||||
|
||||
if (dle->dle_mintxg >= maxtxg)
|
||||
break;
|
||||
|
||||
obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
|
||||
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
|
||||
dle->dle_mintxg, obj, tx));
|
||||
}
|
||||
return (newobj);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_deadlist_space(dsl_deadlist_t *dl,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
if (dl->dl_oldfmt) {
|
||||
VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj,
|
||||
usedp, compp, uncompp));
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_enter(&dl->dl_lock);
|
||||
*usedp = dl->dl_phys->dl_used;
|
||||
*compp = dl->dl_phys->dl_comp;
|
||||
*uncompp = dl->dl_phys->dl_uncomp;
|
||||
mutex_exit(&dl->dl_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* return space used in the range (mintxg, maxtxg].
|
||||
* Includes maxtxg, does not include mintxg.
|
||||
* mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
|
||||
* UINT64_MAX).
|
||||
*/
|
||||
void
|
||||
dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
avl_index_t where;
|
||||
|
||||
if (dl->dl_oldfmt) {
|
||||
VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj,
|
||||
mintxg, maxtxg, usedp, compp, uncompp));
|
||||
return;
|
||||
}
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
*usedp = *compp = *uncompp = 0;
|
||||
|
||||
dle_tofind.dle_mintxg = mintxg;
|
||||
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
|
||||
/*
|
||||
* If we don't find this mintxg, there shouldn't be anything
|
||||
* after it either.
|
||||
*/
|
||||
ASSERT(dle != NULL ||
|
||||
avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
|
||||
for (; dle && dle->dle_mintxg < maxtxg;
|
||||
dle = AVL_NEXT(&dl->dl_tree, dle)) {
|
||||
uint64_t used, comp, uncomp;
|
||||
|
||||
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
|
||||
&used, &comp, &uncomp));
|
||||
|
||||
*usedp += used;
|
||||
*compp += comp;
|
||||
*uncompp += uncomp;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
avl_index_t where;
|
||||
uint64_t used, comp, uncomp;
|
||||
bpobj_t bpo;
|
||||
|
||||
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
|
||||
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
|
||||
bpobj_close(&bpo);
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
|
||||
dmu_buf_will_dirty(dl->dl_dbuf, tx);
|
||||
mutex_enter(&dl->dl_lock);
|
||||
dl->dl_phys->dl_used += used;
|
||||
dl->dl_phys->dl_comp += comp;
|
||||
dl->dl_phys->dl_uncomp += uncomp;
|
||||
mutex_exit(&dl->dl_lock);
|
||||
|
||||
dle_tofind.dle_mintxg = birth;
|
||||
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
|
||||
if (dle == NULL)
|
||||
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
|
||||
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_t *dl = arg;
|
||||
dsl_deadlist_insert(dl, bp, tx);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge the deadlist pointed to by 'obj' into dl. obj will be left as
|
||||
* an empty deadlist.
|
||||
*/
|
||||
void
|
||||
dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
dmu_buf_t *bonus;
|
||||
dsl_deadlist_phys_t *dlp;
|
||||
dmu_object_info_t doi;
|
||||
|
||||
VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi));
|
||||
if (doi.doi_type == DMU_OT_BPOBJ) {
|
||||
bpobj_t bpo;
|
||||
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
|
||||
VERIFY3U(0, ==, bpobj_iterate(&bpo,
|
||||
dsl_deadlist_insert_cb, dl, tx));
|
||||
bpobj_close(&bpo);
|
||||
return;
|
||||
}
|
||||
|
||||
for (zap_cursor_init(&zc, dl->dl_os, obj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
uint64_t mintxg = strtonum(za.za_name, NULL);
|
||||
dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
|
||||
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx));
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
|
||||
VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
|
||||
dlp = bonus->db_data;
|
||||
dmu_buf_will_dirty(bonus, tx);
|
||||
bzero(dlp, sizeof (*dlp));
|
||||
dmu_buf_rele(bonus, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove entries on dl that are >= mintxg, and put them on the bpobj.
|
||||
*/
|
||||
void
|
||||
dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
avl_index_t where;
|
||||
|
||||
ASSERT(!dl->dl_oldfmt);
|
||||
dmu_buf_will_dirty(dl->dl_dbuf, tx);
|
||||
dsl_deadlist_load_tree(dl);
|
||||
|
||||
dle_tofind.dle_mintxg = mintxg;
|
||||
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
|
||||
if (dle == NULL)
|
||||
dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
|
||||
while (dle) {
|
||||
uint64_t used, comp, uncomp;
|
||||
dsl_deadlist_entry_t *dle_next;
|
||||
|
||||
bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
|
||||
|
||||
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
|
||||
&used, &comp, &uncomp));
|
||||
mutex_enter(&dl->dl_lock);
|
||||
ASSERT3U(dl->dl_phys->dl_used, >=, used);
|
||||
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
|
||||
ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
|
||||
dl->dl_phys->dl_used -= used;
|
||||
dl->dl_phys->dl_comp -= comp;
|
||||
dl->dl_phys->dl_uncomp -= uncomp;
|
||||
mutex_exit(&dl->dl_lock);
|
||||
|
||||
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
|
||||
dle->dle_mintxg, tx));
|
||||
|
||||
dle_next = AVL_NEXT(&dl->dl_tree, dle);
|
||||
avl_remove(&dl->dl_tree, dle);
|
||||
bpobj_close(&dle->dle_bpobj);
|
||||
kmem_free(dle, sizeof (*dle));
|
||||
dle = dle_next;
|
||||
}
|
||||
}
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -75,8 +74,6 @@
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/zio_checksum.h> /* for the default checksum value */
|
||||
#include <sys/zap.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/cred.h>
|
||||
@ -150,7 +147,7 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
nvlist_t *nvp = arg2;
|
||||
@ -185,8 +182,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
|
||||
VERIFY(zap_update(mos, jumpobj,
|
||||
perm, 8, 1, &n, tx) == 0);
|
||||
spa_history_internal_log(LOG_DS_PERM_UPDATE,
|
||||
dd->dd_pool->dp_spa, tx, cr,
|
||||
spa_history_log_internal(LOG_DS_PERM_UPDATE,
|
||||
dd->dd_pool->dp_spa, tx,
|
||||
"%s %s dataset = %llu", whokey, perm,
|
||||
dd->dd_phys->dd_head_dataset_obj);
|
||||
}
|
||||
@ -194,7 +191,7 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
nvlist_t *nvp = arg2;
|
||||
@ -217,8 +214,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
(void) zap_remove(mos, zapobj, whokey, tx);
|
||||
VERIFY(0 == zap_destroy(mos, jumpobj, tx));
|
||||
}
|
||||
spa_history_internal_log(LOG_DS_PERM_WHO_REMOVE,
|
||||
dd->dd_pool->dp_spa, tx, cr,
|
||||
spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE,
|
||||
dd->dd_pool->dp_spa, tx,
|
||||
"%s dataset = %llu", whokey,
|
||||
dd->dd_phys->dd_head_dataset_obj);
|
||||
continue;
|
||||
@ -238,8 +235,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
VERIFY(0 == zap_destroy(mos,
|
||||
jumpobj, tx));
|
||||
}
|
||||
spa_history_internal_log(LOG_DS_PERM_REMOVE,
|
||||
dd->dd_pool->dp_spa, tx, cr,
|
||||
spa_history_log_internal(LOG_DS_PERM_REMOVE,
|
||||
dd->dd_pool->dp_spa, tx,
|
||||
"%s %s dataset = %llu", whokey, perm,
|
||||
dd->dd_phys->dd_head_dataset_obj);
|
||||
}
|
||||
@ -589,7 +586,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
|
||||
|
||||
if (dsl_prop_get_dd(dd,
|
||||
zfs_prop_to_name(ZFS_PROP_ZONED),
|
||||
8, 1, &zoned, NULL) != 0)
|
||||
8, 1, &zoned, NULL, B_FALSE) != 0)
|
||||
break;
|
||||
if (!zoned)
|
||||
break;
|
||||
@ -739,5 +736,5 @@ dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx)
|
||||
boolean_t
|
||||
dsl_delegation_on(objset_t *os)
|
||||
{
|
||||
return (os->os->os_spa->spa_delegation);
|
||||
return (!!spa_delegation(os->os_spa));
|
||||
}
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
@ -32,6 +31,7 @@
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/arc.h>
|
||||
@ -39,8 +39,7 @@
|
||||
#include "zfs_namecheck.h"
|
||||
|
||||
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
|
||||
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
|
||||
|
||||
|
||||
/* ARGSUSED */
|
||||
@ -63,8 +62,8 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
|
||||
spa_close(dd->dd_pool->dp_spa, dd);
|
||||
|
||||
/*
|
||||
* The props callback list should be empty since they hold the
|
||||
* dir open.
|
||||
* The props callback list should have been cleaned up by
|
||||
* objset_evict().
|
||||
*/
|
||||
list_destroy(&dd->dd_prop_cbs);
|
||||
mutex_destroy(&dd->dd_lock);
|
||||
@ -107,6 +106,8 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
|
||||
offsetof(dsl_prop_cb_record_t, cbr_node));
|
||||
|
||||
dsl_dir_snap_cmtime_update(dd);
|
||||
|
||||
if (dd->dd_phys->dd_parent_obj) {
|
||||
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
|
||||
NULL, dd, &dd->dd_parent);
|
||||
@ -133,6 +134,25 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
|
||||
}
|
||||
|
||||
if (dsl_dir_is_clone(dd)) {
|
||||
dmu_buf_t *origin_bonus;
|
||||
dsl_dataset_phys_t *origin_phys;
|
||||
|
||||
/*
|
||||
* We can't open the origin dataset, because
|
||||
* that would require opening this dsl_dir.
|
||||
* Just look at its phys directly instead.
|
||||
*/
|
||||
err = dmu_bonus_hold(dp->dp_meta_objset,
|
||||
dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
|
||||
if (err)
|
||||
goto errout;
|
||||
origin_phys = origin_bonus->db_data;
|
||||
dd->dd_origin_txg =
|
||||
origin_phys->ds_creation_txg;
|
||||
dmu_buf_rele(origin_bonus, FTAG);
|
||||
}
|
||||
|
||||
winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
|
||||
dsl_dir_evict);
|
||||
if (winner) {
|
||||
@ -392,7 +412,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
|
||||
{
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
uint64_t ddobj;
|
||||
dsl_dir_phys_t *dsphys;
|
||||
dsl_dir_phys_t *ddphys;
|
||||
dmu_buf_t *dbuf;
|
||||
|
||||
ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
|
||||
@ -407,17 +427,17 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
|
||||
}
|
||||
VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
|
||||
dmu_buf_will_dirty(dbuf, tx);
|
||||
dsphys = dbuf->db_data;
|
||||
ddphys = dbuf->db_data;
|
||||
|
||||
dsphys->dd_creation_time = gethrestime_sec();
|
||||
ddphys->dd_creation_time = gethrestime_sec();
|
||||
if (pds)
|
||||
dsphys->dd_parent_obj = pds->dd_object;
|
||||
dsphys->dd_props_zapobj = zap_create(mos,
|
||||
ddphys->dd_parent_obj = pds->dd_object;
|
||||
ddphys->dd_props_zapobj = zap_create(mos,
|
||||
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
|
||||
dsphys->dd_child_dir_zapobj = zap_create(mos,
|
||||
ddphys->dd_child_dir_zapobj = zap_create(mos,
|
||||
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
|
||||
if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
|
||||
dsphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
|
||||
ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
|
||||
return (ddobj);
|
||||
@ -427,7 +447,8 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
|
||||
int
|
||||
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
dsl_dataset_t *ds = arg1;
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
dsl_pool_t *dp = dd->dd_pool;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
int err;
|
||||
@ -454,19 +475,27 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
dsl_dataset_t *ds = arg1;
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
objset_t *mos = dd->dd_pool->dp_meta_objset;
|
||||
uint64_t val, obj;
|
||||
dsl_prop_setarg_t psa;
|
||||
uint64_t value = 0;
|
||||
uint64_t obj;
|
||||
dd_used_t t;
|
||||
|
||||
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
|
||||
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
|
||||
|
||||
/* Remove our reservation. */
|
||||
val = 0;
|
||||
dsl_dir_set_reservation_sync(dd, &val, cr, tx);
|
||||
dsl_prop_setarg_init_uint64(&psa, "reservation",
|
||||
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
|
||||
&value);
|
||||
psa.psa_effective_value = 0; /* predict default value */
|
||||
|
||||
dsl_dir_set_reservation_sync(ds, &psa, tx);
|
||||
|
||||
ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
|
||||
ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
|
||||
for (t = 0; t < DD_USED_NUM; t++)
|
||||
@ -640,15 +669,6 @@ dsl_dir_space_available(dsl_dir_t *dd,
|
||||
if (used > quota) {
|
||||
/* over quota */
|
||||
myspace = 0;
|
||||
|
||||
/*
|
||||
* While it's OK to be a little over quota, if
|
||||
* we think we are using more space than there
|
||||
* is in the pool (which is already 1.6% more than
|
||||
* dsl_pool_adjustedsize()), something is very
|
||||
* wrong.
|
||||
*/
|
||||
ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
|
||||
} else {
|
||||
/*
|
||||
* the lesser of the space provided by our parent and
|
||||
@ -676,8 +696,9 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
|
||||
{
|
||||
uint64_t txg = tx->tx_txg;
|
||||
uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
|
||||
uint64_t deferred = 0;
|
||||
struct tempreserve *tr;
|
||||
int enospc = EDQUOT;
|
||||
int retval = EDQUOT;
|
||||
int txgidx = txg & TXG_MASK;
|
||||
int i;
|
||||
uint64_t ref_rsrv = 0;
|
||||
@ -703,7 +724,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
|
||||
*/
|
||||
if (first && tx->tx_objset) {
|
||||
int error;
|
||||
dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
|
||||
dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
|
||||
|
||||
error = dsl_dataset_check_quota(ds, checkrefquota,
|
||||
asize, est_inflight, &used_on_disk, &ref_rsrv);
|
||||
@ -723,7 +744,8 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
|
||||
quota = dd->dd_phys->dd_quota;
|
||||
|
||||
/*
|
||||
* Adjust the quota against the actual pool size at the root.
|
||||
* Adjust the quota against the actual pool size at the root
|
||||
* minus any outstanding deferred frees.
|
||||
* To ensure that it's possible to remove files from a full
|
||||
* pool without inducing transient overcommits, we throttle
|
||||
* netfree transactions against a quota that is slightly larger,
|
||||
@ -732,10 +754,12 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
|
||||
* removes to get through.
|
||||
*/
|
||||
if (dd->dd_parent == NULL) {
|
||||
spa_t *spa = dd->dd_pool->dp_spa;
|
||||
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
|
||||
if (poolsize < quota) {
|
||||
quota = poolsize;
|
||||
enospc = ENOSPC;
|
||||
deferred = metaslab_class_get_deferred(spa_normal_class(spa));
|
||||
if (poolsize - deferred < quota) {
|
||||
quota = poolsize - deferred;
|
||||
retval = ENOSPC;
|
||||
}
|
||||
}
|
||||
|
||||
@ -745,15 +769,16 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
|
||||
* on-disk is over quota and there are no pending changes (which
|
||||
* may free up space for us).
|
||||
*/
|
||||
if (used_on_disk + est_inflight > quota) {
|
||||
if (est_inflight > 0 || used_on_disk < quota)
|
||||
enospc = ERESTART;
|
||||
if (used_on_disk + est_inflight >= quota) {
|
||||
if (est_inflight > 0 || used_on_disk < quota ||
|
||||
(retval == ENOSPC && used_on_disk < quota + deferred))
|
||||
retval = ERESTART;
|
||||
dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
|
||||
"quota=%lluK tr=%lluK err=%d\n",
|
||||
used_on_disk>>10, est_inflight>>10,
|
||||
quota>>10, asize>>10, enospc);
|
||||
quota>>10, asize>>10, retval);
|
||||
mutex_exit(&dd->dd_lock);
|
||||
return (enospc);
|
||||
return (retval);
|
||||
}
|
||||
|
||||
/* We need to up our estimated delta before dropping dd_lock */
|
||||
@ -987,13 +1012,16 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
|
||||
static int
|
||||
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
uint64_t *quotap = arg2;
|
||||
uint64_t new_quota = *quotap;
|
||||
int err = 0;
|
||||
dsl_dataset_t *ds = arg1;
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
dsl_prop_setarg_t *psa = arg2;
|
||||
int err;
|
||||
uint64_t towrite;
|
||||
|
||||
if (new_quota == 0)
|
||||
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
|
||||
return (err);
|
||||
|
||||
if (psa->psa_effective_value == 0)
|
||||
return (0);
|
||||
|
||||
mutex_enter(&dd->dd_lock);
|
||||
@ -1005,64 +1033,88 @@ dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
*/
|
||||
towrite = dsl_dir_space_towrite(dd);
|
||||
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
|
||||
(new_quota < dd->dd_phys->dd_reserved ||
|
||||
new_quota < dd->dd_phys->dd_used_bytes + towrite)) {
|
||||
(psa->psa_effective_value < dd->dd_phys->dd_reserved ||
|
||||
psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
|
||||
err = ENOSPC;
|
||||
}
|
||||
mutex_exit(&dd->dd_lock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
extern dsl_syncfunc_t dsl_prop_set_sync;
|
||||
|
||||
static void
|
||||
dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
uint64_t *quotap = arg2;
|
||||
uint64_t new_quota = *quotap;
|
||||
dsl_dataset_t *ds = arg1;
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
dsl_prop_setarg_t *psa = arg2;
|
||||
uint64_t effective_value = psa->psa_effective_value;
|
||||
|
||||
dsl_prop_set_sync(ds, psa, tx);
|
||||
DSL_PROP_CHECK_PREDICTION(dd, psa);
|
||||
|
||||
dmu_buf_will_dirty(dd->dd_dbuf, tx);
|
||||
|
||||
mutex_enter(&dd->dd_lock);
|
||||
dd->dd_phys->dd_quota = new_quota;
|
||||
dd->dd_phys->dd_quota = effective_value;
|
||||
mutex_exit(&dd->dd_lock);
|
||||
|
||||
spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
|
||||
tx, cr, "%lld dataset = %llu ",
|
||||
(longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj);
|
||||
spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
|
||||
tx, "%lld dataset = %llu ",
|
||||
(longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dir_set_quota(const char *ddname, uint64_t quota)
|
||||
dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
|
||||
{
|
||||
dsl_dir_t *dd;
|
||||
dsl_dataset_t *ds;
|
||||
dsl_prop_setarg_t psa;
|
||||
int err;
|
||||
|
||||
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
|
||||
dsl_prop_setarg_init_uint64(&psa, "quota", source, "a);
|
||||
|
||||
err = dsl_dataset_hold(ddname, FTAG, &ds);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
if (quota != dd->dd_phys->dd_quota) {
|
||||
/*
|
||||
* If someone removes a file, then tries to set the quota, we
|
||||
* want to make sure the file freeing takes effect.
|
||||
*/
|
||||
txg_wait_open(dd->dd_pool, 0);
|
||||
|
||||
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
|
||||
dsl_dir_set_quota_sync, dd, "a, 0);
|
||||
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
|
||||
if (err) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
ASSERT(ds->ds_dir == dd);
|
||||
|
||||
/*
|
||||
* If someone removes a file, then tries to set the quota, we want to
|
||||
* make sure the file freeing takes effect.
|
||||
*/
|
||||
txg_wait_open(dd->dd_pool, 0);
|
||||
|
||||
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
|
||||
dsl_dir_set_quota_sync, ds, &psa, 0);
|
||||
|
||||
dsl_dir_close(dd, FTAG);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
uint64_t *reservationp = arg2;
|
||||
uint64_t new_reservation = *reservationp;
|
||||
dsl_dataset_t *ds = arg1;
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
dsl_prop_setarg_t *psa = arg2;
|
||||
uint64_t effective_value;
|
||||
uint64_t used, avail;
|
||||
int err;
|
||||
|
||||
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
|
||||
return (err);
|
||||
|
||||
effective_value = psa->psa_effective_value;
|
||||
|
||||
/*
|
||||
* If we are doing the preliminary check in open context, the
|
||||
@ -1082,37 +1134,40 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
|
||||
}
|
||||
|
||||
if (MAX(used, new_reservation) > MAX(used, dd->dd_phys->dd_reserved)) {
|
||||
uint64_t delta = MAX(used, new_reservation) -
|
||||
if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
|
||||
uint64_t delta = MAX(used, effective_value) -
|
||||
MAX(used, dd->dd_phys->dd_reserved);
|
||||
|
||||
if (delta > avail)
|
||||
return (ENOSPC);
|
||||
if (dd->dd_phys->dd_quota > 0 &&
|
||||
new_reservation > dd->dd_phys->dd_quota)
|
||||
effective_value > dd->dd_phys->dd_quota)
|
||||
return (ENOSPC);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
uint64_t *reservationp = arg2;
|
||||
uint64_t new_reservation = *reservationp;
|
||||
dsl_dataset_t *ds = arg1;
|
||||
dsl_dir_t *dd = ds->ds_dir;
|
||||
dsl_prop_setarg_t *psa = arg2;
|
||||
uint64_t effective_value = psa->psa_effective_value;
|
||||
uint64_t used;
|
||||
int64_t delta;
|
||||
|
||||
dsl_prop_set_sync(ds, psa, tx);
|
||||
DSL_PROP_CHECK_PREDICTION(dd, psa);
|
||||
|
||||
dmu_buf_will_dirty(dd->dd_dbuf, tx);
|
||||
|
||||
mutex_enter(&dd->dd_lock);
|
||||
used = dd->dd_phys->dd_used_bytes;
|
||||
delta = MAX(used, new_reservation) -
|
||||
delta = MAX(used, effective_value) -
|
||||
MAX(used, dd->dd_phys->dd_reserved);
|
||||
dd->dd_phys->dd_reserved = new_reservation;
|
||||
dd->dd_phys->dd_reserved = effective_value;
|
||||
|
||||
if (dd->dd_parent != NULL) {
|
||||
/* Roll up this additional usage into our ancestors */
|
||||
@ -1121,23 +1176,39 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
}
|
||||
mutex_exit(&dd->dd_lock);
|
||||
|
||||
spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
|
||||
tx, cr, "%lld dataset = %llu",
|
||||
(longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj);
|
||||
spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
|
||||
tx, "%lld dataset = %llu",
|
||||
(longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dir_set_reservation(const char *ddname, uint64_t reservation)
|
||||
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
|
||||
uint64_t reservation)
|
||||
{
|
||||
dsl_dir_t *dd;
|
||||
dsl_dataset_t *ds;
|
||||
dsl_prop_setarg_t psa;
|
||||
int err;
|
||||
|
||||
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
|
||||
dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
|
||||
|
||||
err = dsl_dataset_hold(ddname, FTAG, &ds);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
|
||||
if (err) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
ASSERT(ds->ds_dir == dd);
|
||||
|
||||
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
|
||||
dsl_dir_set_reservation_sync, dd, &reservation, 0);
|
||||
dsl_dir_set_reservation_sync, ds, &psa, 0);
|
||||
|
||||
dsl_dir_close(dd, FTAG);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1175,7 +1246,6 @@ struct renamearg {
|
||||
const char *mynewname;
|
||||
};
|
||||
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
@ -1186,8 +1256,14 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
int err;
|
||||
uint64_t val;
|
||||
|
||||
/* There should be 2 references: the open and the dirty */
|
||||
if (dmu_buf_refcount(dd->dd_dbuf) > 2)
|
||||
/*
|
||||
* There should only be one reference, from dmu_objset_rename().
|
||||
* Fleeting holds are also possible (eg, from "zfs list" getting
|
||||
* stats), but any that are present in open context will likely
|
||||
* be gone by syncing context, so only fail from syncing
|
||||
* context.
|
||||
*/
|
||||
if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
|
||||
return (EBUSY);
|
||||
|
||||
/* check for existing name */
|
||||
@ -1216,7 +1292,7 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_t *dd = arg1;
|
||||
struct renamearg *ra = arg2;
|
||||
@ -1265,8 +1341,8 @@ dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
dd->dd_myname, 8, 1, &dd->dd_object, tx);
|
||||
ASSERT3U(err, ==, 0);
|
||||
|
||||
spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa,
|
||||
tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
|
||||
spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
|
||||
tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
|
||||
}
|
||||
|
||||
int
|
||||
@ -1315,3 +1391,26 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
timestruc_t
|
||||
dsl_dir_snap_cmtime(dsl_dir_t *dd)
|
||||
{
|
||||
timestruc_t t;
|
||||
|
||||
mutex_enter(&dd->dd_lock);
|
||||
t = dd->dd_snap_cmtime;
|
||||
mutex_exit(&dd->dd_lock);
|
||||
|
||||
return (t);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
|
||||
{
|
||||
timestruc_t t;
|
||||
|
||||
gethrestime(&t);
|
||||
mutex_enter(&dd->dd_lock);
|
||||
dd->dd_snap_cmtime = t;
|
||||
mutex_exit(&dd->dd_lock);
|
||||
}
|
||||
|
@ -19,14 +19,16 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/arc.h>
|
||||
@ -36,10 +38,11 @@
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/dsl_deadlist.h>
|
||||
|
||||
int zfs_no_write_throttle = 0;
|
||||
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
|
||||
int zfs_txg_synctime = 5; /* target secs to sync a txg */
|
||||
int zfs_txg_synctime_ms = 5000; /* target millisecs to sync a txg */
|
||||
|
||||
uint64_t zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */
|
||||
uint64_t zfs_write_limit_max = 0; /* max data payload per txg */
|
||||
@ -50,7 +53,7 @@ kmutex_t zfs_write_limit_lock;
|
||||
|
||||
static pgcnt_t old_physmem = 0;
|
||||
|
||||
static int
|
||||
int
|
||||
dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
|
||||
{
|
||||
uint64_t obj;
|
||||
@ -88,7 +91,6 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
||||
offsetof(dsl_dataset_t, ds_synced_link));
|
||||
|
||||
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&dp->dp_scrub_cancel_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri,
|
||||
1, 4, 0);
|
||||
@ -103,13 +105,13 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
|
||||
dsl_dir_t *dd;
|
||||
dsl_dataset_t *ds;
|
||||
objset_impl_t *osi;
|
||||
uint64_t obj;
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
|
||||
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi);
|
||||
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
|
||||
&dp->dp_meta_objset);
|
||||
if (err)
|
||||
goto out;
|
||||
dp->dp_meta_objset = &osi->os;
|
||||
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
|
||||
@ -143,53 +145,30 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* get scrub status */
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1,
|
||||
&dp->dp_scrub_func);
|
||||
if (err == 0) {
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1,
|
||||
&dp->dp_scrub_queue_obj);
|
||||
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
|
||||
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
|
||||
&dp->dp_free_dir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1,
|
||||
&dp->dp_scrub_min_txg);
|
||||
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
|
||||
if (err)
|
||||
goto out;
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1,
|
||||
&dp->dp_scrub_max_txg);
|
||||
if (err)
|
||||
goto out;
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t), 4,
|
||||
&dp->dp_scrub_bookmark);
|
||||
if (err)
|
||||
goto out;
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
|
||||
&spa->spa_scrub_errors);
|
||||
if (err)
|
||||
goto out;
|
||||
if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) {
|
||||
/*
|
||||
* A new-type scrub was in progress on an old
|
||||
* pool. Restart from the beginning, since the
|
||||
* old software may have changed the pool in the
|
||||
* meantime.
|
||||
*/
|
||||
dsl_pool_scrub_restart(dp);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* It's OK if there is no scrub in progress (and if
|
||||
* there was an I/O error, ignore it).
|
||||
*/
|
||||
err = 0;
|
||||
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
|
||||
dp->dp_meta_objset, obj));
|
||||
}
|
||||
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
|
||||
&dp->dp_tmp_userrefs_obj);
|
||||
if (err == ENOENT)
|
||||
err = 0;
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = dsl_scan_init(dp, txg);
|
||||
|
||||
out:
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
if (err)
|
||||
@ -214,22 +193,27 @@ dsl_pool_close(dsl_pool_t *dp)
|
||||
dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
|
||||
if (dp->dp_mos_dir)
|
||||
dsl_dir_close(dp->dp_mos_dir, dp);
|
||||
if (dp->dp_free_dir)
|
||||
dsl_dir_close(dp->dp_free_dir, dp);
|
||||
if (dp->dp_root_dir)
|
||||
dsl_dir_close(dp->dp_root_dir, dp);
|
||||
|
||||
bpobj_close(&dp->dp_free_bpobj);
|
||||
|
||||
/* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
|
||||
if (dp->dp_meta_objset)
|
||||
dmu_objset_evict(NULL, dp->dp_meta_objset->os);
|
||||
dmu_objset_evict(dp->dp_meta_objset);
|
||||
|
||||
txg_list_destroy(&dp->dp_dirty_datasets);
|
||||
txg_list_destroy(&dp->dp_sync_tasks);
|
||||
txg_list_destroy(&dp->dp_dirty_dirs);
|
||||
list_destroy(&dp->dp_synced_datasets);
|
||||
|
||||
arc_flush(dp->dp_spa);
|
||||
txg_fini(dp);
|
||||
dsl_scan_fini(dp);
|
||||
rw_destroy(&dp->dp_config_rwlock);
|
||||
mutex_destroy(&dp->dp_lock);
|
||||
mutex_destroy(&dp->dp_scrub_cancel_lock);
|
||||
taskq_destroy(dp->dp_vnrele_taskq);
|
||||
if (dp->dp_blkstats)
|
||||
kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
|
||||
@ -242,19 +226,22 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
|
||||
int err;
|
||||
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
|
||||
dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
|
||||
objset_impl_t *osip;
|
||||
objset_t *os;
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t dsobj;
|
||||
uint64_t obj;
|
||||
|
||||
/* create and open the MOS (meta-objset) */
|
||||
dp->dp_meta_objset = &dmu_objset_create_impl(spa,
|
||||
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os;
|
||||
dp->dp_meta_objset = dmu_objset_create_impl(spa,
|
||||
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
|
||||
|
||||
/* create the pool directory */
|
||||
err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
|
||||
ASSERT3U(err, ==, 0);
|
||||
|
||||
/* Initialize scan structures */
|
||||
VERIFY3U(0, ==, dsl_scan_init(dp, txg));
|
||||
|
||||
/* create and open the root dir */
|
||||
dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
|
||||
VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
|
||||
@ -265,18 +252,33 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
|
||||
VERIFY(0 == dsl_pool_open_special_dir(dp,
|
||||
MOS_DIR_NAME, &dp->dp_mos_dir));
|
||||
|
||||
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
|
||||
/* create and open the free dir */
|
||||
(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
|
||||
FREE_DIR_NAME, tx);
|
||||
VERIFY(0 == dsl_pool_open_special_dir(dp,
|
||||
FREE_DIR_NAME, &dp->dp_free_dir));
|
||||
|
||||
/* create and open the free_bplist */
|
||||
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
|
||||
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
|
||||
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
|
||||
dp->dp_meta_objset, obj));
|
||||
}
|
||||
|
||||
if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
|
||||
dsl_pool_create_origin(dp, tx);
|
||||
|
||||
/* create the root dataset */
|
||||
dsobj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
|
||||
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
|
||||
|
||||
/* create the root objset */
|
||||
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
|
||||
osip = dmu_objset_create_impl(dp->dp_spa, ds,
|
||||
VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
|
||||
os = dmu_objset_create_impl(dp->dp_spa, ds,
|
||||
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
|
||||
#ifdef _KERNEL
|
||||
zfs_create_fs(&osip->os, kcred, zplprops, tx);
|
||||
zfs_create_fs(os, kcred, zplprops, tx);
|
||||
#endif
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
|
||||
@ -285,6 +287,14 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
|
||||
return (dp);
|
||||
}
|
||||
|
||||
static int
|
||||
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_t *dl = arg;
|
||||
dsl_deadlist_insert(dl, bp, tx);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
{
|
||||
@ -293,11 +303,19 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
dsl_dir_t *dd;
|
||||
dsl_dataset_t *ds;
|
||||
dsl_sync_task_group_t *dstg;
|
||||
objset_impl_t *mosi = dp->dp_meta_objset->os;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
hrtime_t start, write_time;
|
||||
uint64_t data_written;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* We need to copy dp_space_towrite() before doing
|
||||
* dsl_sync_task_group_sync(), because
|
||||
* dsl_dataset_snapshot_reserve_space() will increase
|
||||
* dp_space_towrite but not actually write anything.
|
||||
*/
|
||||
data_written = dp->dp_space_towrite[txg & TXG_MASK];
|
||||
|
||||
tx = dmu_tx_create_assigned(dp, txg);
|
||||
|
||||
dp->dp_read_overhead = 0;
|
||||
@ -323,11 +341,11 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
|
||||
for (ds = list_head(&dp->dp_synced_datasets); ds;
|
||||
ds = list_next(&dp->dp_synced_datasets, ds))
|
||||
dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
|
||||
dmu_objset_do_userquota_updates(ds->ds_objset, tx);
|
||||
|
||||
/*
|
||||
* Sync the datasets again to push out the changes due to
|
||||
* userquota updates. This must be done before we process the
|
||||
* userspace updates. This must be done before we process the
|
||||
* sync tasks, because that could cause a snapshot of a dataset
|
||||
* whose ds_bp will be rewritten when we do this 2nd sync.
|
||||
*/
|
||||
@ -339,6 +357,16 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
}
|
||||
err = zio_wait(zio);
|
||||
|
||||
/*
|
||||
* Move dead blocks from the pending deadlist to the on-disk
|
||||
* deadlist.
|
||||
*/
|
||||
for (ds = list_head(&dp->dp_synced_datasets); ds;
|
||||
ds = list_next(&dp->dp_synced_datasets, ds)) {
|
||||
bplist_iterate(&ds->ds_pending_deadlist,
|
||||
deadlist_enqueue_cb, &ds->ds_deadlist, tx);
|
||||
}
|
||||
|
||||
while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
|
||||
/*
|
||||
* No more sync tasks should have been added while we
|
||||
@ -354,14 +382,11 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
dsl_dir_sync(dd, tx);
|
||||
write_time += gethrtime() - start;
|
||||
|
||||
if (spa_sync_pass(dp->dp_spa) == 1)
|
||||
dsl_pool_scrub_sync(dp, tx);
|
||||
|
||||
start = gethrtime();
|
||||
if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
|
||||
list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
|
||||
if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
|
||||
list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
|
||||
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
dmu_objset_sync(mosi, zio, tx);
|
||||
dmu_objset_sync(mos, zio, tx);
|
||||
err = zio_wait(zio);
|
||||
ASSERT(err == 0);
|
||||
dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
|
||||
@ -374,7 +399,6 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
data_written = dp->dp_space_towrite[txg & TXG_MASK];
|
||||
dp->dp_space_towrite[txg & TXG_MASK] = 0;
|
||||
ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0);
|
||||
|
||||
@ -399,10 +423,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
* amount of write traffic allowed into each transaction group.
|
||||
* Weight the throughput calculation towards the current value:
|
||||
* thru = 3/4 old_thru + 1/4 new_thru
|
||||
*
|
||||
* Note: write_time is in nanosecs, so write_time/MICROSEC
|
||||
* yields millisecs
|
||||
*/
|
||||
ASSERT(zfs_write_limit_min > 0);
|
||||
if (data_written > zfs_write_limit_min / 8 && write_time > 0) {
|
||||
uint64_t throughput = (data_written * NANOSEC) / write_time;
|
||||
if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) {
|
||||
uint64_t throughput = data_written / (write_time / MICROSEC);
|
||||
|
||||
if (dp->dp_throughput)
|
||||
dp->dp_throughput = throughput / 4 +
|
||||
3 * dp->dp_throughput / 4;
|
||||
@ -410,21 +438,24 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||
dp->dp_throughput = throughput;
|
||||
dp->dp_write_limit = MIN(zfs_write_limit_inflated,
|
||||
MAX(zfs_write_limit_min,
|
||||
dp->dp_throughput * zfs_txg_synctime));
|
||||
dp->dp_throughput * zfs_txg_synctime_ms));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
dsl_pool_zil_clean(dsl_pool_t *dp)
|
||||
dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
|
||||
{
|
||||
dsl_dataset_t *ds;
|
||||
objset_t *os;
|
||||
|
||||
while (ds = list_head(&dp->dp_synced_datasets)) {
|
||||
list_remove(&dp->dp_synced_datasets, ds);
|
||||
ASSERT(ds->ds_user_ptr != NULL);
|
||||
zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
|
||||
os = ds->ds_objset;
|
||||
zil_clean(os->os_zil);
|
||||
ASSERT(!dmu_objset_is_dirty(os, txg));
|
||||
dmu_buf_rele(ds->ds_dbuf, ds);
|
||||
}
|
||||
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -601,6 +632,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
|
||||
ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
|
||||
|
||||
if (prev->ds_phys->ds_next_clones_obj == 0) {
|
||||
dmu_buf_will_dirty(prev->ds_dbuf, tx);
|
||||
prev->ds_phys->ds_next_clones_obj =
|
||||
zap_create(dp->dp_meta_objset,
|
||||
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
|
||||
@ -620,8 +652,67 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
ASSERT(dp->dp_origin_snap != NULL);
|
||||
|
||||
(void) dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
|
||||
tx, DS_FIND_CHILDREN);
|
||||
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
|
||||
tx, DS_FIND_CHILDREN));
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
|
||||
{
|
||||
dmu_tx_t *tx = arg;
|
||||
dsl_dataset_t *ds;
|
||||
dsl_pool_t *dp = spa_get_dsl(spa);
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
|
||||
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
|
||||
|
||||
if (ds->ds_dir->dd_phys->dd_origin_obj) {
|
||||
dsl_dataset_t *origin;
|
||||
|
||||
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
|
||||
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
|
||||
|
||||
if (origin->ds_dir->dd_phys->dd_clones == 0) {
|
||||
dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
|
||||
origin->ds_dir->dd_phys->dd_clones = zap_create(mos,
|
||||
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
|
||||
}
|
||||
|
||||
VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
|
||||
origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
|
||||
|
||||
dsl_dataset_rele(origin, FTAG);
|
||||
}
|
||||
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
uint64_t obj;
|
||||
|
||||
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
|
||||
VERIFY(0 == dsl_pool_open_special_dir(dp,
|
||||
FREE_DIR_NAME, &dp->dp_free_dir));
|
||||
|
||||
/*
|
||||
* We can't use bpobj_alloc(), because spa_version() still
|
||||
* returns the old version, and we need a new-version bpobj with
|
||||
* subobj support. So call dmu_object_alloc() directly.
|
||||
*/
|
||||
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
|
||||
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
|
||||
VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
|
||||
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
|
||||
dp->dp_meta_objset, obj));
|
||||
|
||||
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
|
||||
upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
|
||||
}
|
||||
|
||||
void
|
||||
@ -638,7 +729,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
|
||||
NULL, 0, kcred, tx);
|
||||
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
|
||||
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, kcred, tx);
|
||||
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
|
||||
VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
|
||||
dp, &dp->dp_origin_snap));
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
@ -650,3 +741,108 @@ dsl_pool_vnrele_taskq(dsl_pool_t *dp)
|
||||
{
|
||||
return (dp->dp_vnrele_taskq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk through the pool-wide zap object of temporary snapshot user holds
|
||||
* and release them.
|
||||
*/
|
||||
void
|
||||
dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
|
||||
{
|
||||
zap_attribute_t za;
|
||||
zap_cursor_t zc;
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
|
||||
|
||||
if (zapobj == 0)
|
||||
return;
|
||||
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
|
||||
|
||||
for (zap_cursor_init(&zc, mos, zapobj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
char *htag;
|
||||
uint64_t dsobj;
|
||||
|
||||
htag = strchr(za.za_name, '-');
|
||||
*htag = '\0';
|
||||
++htag;
|
||||
dsobj = strtonum(za.za_name, NULL);
|
||||
(void) dsl_dataset_user_release_tmp(dp, dsobj, htag);
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create the pool-wide zap object for storing temporary snapshot holds.
|
||||
*/
|
||||
void
|
||||
dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
{
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
|
||||
ASSERT(dp->dp_tmp_userrefs_obj == 0);
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
|
||||
VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
|
||||
sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
|
||||
const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
|
||||
{
|
||||
objset_t *mos = dp->dp_meta_objset;
|
||||
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
|
||||
char *name;
|
||||
int error;
|
||||
|
||||
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
/*
|
||||
* If the pool was created prior to SPA_VERSION_USERREFS, the
|
||||
* zap object for temporary holds might not exist yet.
|
||||
*/
|
||||
if (zapobj == 0) {
|
||||
if (holding) {
|
||||
dsl_pool_user_hold_create_obj(dp, tx);
|
||||
zapobj = dp->dp_tmp_userrefs_obj;
|
||||
} else {
|
||||
return (ENOENT);
|
||||
}
|
||||
}
|
||||
|
||||
name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
|
||||
if (holding)
|
||||
error = zap_add(mos, zapobj, name, 8, 1, now, tx);
|
||||
else
|
||||
error = zap_remove(mos, zapobj, name, tx);
|
||||
strfree(name);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a temporary hold for the given dataset object and tag.
|
||||
*/
|
||||
int
|
||||
dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
|
||||
uint64_t *now, dmu_tx_t *tx)
|
||||
{
|
||||
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
|
||||
}
|
||||
|
||||
/*
|
||||
* Release a temporary hold for the given dataset object and tag.
|
||||
*/
|
||||
int
|
||||
dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
|
||||
tx, B_FALSE));
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
1739
module/zfs/dsl_scan.c
Normal file
1739
module/zfs/dsl_scan.c
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -19,18 +19,15 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/cred.h>
|
||||
#include <sys/metaslab.h>
|
||||
|
||||
#define DST_AVG_BLKSHIFT 14
|
||||
|
||||
@ -50,7 +47,6 @@ dsl_sync_task_group_create(dsl_pool_t *dp)
|
||||
list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
|
||||
offsetof(dsl_sync_task_t, dst_node));
|
||||
dstg->dstg_pool = dp;
|
||||
dstg->dstg_cr = CRED();
|
||||
|
||||
return (dstg);
|
||||
}
|
||||
@ -112,14 +108,21 @@ top:
|
||||
return (dstg->dstg_err);
|
||||
}
|
||||
|
||||
VERIFY(0 == txg_list_add(&dstg->dstg_pool->dp_sync_tasks, dstg, txg));
|
||||
/*
|
||||
* We don't generally have many sync tasks, so pay the price of
|
||||
* add_tail to get the tasks executed in the right order.
|
||||
*/
|
||||
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
|
||||
dstg, txg));
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
txg_wait_synced(dstg->dstg_pool, txg);
|
||||
|
||||
if (dstg->dstg_err == EAGAIN)
|
||||
if (dstg->dstg_err == EAGAIN) {
|
||||
txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
|
||||
goto top;
|
||||
}
|
||||
|
||||
return (dstg->dstg_err);
|
||||
}
|
||||
@ -131,7 +134,12 @@ dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
|
||||
|
||||
dstg->dstg_nowaiter = B_TRUE;
|
||||
txg = dmu_tx_get_txg(tx);
|
||||
VERIFY(0 == txg_list_add(&dstg->dstg_pool->dp_sync_tasks, dstg, txg));
|
||||
/*
|
||||
* We don't generally have many sync tasks, so pay the price of
|
||||
* add_tail to get the tasks executed in the right order.
|
||||
*/
|
||||
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
|
||||
dstg, txg));
|
||||
}
|
||||
|
||||
void
|
||||
@ -150,25 +158,30 @@ void
|
||||
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_sync_task_t *dst;
|
||||
void *tr_cookie;
|
||||
dsl_pool_t *dp = dstg->dstg_pool;
|
||||
uint64_t quota, used;
|
||||
|
||||
ASSERT3U(dstg->dstg_err, ==, 0);
|
||||
|
||||
/*
|
||||
* Check for sufficient space.
|
||||
* Check for sufficient space. We just check against what's
|
||||
* on-disk; we don't want any in-flight accounting to get in our
|
||||
* way, because open context may have already used up various
|
||||
* in-core limits (arc_tempreserve, dsl_pool_tempreserve).
|
||||
*/
|
||||
dstg->dstg_err = dsl_dir_tempreserve_space(dstg->dstg_pool->dp_mos_dir,
|
||||
dstg->dstg_space, dstg->dstg_space * 3, 0, 0, &tr_cookie, tx);
|
||||
/* don't bother trying again */
|
||||
if (dstg->dstg_err == ERESTART)
|
||||
dstg->dstg_err = EAGAIN;
|
||||
if (dstg->dstg_err)
|
||||
quota = dsl_pool_adjustedsize(dp, B_FALSE) -
|
||||
metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
|
||||
used = dp->dp_root_dir->dd_phys->dd_used_bytes;
|
||||
/* MOS space is triple-dittoed, so we multiply by 3. */
|
||||
if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
|
||||
dstg->dstg_err = ENOSPC;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for errors by calling checkfuncs.
|
||||
*/
|
||||
rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_WRITER);
|
||||
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
|
||||
for (dst = list_head(&dstg->dstg_tasks); dst;
|
||||
dst = list_next(&dstg->dstg_tasks, dst)) {
|
||||
dst->dst_err =
|
||||
@ -183,13 +196,10 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
|
||||
*/
|
||||
for (dst = list_head(&dstg->dstg_tasks); dst;
|
||||
dst = list_next(&dstg->dstg_tasks, dst)) {
|
||||
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2,
|
||||
dstg->dstg_cr, tx);
|
||||
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
|
||||
}
|
||||
}
|
||||
rw_exit(&dstg->dstg_pool->dp_config_rwlock);
|
||||
|
||||
dsl_dir_tempreserve_clear(tr_cookie, tx);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
|
||||
if (dstg->dstg_nowaiter)
|
||||
dsl_sync_task_group_destroy(dstg);
|
||||
|
160
module/zfs/fm.c
160
module/zfs/fm.c
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -94,6 +93,8 @@ static ulong_t ereport_qlen = 0;
|
||||
static size_t ereport_size = 0;
|
||||
static int ereport_cols = 80;
|
||||
|
||||
extern void fastreboot_disable_highpil(void);
|
||||
|
||||
/*
|
||||
* Common fault management kstats to record ereport generation
|
||||
* failures
|
||||
@ -374,6 +375,9 @@ fm_panic(const char *format, ...)
|
||||
va_list ap;
|
||||
|
||||
(void) casptr((void *)&fm_panicstr, NULL, (void *)format);
|
||||
#if defined(__i386) || defined(__amd64)
|
||||
fastreboot_disable_highpil();
|
||||
#endif /* __i386 || __amd64 */
|
||||
va_start(ap, format);
|
||||
vpanic(format, ap);
|
||||
va_end(ap);
|
||||
@ -512,10 +516,10 @@ fm_ereport_post(nvlist_t *ereport, int evc_flag)
|
||||
if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
|
||||
SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
|
||||
atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
|
||||
sysevent_evc_unbind(error_chan);
|
||||
(void) sysevent_evc_unbind(error_chan);
|
||||
return;
|
||||
}
|
||||
sysevent_evc_unbind(error_chan);
|
||||
(void) sysevent_evc_unbind(error_chan);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -788,6 +792,14 @@ fm_payload_set(nvlist_t *payload, ...)
|
||||
* detector nvlist_t <detector>
|
||||
* ereport-payload nvlist_t <var args>
|
||||
*
|
||||
* We don't actually add a 'version' member to the payload. Really,
|
||||
* the version quoted to us by our caller is that of the category 1
|
||||
* "ereport" event class (and we require FM_EREPORT_VERS0) but
|
||||
* the payload version of the actual leaf class event under construction
|
||||
* may be something else. Callers should supply a version in the varargs,
|
||||
* or (better) we could take two version arguments - one for the
|
||||
* ereport category 1 classification (expect FM_EREPORT_VERS0) and one
|
||||
* for the leaf class.
|
||||
*/
|
||||
void
|
||||
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
|
||||
@ -920,46 +932,41 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
|
||||
* version uint8_t 0
|
||||
* auth nvlist_t <auth>
|
||||
* devpath string <devpath>
|
||||
* devid string <devid>
|
||||
* [devid] string <devid>
|
||||
* [target-port-l0id] string <target-port-lun0-id>
|
||||
*
|
||||
* Note that auth and devid are optional members.
|
||||
*/
|
||||
void
|
||||
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
|
||||
const char *devpath, const char *devid)
|
||||
const char *devpath, const char *devid, const char *tpl0)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (version != DEV_SCHEME_VERSION0) {
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) {
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
|
||||
FM_FMRI_SCHEME_DEV) != 0) {
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
|
||||
err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
|
||||
|
||||
if (auth != NULL) {
|
||||
if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
|
||||
(nvlist_t *)auth) != 0) {
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
}
|
||||
err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
|
||||
(nvlist_t *)auth);
|
||||
}
|
||||
|
||||
if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) {
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
}
|
||||
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
|
||||
|
||||
if (devid != NULL)
|
||||
if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0)
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
|
||||
|
||||
if (tpl0 != NULL)
|
||||
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
|
||||
|
||||
if (err)
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1264,3 +1271,102 @@ print_msg_hwerr(ctid_t ct_id, proc_t *p)
|
||||
uprintf("Killed process %d (%s) in contract id %d "
|
||||
"due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id);
|
||||
}
|
||||
|
||||
void
|
||||
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
|
||||
nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
|
||||
{
|
||||
nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
|
||||
nvlist_t *pairs[HC_MAXPAIRS];
|
||||
nvlist_t **hcl;
|
||||
uint_t n;
|
||||
int i, j;
|
||||
va_list ap;
|
||||
char *hcname, *hcid;
|
||||
|
||||
if (!fm_fmri_hc_set_common(fmri, version, auth))
|
||||
return;
|
||||
|
||||
/*
|
||||
* copy the bboard nvpairs to the pairs array
|
||||
*/
|
||||
if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
|
||||
!= 0) {
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
|
||||
&hcname) != 0) {
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
pairs[i] = fm_nvlist_create(nva);
|
||||
if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
|
||||
nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (pairs[j] != NULL)
|
||||
fm_nvlist_destroy(pairs[j],
|
||||
FM_NVA_RETAIN);
|
||||
}
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* create the pairs from passed in pairs
|
||||
*/
|
||||
npairs = MIN(npairs, HC_MAXPAIRS);
|
||||
|
||||
va_start(ap, npairs);
|
||||
for (i = n; i < npairs + n; i++) {
|
||||
const char *name = va_arg(ap, const char *);
|
||||
uint32_t id = va_arg(ap, uint32_t);
|
||||
char idstr[11];
|
||||
(void) snprintf(idstr, sizeof (idstr), "%u", id);
|
||||
pairs[i] = fm_nvlist_create(nva);
|
||||
if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
|
||||
nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (pairs[j] != NULL)
|
||||
fm_nvlist_destroy(pairs[j],
|
||||
FM_NVA_RETAIN);
|
||||
}
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
va_end(ap);
|
||||
|
||||
/*
|
||||
* Create the fmri hc list
|
||||
*/
|
||||
if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
|
||||
npairs + n) != 0) {
|
||||
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < npairs + n; i++) {
|
||||
fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
|
||||
}
|
||||
|
||||
if (snvl != NULL) {
|
||||
if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
|
||||
atomic_add_64(
|
||||
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ARC_H
|
||||
@ -48,7 +47,8 @@ arc_done_func_t arc_getbuf_func;
|
||||
struct arc_buf {
|
||||
arc_buf_hdr_t *b_hdr;
|
||||
arc_buf_t *b_next;
|
||||
krwlock_t b_lock;
|
||||
kmutex_t b_evict_lock;
|
||||
krwlock_t b_data_lock;
|
||||
void *b_data;
|
||||
arc_evict_func_t *b_efunc;
|
||||
void *b_private;
|
||||
@ -87,10 +87,13 @@ arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
|
||||
arc_buf_contents_t type);
|
||||
arc_buf_t *arc_loan_buf(spa_t *spa, int size);
|
||||
void arc_return_buf(arc_buf_t *buf, void *tag);
|
||||
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
|
||||
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_size(arc_buf_t *buf);
|
||||
void arc_release(arc_buf_t *buf, void *tag);
|
||||
int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
|
||||
zbookmark_t *zb);
|
||||
int arc_released(arc_buf_t *buf);
|
||||
int arc_has_callback(arc_buf_t *buf);
|
||||
void arc_buf_freeze(arc_buf_t *buf);
|
||||
@ -99,28 +102,16 @@ void arc_buf_thaw(arc_buf_t *buf);
|
||||
int arc_referenced(arc_buf_t *buf);
|
||||
#endif
|
||||
|
||||
typedef struct writeprops {
|
||||
dmu_object_type_t wp_type;
|
||||
uint8_t wp_level;
|
||||
uint8_t wp_copies;
|
||||
uint8_t wp_dncompress, wp_oscompress;
|
||||
uint8_t wp_dnchecksum, wp_oschecksum;
|
||||
} writeprops_t;
|
||||
|
||||
void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp);
|
||||
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
|
||||
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
arc_done_func_t *done, void *private, int priority, int zio_flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
|
||||
int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||
arc_done_func_t *done, void *private, int priority, int flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
|
||||
boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
|
||||
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
|
||||
int zio_flags, const zbookmark_t *zb);
|
||||
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
||||
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
|
||||
arc_done_func_t *ready, arc_done_func_t *done, void *private,
|
||||
int priority, int zio_flags, const zbookmark_t *zb);
|
||||
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||
int arc_buf_evict(arc_buf_t *buf);
|
||||
|
@ -19,68 +19,36 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BPLIST_H
|
||||
#define _SYS_BPLIST_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct bplist_phys {
|
||||
/*
|
||||
* This is the bonus buffer for the dead lists. The object's
|
||||
* contents is an array of bpl_entries blkptr_t's, representing
|
||||
* a total of bpl_bytes physical space.
|
||||
*/
|
||||
uint64_t bpl_entries;
|
||||
uint64_t bpl_bytes;
|
||||
uint64_t bpl_comp;
|
||||
uint64_t bpl_uncomp;
|
||||
} bplist_phys_t;
|
||||
|
||||
#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t))
|
||||
|
||||
typedef struct bplist_q {
|
||||
blkptr_t bpq_blk;
|
||||
void *bpq_next;
|
||||
} bplist_q_t;
|
||||
typedef struct bplist_entry {
|
||||
blkptr_t bpe_blk;
|
||||
list_node_t bpe_node;
|
||||
} bplist_entry_t;
|
||||
|
||||
typedef struct bplist {
|
||||
kmutex_t bpl_lock;
|
||||
objset_t *bpl_mos;
|
||||
uint64_t bpl_object;
|
||||
uint8_t bpl_blockshift;
|
||||
uint8_t bpl_bpshift;
|
||||
uint8_t bpl_havecomp;
|
||||
bplist_q_t *bpl_queue;
|
||||
bplist_phys_t *bpl_phys;
|
||||
dmu_buf_t *bpl_dbuf;
|
||||
dmu_buf_t *bpl_cached_dbuf;
|
||||
list_t bpl_list;
|
||||
} bplist_t;
|
||||
|
||||
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
|
||||
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
|
||||
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
|
||||
extern void bplist_close(bplist_t *bpl);
|
||||
extern boolean_t bplist_empty(bplist_t *bpl);
|
||||
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
|
||||
extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
|
||||
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern int bplist_space(bplist_t *bpl,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
extern int bplist_space_birthrange(bplist_t *bpl,
|
||||
uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
|
||||
typedef int bplist_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
void bplist_create(bplist_t *bpl);
|
||||
void bplist_destroy(bplist_t *bpl);
|
||||
void bplist_append(bplist_t *bpl, const blkptr_t *bp);
|
||||
void bplist_iterate(bplist_t *bpl, bplist_itor_t *func,
|
||||
void *arg, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
91
module/zfs/include/sys/bpobj.h
Normal file
91
module/zfs/include/sys/bpobj.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BPOBJ_H
|
||||
#define _SYS_BPOBJ_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct bpobj_phys {
|
||||
/*
|
||||
* This is the bonus buffer for the dead lists. The object's
|
||||
* contents is an array of bpo_entries blkptr_t's, representing
|
||||
* a total of bpo_bytes physical space.
|
||||
*/
|
||||
uint64_t bpo_num_blkptrs;
|
||||
uint64_t bpo_bytes;
|
||||
uint64_t bpo_comp;
|
||||
uint64_t bpo_uncomp;
|
||||
uint64_t bpo_subobjs;
|
||||
uint64_t bpo_num_subobjs;
|
||||
} bpobj_phys_t;
|
||||
|
||||
#define BPOBJ_SIZE_V0 (2 * sizeof (uint64_t))
|
||||
#define BPOBJ_SIZE_V1 (4 * sizeof (uint64_t))
|
||||
|
||||
typedef struct bpobj {
|
||||
kmutex_t bpo_lock;
|
||||
objset_t *bpo_os;
|
||||
uint64_t bpo_object;
|
||||
int bpo_epb;
|
||||
uint8_t bpo_havecomp;
|
||||
uint8_t bpo_havesubobj;
|
||||
bpobj_phys_t *bpo_phys;
|
||||
dmu_buf_t *bpo_dbuf;
|
||||
dmu_buf_t *bpo_cached_dbuf;
|
||||
} bpobj_t;
|
||||
|
||||
typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx);
|
||||
void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
|
||||
|
||||
int bpobj_open(bpobj_t *bpo, objset_t *mos, uint64_t object);
|
||||
void bpobj_close(bpobj_t *bpo);
|
||||
|
||||
int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx);
|
||||
int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *);
|
||||
int bpobj_iterate_dbg(bpobj_t *bpo, uint64_t *itorp, blkptr_t *bp);
|
||||
|
||||
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
|
||||
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
int bpobj_space(bpobj_t *bpo,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_BPOBJ_H */
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DBUF_H
|
||||
@ -38,7 +37,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define DB_BONUS_BLKID (-1ULL)
|
||||
#define IN_DMU_SYNC 2
|
||||
|
||||
/*
|
||||
@ -75,7 +73,6 @@ typedef enum dbuf_states {
|
||||
DB_EVICTING
|
||||
} dbuf_states_t;
|
||||
|
||||
struct objset_impl;
|
||||
struct dnode;
|
||||
struct dmu_tx;
|
||||
|
||||
@ -134,6 +131,7 @@ typedef struct dbuf_dirty_record {
|
||||
arc_buf_t *dr_data;
|
||||
blkptr_t dr_overridden_by;
|
||||
override_states_t dr_override_state;
|
||||
uint8_t dr_copies;
|
||||
} dl;
|
||||
} dt;
|
||||
} dbuf_dirty_record_t;
|
||||
@ -148,7 +146,7 @@ typedef struct dmu_buf_impl {
|
||||
dmu_buf_t db;
|
||||
|
||||
/* the objset we belong to */
|
||||
struct objset_impl *db_objset;
|
||||
struct objset *db_objset;
|
||||
|
||||
/*
|
||||
* the dnode we belong to (NULL when evicted)
|
||||
@ -242,6 +240,10 @@ uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
|
||||
|
||||
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
|
||||
void dbuf_create_bonus(struct dnode *dn);
|
||||
int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
|
||||
void dbuf_spill_hold(struct dnode *dn, dmu_buf_impl_t **dbp, void *tag);
|
||||
|
||||
void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
|
||||
|
||||
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
|
||||
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
|
||||
@ -255,6 +257,7 @@ void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
|
||||
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
|
||||
void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
|
||||
|
||||
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
|
||||
|
||||
@ -266,6 +269,7 @@ void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
void dbuf_evict(dmu_buf_impl_t *db);
|
||||
@ -273,6 +277,7 @@ void dbuf_evict(dmu_buf_impl_t *db);
|
||||
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
|
||||
void dbuf_release_bp(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
|
||||
struct dmu_tx *);
|
||||
@ -324,7 +329,7 @@ _NOTE(CONSTCOND) } while (0)
|
||||
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
|
||||
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
|
||||
sprintf_blkptr(__blkbuf, bp); \
|
||||
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
|
246
module/zfs/include/sys/ddt.h
Normal file
246
module/zfs/include/sys/ddt.h
Normal file
@ -0,0 +1,246 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DDT_H
|
||||
#define _SYS_DDT_H
|
||||
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On-disk DDT formats, in the desired search order (newest version first).
|
||||
*/
|
||||
enum ddt_type {
|
||||
DDT_TYPE_ZAP = 0,
|
||||
DDT_TYPES
|
||||
};
|
||||
|
||||
/*
|
||||
* DDT classes, in the desired search order (highest replication level first).
|
||||
*/
|
||||
enum ddt_class {
|
||||
DDT_CLASS_DITTO = 0,
|
||||
DDT_CLASS_DUPLICATE,
|
||||
DDT_CLASS_UNIQUE,
|
||||
DDT_CLASSES
|
||||
};
|
||||
|
||||
#define DDT_TYPE_CURRENT 0
|
||||
|
||||
#define DDT_COMPRESS_BYTEORDER_MASK 0x80
|
||||
#define DDT_COMPRESS_FUNCTION_MASK 0x7f
|
||||
|
||||
/*
|
||||
* On-disk ddt entry: key (name) and physical storage (value).
|
||||
*/
|
||||
typedef struct ddt_key {
|
||||
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
|
||||
uint64_t ddk_prop; /* LSIZE, PSIZE, compression */
|
||||
} ddt_key_t;
|
||||
|
||||
/*
|
||||
* ddk_prop layout:
|
||||
*
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* | 0 | 0 | 0 | comp | PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
*/
|
||||
#define DDK_GET_LSIZE(ddk) \
|
||||
BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
#define DDK_SET_LSIZE(ddk, x) \
|
||||
BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define DDK_GET_PSIZE(ddk) \
|
||||
BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
#define DDK_SET_PSIZE(ddk, x) \
|
||||
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
|
||||
#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
|
||||
|
||||
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
|
||||
|
||||
typedef struct ddt_phys {
|
||||
dva_t ddp_dva[SPA_DVAS_PER_BP];
|
||||
uint64_t ddp_refcnt;
|
||||
uint64_t ddp_phys_birth;
|
||||
} ddt_phys_t;
|
||||
|
||||
enum ddt_phys_type {
|
||||
DDT_PHYS_DITTO = 0,
|
||||
DDT_PHYS_SINGLE = 1,
|
||||
DDT_PHYS_DOUBLE = 2,
|
||||
DDT_PHYS_TRIPLE = 3,
|
||||
DDT_PHYS_TYPES
|
||||
};
|
||||
|
||||
/*
|
||||
* In-core ddt entry
|
||||
*/
|
||||
struct ddt_entry {
|
||||
ddt_key_t dde_key;
|
||||
ddt_phys_t dde_phys[DDT_PHYS_TYPES];
|
||||
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
|
||||
void *dde_repair_data;
|
||||
enum ddt_type dde_type;
|
||||
enum ddt_class dde_class;
|
||||
uint8_t dde_loading;
|
||||
uint8_t dde_loaded;
|
||||
kcondvar_t dde_cv;
|
||||
avl_node_t dde_node;
|
||||
};
|
||||
|
||||
/*
|
||||
* In-core ddt
|
||||
*/
|
||||
struct ddt {
|
||||
kmutex_t ddt_lock;
|
||||
avl_tree_t ddt_tree;
|
||||
avl_tree_t ddt_repair_tree;
|
||||
enum zio_checksum ddt_checksum;
|
||||
spa_t *ddt_spa;
|
||||
objset_t *ddt_os;
|
||||
uint64_t ddt_stat_object;
|
||||
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
|
||||
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
|
||||
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
|
||||
ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
|
||||
avl_node_t ddt_node;
|
||||
};
|
||||
|
||||
/*
|
||||
* In-core and on-disk bookmark for DDT walks
|
||||
*/
|
||||
typedef struct ddt_bookmark {
|
||||
uint64_t ddb_class;
|
||||
uint64_t ddb_type;
|
||||
uint64_t ddb_checksum;
|
||||
uint64_t ddb_cursor;
|
||||
} ddt_bookmark_t;
|
||||
|
||||
/*
|
||||
* Ops vector to access a specific DDT object type.
|
||||
*/
|
||||
typedef struct ddt_ops {
|
||||
char ddt_op_name[32];
|
||||
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
|
||||
boolean_t prehash);
|
||||
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
|
||||
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
|
||||
ddt_entry_t *dde);
|
||||
int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
|
||||
dmu_tx_t *tx);
|
||||
int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
|
||||
dmu_tx_t *tx);
|
||||
int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
|
||||
uint64_t *walk);
|
||||
uint64_t (*ddt_op_count)(objset_t *os, uint64_t object);
|
||||
} ddt_ops_t;
|
||||
|
||||
#define DDT_NAMELEN 80
|
||||
|
||||
extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
|
||||
enum ddt_class class, char *name);
|
||||
extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
|
||||
enum ddt_class class, uint64_t *walk, ddt_entry_t *dde);
|
||||
extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type,
|
||||
enum ddt_class class);
|
||||
extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
|
||||
enum ddt_class class, dmu_object_info_t *);
|
||||
extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
|
||||
enum ddt_class class);
|
||||
|
||||
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
|
||||
uint64_t txg);
|
||||
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
|
||||
const ddt_phys_t *ddp, blkptr_t *bp);
|
||||
|
||||
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
|
||||
|
||||
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
|
||||
extern void ddt_phys_clear(ddt_phys_t *ddp);
|
||||
extern void ddt_phys_addref(ddt_phys_t *ddp);
|
||||
extern void ddt_phys_decref(ddt_phys_t *ddp);
|
||||
extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
|
||||
uint64_t txg);
|
||||
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
|
||||
extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
|
||||
|
||||
extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
|
||||
|
||||
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
|
||||
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
|
||||
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
|
||||
extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
|
||||
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
|
||||
extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
|
||||
|
||||
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
|
||||
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
|
||||
|
||||
extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
|
||||
ddt_phys_t *ddp_willref);
|
||||
extern int ddt_ditto_copies_present(ddt_entry_t *dde);
|
||||
|
||||
extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
|
||||
extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
|
||||
|
||||
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
|
||||
extern void ddt_enter(ddt_t *ddt);
|
||||
extern void ddt_exit(ddt_t *ddt);
|
||||
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
|
||||
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
|
||||
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
|
||||
|
||||
extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
|
||||
const blkptr_t *bp);
|
||||
|
||||
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
|
||||
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
|
||||
|
||||
extern int ddt_entry_compare(const void *x1, const void *x2);
|
||||
|
||||
extern void ddt_create(spa_t *spa);
|
||||
extern int ddt_load(spa_t *spa);
|
||||
extern void ddt_unload(spa_t *spa);
|
||||
extern void ddt_sync(spa_t *spa, uint64_t txg);
|
||||
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
|
||||
extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
|
||||
enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
|
||||
|
||||
extern const ddt_ops_t ddt_zap_ops;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DDT_H */
|
@ -19,10 +19,11 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
#ifndef _SYS_DMU_H
|
||||
#define _SYS_DMU_H
|
||||
|
||||
@ -38,12 +39,14 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/cred.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct uio;
|
||||
struct xuio;
|
||||
struct page;
|
||||
struct vnode;
|
||||
struct spa;
|
||||
@ -59,8 +62,9 @@ struct drr_end;
|
||||
struct zbookmark;
|
||||
struct spa;
|
||||
struct nvlist;
|
||||
struct objset_impl;
|
||||
struct arc_buf;
|
||||
struct zio_prop;
|
||||
struct sa_handle;
|
||||
|
||||
typedef struct objset objset_t;
|
||||
typedef struct dmu_tx dmu_tx_t;
|
||||
@ -73,8 +77,8 @@ typedef enum dmu_object_type {
|
||||
DMU_OT_OBJECT_ARRAY, /* UINT64 */
|
||||
DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
|
||||
DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
|
||||
DMU_OT_BPLIST, /* UINT64 */
|
||||
DMU_OT_BPLIST_HDR, /* UINT64 */
|
||||
DMU_OT_BPOBJ, /* UINT64 */
|
||||
DMU_OT_BPOBJ_HDR, /* UINT64 */
|
||||
/* spa: */
|
||||
DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
|
||||
DMU_OT_SPACE_MAP, /* UINT64 */
|
||||
@ -114,10 +118,22 @@ typedef enum dmu_object_type {
|
||||
DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
|
||||
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
|
||||
DMU_OT_NEXT_CLONES, /* ZAP */
|
||||
DMU_OT_SCRUB_QUEUE, /* ZAP */
|
||||
DMU_OT_SCAN_QUEUE, /* ZAP */
|
||||
DMU_OT_USERGROUP_USED, /* ZAP */
|
||||
DMU_OT_USERGROUP_QUOTA, /* ZAP */
|
||||
DMU_OT_USERREFS, /* ZAP */
|
||||
DMU_OT_DDT_ZAP, /* ZAP */
|
||||
DMU_OT_DDT_STATS, /* ZAP */
|
||||
DMU_OT_SA, /* System attr */
|
||||
DMU_OT_SA_MASTER_NODE, /* ZAP */
|
||||
DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */
|
||||
DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */
|
||||
DMU_OT_SCAN_XLATE, /* ZAP */
|
||||
DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */
|
||||
DMU_OT_DEADLIST, /* ZAP */
|
||||
DMU_OT_DEADLIST_HDR, /* UINT64 */
|
||||
DMU_OT_DSL_CLONES, /* ZAP */
|
||||
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
|
||||
DMU_OT_NUMTYPES
|
||||
} dmu_object_type_t;
|
||||
|
||||
@ -140,16 +156,6 @@ void zfs_oldacl_byteswap(void *buf, size_t size);
|
||||
void zfs_acl_byteswap(void *buf, size_t size);
|
||||
void zfs_znode_byteswap(void *buf, size_t size);
|
||||
|
||||
#define DS_MODE_NOHOLD 0 /* internal use only */
|
||||
#define DS_MODE_USER 1 /* simple access, no special needs */
|
||||
#define DS_MODE_OWNER 2 /* the "main" access, e.g. a mount */
|
||||
#define DS_MODE_TYPE_MASK 0x3
|
||||
#define DS_MODE_TYPE(x) ((x) & DS_MODE_TYPE_MASK)
|
||||
#define DS_MODE_READONLY 0x8
|
||||
#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY)
|
||||
#define DS_MODE_INCONSISTENT 0x10
|
||||
#define DS_MODE_IS_INCONSISTENT(x) ((x) & DS_MODE_INCONSISTENT)
|
||||
|
||||
#define DS_FIND_SNAPSHOTS (1<<0)
|
||||
#define DS_FIND_CHILDREN (1<<1)
|
||||
|
||||
@ -162,27 +168,35 @@ void zfs_znode_byteswap(void *buf, size_t size);
|
||||
|
||||
#define DMU_USERUSED_OBJECT (-1ULL)
|
||||
#define DMU_GROUPUSED_OBJECT (-2ULL)
|
||||
#define DMU_DEADLIST_OBJECT (-3ULL)
|
||||
|
||||
/*
|
||||
* artificial blkids for bonus buffer and spill blocks
|
||||
*/
|
||||
#define DMU_BONUS_BLKID (-1ULL)
|
||||
#define DMU_SPILL_BLKID (-2ULL)
|
||||
/*
|
||||
* Public routines to create, destroy, open, and close objsets.
|
||||
*/
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
|
||||
int dmu_objset_own(const char *name, dmu_objset_type_t type,
|
||||
boolean_t readonly, void *tag, objset_t **osp);
|
||||
void dmu_objset_rele(objset_t *os, void *tag);
|
||||
void dmu_objset_disown(objset_t *os, void *tag);
|
||||
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
|
||||
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
|
||||
uint64_t flags);
|
||||
int dmu_objset_destroy(const char *name, boolean_t defer);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_rename(const char *name, const char *newname,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
|
||||
int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
|
||||
@ -201,7 +215,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
#define DMU_POOL_DIRECTORY_OBJECT 1
|
||||
#define DMU_POOL_CONFIG "config"
|
||||
#define DMU_POOL_ROOT_DATASET "root_dataset"
|
||||
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
|
||||
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
|
||||
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
|
||||
#define DMU_POOL_ERRLOG_LAST "errlog_last"
|
||||
#define DMU_POOL_SPARES "spares"
|
||||
@ -209,19 +223,12 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
#define DMU_POOL_HISTORY "history"
|
||||
#define DMU_POOL_PROPS "pool_props"
|
||||
#define DMU_POOL_L2CACHE "l2cache"
|
||||
|
||||
/* 4x8 zbookmark_t */
|
||||
#define DMU_POOL_SCRUB_BOOKMARK "scrub_bookmark"
|
||||
/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
|
||||
#define DMU_POOL_SCRUB_QUEUE "scrub_queue"
|
||||
/* 1x8 txg */
|
||||
#define DMU_POOL_SCRUB_MIN_TXG "scrub_min_txg"
|
||||
/* 1x8 txg */
|
||||
#define DMU_POOL_SCRUB_MAX_TXG "scrub_max_txg"
|
||||
/* 1x4 enum scrub_func */
|
||||
#define DMU_POOL_SCRUB_FUNC "scrub_func"
|
||||
/* 1x8 count */
|
||||
#define DMU_POOL_SCRUB_ERRORS "scrub_errors"
|
||||
#define DMU_POOL_TMP_USERREFS "tmp_userrefs"
|
||||
#define DMU_POOL_DDT "DDT-%s-%s-%s"
|
||||
#define DMU_POOL_DDT_STATS "DDT-statistics"
|
||||
#define DMU_POOL_CREATION_VERSION "creation_version"
|
||||
#define DMU_POOL_SCAN "scan"
|
||||
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
|
||||
|
||||
/*
|
||||
* Allocate an object from this objset. The range of object numbers
|
||||
@ -306,11 +313,14 @@ void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Decide how many copies of a given block we should make. Can be from
|
||||
* 1 to SPA_DVAS_PER_BP.
|
||||
* Decide how to write a block: checksum, compression, number of copies, etc.
|
||||
*/
|
||||
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
|
||||
dmu_object_type_t ot);
|
||||
#define WP_NOFILL 0x1
|
||||
#define WP_DMU_SYNC 0x2
|
||||
#define WP_SPILL 0x4
|
||||
|
||||
void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
|
||||
struct zio_prop *zp);
|
||||
/*
|
||||
* The bonus data is accessed more or less like a regular buffer.
|
||||
* You must dmu_bonus_hold() to get the buffer, which will give you a
|
||||
@ -324,6 +334,17 @@ int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
|
||||
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
|
||||
int dmu_bonus_max(void);
|
||||
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
||||
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
|
||||
int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
|
||||
|
||||
/*
|
||||
* Special spill buffer support used by "SA" framework
|
||||
*/
|
||||
|
||||
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
|
||||
void *tag, dmu_buf_t **dbp);
|
||||
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
|
||||
/*
|
||||
* Obtain the DMU buffer from the specified object which contains the
|
||||
@ -340,7 +361,7 @@ int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
||||
* The object number must be a valid, allocated object number.
|
||||
*/
|
||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **);
|
||||
void *tag, dmu_buf_t **, int flags);
|
||||
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
|
||||
void dmu_buf_rele(dmu_buf_t *db, void *tag);
|
||||
uint64_t dmu_buf_refcount(dmu_buf_t *db);
|
||||
@ -437,11 +458,34 @@ void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
|
||||
uint64_t len);
|
||||
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
|
||||
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
|
||||
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* To register a commit callback, dmu_tx_callback_register() must be called.
|
||||
*
|
||||
* dcb_data is a pointer to caller private data that is passed on as a
|
||||
* callback parameter. The caller is responsible for properly allocating and
|
||||
* freeing it.
|
||||
*
|
||||
* When registering a callback, the transaction must be already created, but
|
||||
* it cannot be committed or aborted. It can be assigned to a txg or not.
|
||||
*
|
||||
* The callback will be called after the transaction has been safely written
|
||||
* to stable storage and will also be called if the dmu_tx is aborted.
|
||||
* If there is any error which prevents the transaction from being committed to
|
||||
* disk, the callback will be called with a value of error != 0.
|
||||
*/
|
||||
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
|
||||
|
||||
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||||
void *dcb_data);
|
||||
|
||||
/*
|
||||
* Free up the data blocks for a defined range of a file. If size is
|
||||
* zero, the range from offset to end-of-file is freed.
|
||||
@ -469,12 +513,23 @@ void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
|
||||
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, struct page *pp, dmu_tx_t *tx);
|
||||
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
|
||||
void dmu_return_arcbuf(struct arc_buf *buf);
|
||||
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_xuio_init(struct xuio *uio, int niov);
|
||||
void dmu_xuio_fini(struct xuio *uio);
|
||||
int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
|
||||
size_t n);
|
||||
int dmu_xuio_cnt(struct xuio *uio);
|
||||
struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
|
||||
void dmu_xuio_clear(struct xuio *uio, int i);
|
||||
void xuio_stat_wbuf_copied();
|
||||
void xuio_stat_wbuf_nocopy();
|
||||
|
||||
extern int zfs_prefetch_disable;
|
||||
|
||||
@ -485,19 +540,19 @@ void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t len);
|
||||
|
||||
typedef struct dmu_object_info {
|
||||
/* All sizes are in bytes. */
|
||||
/* All sizes are in bytes unless otherwise indicated. */
|
||||
uint32_t doi_data_block_size;
|
||||
uint32_t doi_metadata_block_size;
|
||||
uint64_t doi_bonus_size;
|
||||
dmu_object_type_t doi_type;
|
||||
dmu_object_type_t doi_bonus_type;
|
||||
uint64_t doi_bonus_size;
|
||||
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
|
||||
uint8_t doi_checksum;
|
||||
uint8_t doi_compress;
|
||||
uint8_t doi_pad[5];
|
||||
/* Values below are number of 512-byte blocks. */
|
||||
uint64_t doi_physical_blks; /* data + metadata */
|
||||
uint64_t doi_max_block_offset;
|
||||
uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */
|
||||
uint64_t doi_max_offset;
|
||||
uint64_t doi_fill_count; /* number of non-empty blocks */
|
||||
} dmu_object_info_t;
|
||||
|
||||
typedef void arc_byteswap_func_t(void *buf, size_t size);
|
||||
@ -566,6 +621,11 @@ void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
*/
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
|
||||
/*
|
||||
* Get the [cm]time for an objset's snapshot dir
|
||||
*/
|
||||
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
|
||||
|
||||
int dmu_objset_is_snapshot(objset_t *os);
|
||||
|
||||
extern struct spa *dmu_objset_spa(objset_t *os);
|
||||
@ -575,6 +635,8 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
|
||||
extern void dmu_objset_name(objset_t *os, char *buf);
|
||||
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
|
||||
extern uint64_t dmu_objset_id(objset_t *os);
|
||||
extern uint64_t dmu_objset_syncprop(objset_t *os);
|
||||
extern uint64_t dmu_objset_logbias(objset_t *os);
|
||||
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
|
||||
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
||||
@ -582,9 +644,8 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
||||
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *idp, uint64_t *offp);
|
||||
|
||||
typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
|
||||
void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
|
||||
dmu_tx_t *tx);
|
||||
typedef int objset_used_cb_t(dmu_object_type_t bonustype,
|
||||
void *bonus, uint64_t *userp, uint64_t *groupp);
|
||||
extern void dmu_objset_register_type(dmu_objset_type_t ost,
|
||||
objset_used_cb_t *cb);
|
||||
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
|
||||
@ -605,9 +666,20 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
* storage when the write completes this new data does not become a
|
||||
* permanent part of the file until the associated transaction commits.
|
||||
*/
|
||||
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
|
||||
int dmu_sync(struct zio *zio, dmu_buf_t *db,
|
||||
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
|
||||
|
||||
/*
|
||||
* {zfs,zvol,ztest}_get_done() args
|
||||
*/
|
||||
typedef struct zgd {
|
||||
struct zilog *zgd_zilog;
|
||||
struct blkptr *zgd_bp;
|
||||
dmu_buf_t *zgd_db;
|
||||
struct rl *zgd_rl;
|
||||
void *zgd_private;
|
||||
} zgd_t;
|
||||
|
||||
typedef void dmu_sync_cb_t(zgd_t *arg, int error);
|
||||
int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd);
|
||||
|
||||
/*
|
||||
* Find the next hole or data block in file starting at *off
|
||||
@ -642,11 +714,12 @@ typedef struct dmu_recv_cookie {
|
||||
struct dsl_dataset *drc_real_ds;
|
||||
struct drr_begin *drc_drrb;
|
||||
char *drc_tosnap;
|
||||
char *drc_top_ds;
|
||||
boolean_t drc_newfs;
|
||||
boolean_t drc_force;
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
|
||||
boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
|
||||
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc);
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -210,8 +210,7 @@ extern "C" {
|
||||
*
|
||||
* ds_lock
|
||||
* protects:
|
||||
* ds_user_ptr
|
||||
* ds_user_evict_func
|
||||
* ds_objset
|
||||
* ds_open_refcount
|
||||
* ds_snapname
|
||||
* ds_phys accounting
|
||||
@ -233,6 +232,39 @@ extern "C" {
|
||||
struct objset;
|
||||
struct dmu_pool;
|
||||
|
||||
typedef struct dmu_xuio {
|
||||
int next;
|
||||
int cnt;
|
||||
struct arc_buf **bufs;
|
||||
iovec_t *iovp;
|
||||
} dmu_xuio_t;
|
||||
|
||||
typedef struct xuio_stats {
|
||||
/* loaned yet not returned arc_buf */
|
||||
kstat_named_t xuiostat_onloan_rbuf;
|
||||
kstat_named_t xuiostat_onloan_wbuf;
|
||||
/* whether a copy is made when loaning out a read buffer */
|
||||
kstat_named_t xuiostat_rbuf_copied;
|
||||
kstat_named_t xuiostat_rbuf_nocopy;
|
||||
/* whether a copy is made when assigning a write buffer */
|
||||
kstat_named_t xuiostat_wbuf_copied;
|
||||
kstat_named_t xuiostat_wbuf_nocopy;
|
||||
} xuio_stats_t;
|
||||
|
||||
static xuio_stats_t xuio_stats = {
|
||||
{ "onloan_read_buf", KSTAT_DATA_UINT64 },
|
||||
{ "onloan_write_buf", KSTAT_DATA_UINT64 },
|
||||
{ "read_buf_copied", KSTAT_DATA_UINT64 },
|
||||
{ "read_buf_nocopy", KSTAT_DATA_UINT64 },
|
||||
{ "write_buf_copied", KSTAT_DATA_UINT64 },
|
||||
{ "write_buf_nocopy", KSTAT_DATA_UINT64 }
|
||||
};
|
||||
|
||||
#define XUIOSTAT_INCR(stat, val) \
|
||||
atomic_add_64(&xuio_stats.stat.value.ui64, (val))
|
||||
#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -19,10 +19,11 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
#ifndef _SYS_DMU_OBJSET_H
|
||||
#define _SYS_DMU_OBJSET_H
|
||||
|
||||
@ -33,6 +34,7 @@
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/sa.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -40,11 +42,13 @@ extern "C" {
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dmu_tx;
|
||||
struct objset_impl;
|
||||
|
||||
#define OBJSET_PHYS_SIZE 2048
|
||||
#define OBJSET_OLD_PHYS_SIZE 1024
|
||||
|
||||
#define OBJSET_BUF_HAS_USERUSED(buf) \
|
||||
(arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE)
|
||||
|
||||
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
|
||||
|
||||
typedef struct objset_phys {
|
||||
@ -59,11 +63,6 @@ typedef struct objset_phys {
|
||||
} objset_phys_t;
|
||||
|
||||
struct objset {
|
||||
struct objset_impl *os;
|
||||
int os_mode;
|
||||
};
|
||||
|
||||
typedef struct objset_impl {
|
||||
/* Immutable: */
|
||||
struct dsl_dataset *os_dsl_dataset;
|
||||
spa_t *os_spa;
|
||||
@ -73,12 +72,17 @@ typedef struct objset_impl {
|
||||
dnode_t *os_userused_dnode;
|
||||
dnode_t *os_groupused_dnode;
|
||||
zilog_t *os_zil;
|
||||
objset_t os;
|
||||
uint8_t os_checksum; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_compress; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_copies; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
|
||||
|
||||
/* can change, under dsl_dir's locks: */
|
||||
uint8_t os_checksum;
|
||||
uint8_t os_compress;
|
||||
uint8_t os_copies;
|
||||
uint8_t os_dedup_checksum;
|
||||
uint8_t os_dedup_verify;
|
||||
uint8_t os_logbias;
|
||||
uint8_t os_primary_cache;
|
||||
uint8_t os_secondary_cache;
|
||||
uint8_t os_sync;
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
@ -101,8 +105,12 @@ typedef struct objset_impl {
|
||||
/* stuff we store for the user */
|
||||
kmutex_t os_user_ptr_lock;
|
||||
void *os_user_ptr;
|
||||
} objset_impl_t;
|
||||
|
||||
/* SA layout/attribute registration */
|
||||
sa_os_t *os_sa;
|
||||
};
|
||||
|
||||
#define DMU_META_OBJSET 0
|
||||
#define DMU_META_DNODE_OBJECT 0
|
||||
#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
|
||||
|
||||
@ -111,14 +119,18 @@ typedef struct objset_impl {
|
||||
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
|
||||
|
||||
/* called from zpl */
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
|
||||
int dmu_objset_own(const char *name, dmu_objset_type_t type,
|
||||
boolean_t readonly, void *tag, objset_t **osp);
|
||||
void dmu_objset_rele(objset_t *os, void *tag);
|
||||
void dmu_objset_disown(objset_t *os, void *tag);
|
||||
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
|
||||
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
|
||||
uint64_t flags);
|
||||
int dmu_objset_destroy(const char *name, boolean_t defer);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
|
||||
boolean_t recursive);
|
||||
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
|
||||
@ -126,23 +138,26 @@ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
|
||||
int flags);
|
||||
int dmu_objset_find_spa(spa_t *spa, const char *name,
|
||||
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
|
||||
int dmu_objset_prefetch(char *name, void *arg);
|
||||
int dmu_objset_prefetch(const char *name, void *arg);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
|
||||
|
||||
/* called from dsl */
|
||||
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
||||
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
|
||||
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
||||
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
|
||||
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
|
||||
objset_impl_t **osip);
|
||||
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
|
||||
void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
|
||||
boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
|
||||
objset_t **osp);
|
||||
void dmu_objset_evict(objset_t *os);
|
||||
void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
|
||||
void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
|
||||
boolean_t dmu_objset_userused_enabled(objset_t *os);
|
||||
int dmu_objset_userspace_upgrade(objset_t *os);
|
||||
boolean_t dmu_objset_userspace_present(objset_t *os);
|
||||
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TRAVERSE_H
|
||||
@ -36,19 +35,24 @@ extern "C" {
|
||||
|
||||
struct dnode_phys;
|
||||
struct dsl_dataset;
|
||||
struct zilog;
|
||||
struct arc_buf;
|
||||
|
||||
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
|
||||
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
|
||||
typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp,
|
||||
void *arg);
|
||||
|
||||
#define TRAVERSE_PRE (1<<0)
|
||||
#define TRAVERSE_POST (1<<1)
|
||||
#define TRAVERSE_PREFETCH_METADATA (1<<2)
|
||||
#define TRAVERSE_PREFETCH_DATA (1<<3)
|
||||
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
|
||||
#define TRAVERSE_HARD (1<<4)
|
||||
|
||||
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
|
||||
int flags, blkptr_cb_t func, void *arg);
|
||||
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
|
||||
int traverse_dataset(struct dsl_dataset *ds,
|
||||
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
|
||||
int traverse_pool(spa_t *spa,
|
||||
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -19,15 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TX_H
|
||||
#define _SYS_DMU_TX_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/txg.h>
|
||||
@ -59,6 +57,7 @@ struct dmu_tx {
|
||||
txg_handle_t tx_txgh;
|
||||
void *tx_tempreserve_cookie;
|
||||
struct dmu_tx_hold *tx_needassign_txh;
|
||||
list_t tx_callbacks; /* list of dmu_tx_callback_t on this dmu_tx */
|
||||
uint8_t tx_anyobj;
|
||||
int tx_err;
|
||||
#ifdef ZFS_DEBUG
|
||||
@ -78,6 +77,7 @@ enum dmu_tx_hold_type {
|
||||
THT_FREE,
|
||||
THT_ZAP,
|
||||
THT_SPACE,
|
||||
THT_SPILL,
|
||||
THT_NUMTYPES
|
||||
};
|
||||
|
||||
@ -98,6 +98,11 @@ typedef struct dmu_tx_hold {
|
||||
#endif
|
||||
} dmu_tx_hold_t;
|
||||
|
||||
typedef struct dmu_tx_callback {
|
||||
list_node_t dcb_node; /* linked to tx_callbacks list */
|
||||
dmu_tx_callback_func_t *dcb_func; /* caller function pointer */
|
||||
void *dcb_data; /* caller private data */
|
||||
} dmu_tx_callback_t;
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu.h, and are called by the user.
|
||||
@ -109,6 +114,10 @@ void dmu_tx_abort(dmu_tx_t *tx);
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
|
||||
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||||
void *dcb_data);
|
||||
void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||||
*/
|
||||
|
@ -19,15 +19,13 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _DFETCH_H
|
||||
#define _DFETCH_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -63,6 +61,9 @@ typedef struct zfetch {
|
||||
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
|
||||
} zfetch_t;
|
||||
|
||||
void zfetch_init(void);
|
||||
void zfetch_fini(void);
|
||||
|
||||
void dmu_zfetch_init(zfetch_t *, struct dnode *);
|
||||
void dmu_zfetch_rele(zfetch_t *);
|
||||
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DNODE_H
|
||||
@ -62,6 +61,18 @@ extern "C" {
|
||||
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
|
||||
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
|
||||
|
||||
/*
|
||||
* dnode id flags
|
||||
*
|
||||
* Note: a file will never ever have its
|
||||
* ids moved from bonus->spill
|
||||
* and only in a crypto environment would it be on spill
|
||||
*/
|
||||
#define DN_ID_CHKED_BONUS 0x1
|
||||
#define DN_ID_CHKED_SPILL 0x2
|
||||
#define DN_ID_OLD_EXIST 0x4
|
||||
#define DN_ID_NEW_EXIST 0x8
|
||||
|
||||
/*
|
||||
* Derived constants.
|
||||
*/
|
||||
@ -70,10 +81,12 @@ extern "C" {
|
||||
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
|
||||
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
|
||||
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
|
||||
#define DN_KILL_SPILLBLK (1)
|
||||
|
||||
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
|
||||
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
|
||||
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
|
||||
#define DNODES_PER_LEVEL (1ULL << DNODES_PER_LEVEL_SHIFT)
|
||||
|
||||
/* The +2 here is a cheesy way to round up */
|
||||
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
|
||||
@ -88,7 +101,7 @@ extern "C" {
|
||||
#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift))
|
||||
|
||||
struct dmu_buf_impl;
|
||||
struct objset_impl;
|
||||
struct objset;
|
||||
struct zio;
|
||||
|
||||
enum dnode_dirtycontext {
|
||||
@ -101,6 +114,9 @@ enum dnode_dirtycontext {
|
||||
#define DNODE_FLAG_USED_BYTES (1<<0)
|
||||
#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
|
||||
|
||||
/* Does dnode have a SA spill blkptr in bonus? */
|
||||
#define DNODE_FLAG_SPILL_BLKPTR (1<<2)
|
||||
|
||||
typedef struct dnode_phys {
|
||||
uint8_t dn_type; /* dmu_object_type_t */
|
||||
uint8_t dn_indblkshift; /* ln2(indirect block size) */
|
||||
@ -121,7 +137,8 @@ typedef struct dnode_phys {
|
||||
uint64_t dn_pad3[4];
|
||||
|
||||
blkptr_t dn_blkptr[1];
|
||||
uint8_t dn_bonus[DN_MAX_BONUSLEN];
|
||||
uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)];
|
||||
blkptr_t dn_spill;
|
||||
} dnode_phys_t;
|
||||
|
||||
typedef struct dnode {
|
||||
@ -136,7 +153,7 @@ typedef struct dnode {
|
||||
list_node_t dn_link;
|
||||
|
||||
/* immutable: */
|
||||
struct objset_impl *dn_objset;
|
||||
struct objset *dn_objset;
|
||||
uint64_t dn_object;
|
||||
struct dmu_buf_impl *dn_dbuf;
|
||||
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
|
||||
@ -161,6 +178,8 @@ typedef struct dnode {
|
||||
uint8_t dn_next_nblkptr[TXG_SIZE];
|
||||
uint8_t dn_next_nlevels[TXG_SIZE];
|
||||
uint8_t dn_next_indblkshift[TXG_SIZE];
|
||||
uint8_t dn_next_bonustype[TXG_SIZE];
|
||||
uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */
|
||||
uint16_t dn_next_bonuslen[TXG_SIZE];
|
||||
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
|
||||
|
||||
@ -185,12 +204,17 @@ typedef struct dnode {
|
||||
kmutex_t dn_dbufs_mtx;
|
||||
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
|
||||
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
|
||||
boolean_t dn_have_spill; /* have spill or are spilling */
|
||||
|
||||
/* parent IO for current sync write */
|
||||
zio_t *dn_zio;
|
||||
|
||||
/* used in syncing context */
|
||||
dnode_phys_t *dn_oldphys;
|
||||
uint64_t dn_oldused; /* old phys used bytes */
|
||||
uint64_t dn_oldflags; /* old phys dn_flags */
|
||||
uint64_t dn_olduid, dn_oldgid;
|
||||
uint64_t dn_newuid, dn_newgid;
|
||||
int dn_id_flags;
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
@ -202,14 +226,17 @@ typedef struct free_range {
|
||||
uint64_t fr_nblks;
|
||||
} free_range_t;
|
||||
|
||||
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
|
||||
dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
|
||||
uint64_t object);
|
||||
void dnode_special_close(dnode_t *dn);
|
||||
|
||||
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
|
||||
int dnode_hold(struct objset_impl *dd, uint64_t object,
|
||||
void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx);
|
||||
void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
|
||||
|
||||
int dnode_hold(struct objset *dd, uint64_t object,
|
||||
void *ref, dnode_t **dnp);
|
||||
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
|
||||
int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
|
||||
void *ref, dnode_t **dnp);
|
||||
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
|
||||
void dnode_rele(dnode_t *dn, void *ref);
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DATASET_H
|
||||
@ -33,6 +32,7 @@
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dsl_deadlist.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -42,8 +42,6 @@ struct dsl_dataset;
|
||||
struct dsl_dir;
|
||||
struct dsl_pool;
|
||||
|
||||
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
|
||||
|
||||
#define DS_FLAG_INCONSISTENT (1ULL<<0)
|
||||
#define DS_IS_INCONSISTENT(ds) \
|
||||
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
|
||||
@ -85,7 +83,7 @@ typedef struct dsl_dataset_phys {
|
||||
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
|
||||
uint64_t ds_creation_time; /* seconds since 1970 */
|
||||
uint64_t ds_creation_txg;
|
||||
uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
|
||||
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
|
||||
uint64_t ds_used_bytes;
|
||||
uint64_t ds_compressed_bytes;
|
||||
uint64_t ds_uncompressed_bytes;
|
||||
@ -115,10 +113,10 @@ typedef struct dsl_dataset {
|
||||
|
||||
/* only used in syncing context, only valid for non-snapshots: */
|
||||
struct dsl_dataset *ds_prev;
|
||||
uint64_t ds_origin_txg;
|
||||
|
||||
/* has internal locking: */
|
||||
bplist_t ds_deadlist;
|
||||
dsl_deadlist_t ds_deadlist;
|
||||
bplist_t ds_pending_deadlist;
|
||||
|
||||
/* to protect against multiple concurrent incremental recv */
|
||||
kmutex_t ds_recvlock;
|
||||
@ -132,8 +130,7 @@ typedef struct dsl_dataset {
|
||||
* Protected by ds_lock:
|
||||
*/
|
||||
kmutex_t ds_lock;
|
||||
void *ds_user_ptr;
|
||||
dsl_dataset_evict_func_t *ds_user_evict_func;
|
||||
objset_t *ds_objset;
|
||||
uint64_t ds_userrefs;
|
||||
|
||||
/*
|
||||
@ -165,7 +162,7 @@ struct dsl_ds_destroyarg {
|
||||
boolean_t need_prep; /* do we need to retry due to EBUSY? */
|
||||
};
|
||||
|
||||
#define dsl_dataset_is_snapshot(ds) \
|
||||
#define dsl_dataset_is_snapshot(ds) \
|
||||
((ds)->ds_phys->ds_num_children != 0)
|
||||
|
||||
#define DS_UNIQUE_IS_ACCURATE(ds) \
|
||||
@ -174,17 +171,17 @@ struct dsl_ds_destroyarg {
|
||||
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
|
||||
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
void *tag, dsl_dataset_t **);
|
||||
int dsl_dataset_own(const char *name, int flags, void *owner,
|
||||
dsl_dataset_t **dsp);
|
||||
int dsl_dataset_own(const char *name, boolean_t inconsistentok,
|
||||
void *tag, dsl_dataset_t **dsp);
|
||||
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
int flags, void *owner, dsl_dataset_t **);
|
||||
boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
|
||||
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
|
||||
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
|
||||
void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
|
||||
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
|
||||
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
|
||||
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
|
||||
void *owner);
|
||||
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
|
||||
void *tag);
|
||||
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
|
||||
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
|
||||
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
|
||||
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
@ -195,21 +192,18 @@ dsl_checkfunc_t dsl_dataset_destroy_check;
|
||||
dsl_syncfunc_t dsl_dataset_destroy_sync;
|
||||
dsl_checkfunc_t dsl_dataset_snapshot_check;
|
||||
dsl_syncfunc_t dsl_dataset_snapshot_sync;
|
||||
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
|
||||
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
|
||||
int dsl_dataset_promote(const char *name);
|
||||
int dsl_dataset_promote(const char *name, char *conflsnap);
|
||||
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
|
||||
boolean_t force);
|
||||
int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
|
||||
boolean_t recursive);
|
||||
boolean_t recursive, boolean_t temphold);
|
||||
int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
|
||||
boolean_t recursive);
|
||||
int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
|
||||
char *htag);
|
||||
int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
|
||||
|
||||
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
|
||||
void *p, dsl_dataset_evict_func_t func);
|
||||
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
|
||||
|
||||
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
|
||||
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
@ -219,10 +213,12 @@ boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
|
||||
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
|
||||
void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
|
||||
dmu_tx_t *tx);
|
||||
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
|
||||
int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
|
||||
dmu_tx_t *tx, boolean_t async);
|
||||
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
|
||||
uint64_t blk_birth);
|
||||
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
|
||||
@ -238,13 +234,13 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
|
||||
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
|
||||
uint64_t asize, uint64_t inflight, uint64_t *used,
|
||||
uint64_t *ref_rsrv);
|
||||
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
|
||||
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
|
||||
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
|
||||
int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
|
||||
uint64_t quota);
|
||||
dsl_syncfunc_t dsl_dataset_set_quota_sync;
|
||||
int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
|
||||
uint64_t reservation);
|
||||
|
||||
int dsl_destroy_inconsistent(const char *dsname, void *arg);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_ds(ds, fmt, ...) do { \
|
||||
|
87
module/zfs/include/sys/dsl_deadlist.h
Normal file
87
module/zfs/include/sys/dsl_deadlist.h
Normal file
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DEADLIST_H
|
||||
#define _SYS_DSL_DEADLIST_H
|
||||
|
||||
#include <sys/bpobj.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dmu_buf;
|
||||
struct dsl_dataset;
|
||||
|
||||
typedef struct dsl_deadlist_phys {
|
||||
uint64_t dl_used;
|
||||
uint64_t dl_comp;
|
||||
uint64_t dl_uncomp;
|
||||
uint64_t dl_pad[37]; /* pad out to 320b for future expansion */
|
||||
} dsl_deadlist_phys_t;
|
||||
|
||||
typedef struct dsl_deadlist {
|
||||
objset_t *dl_os;
|
||||
uint64_t dl_object;
|
||||
avl_tree_t dl_tree;
|
||||
boolean_t dl_havetree;
|
||||
struct dmu_buf *dl_dbuf;
|
||||
dsl_deadlist_phys_t *dl_phys;
|
||||
kmutex_t dl_lock;
|
||||
|
||||
/* if it's the old on-disk format: */
|
||||
bpobj_t dl_bpobj;
|
||||
boolean_t dl_oldfmt;
|
||||
} dsl_deadlist_t;
|
||||
|
||||
typedef struct dsl_deadlist_entry {
|
||||
avl_node_t dle_node;
|
||||
uint64_t dle_mintxg;
|
||||
bpobj_t dle_bpobj;
|
||||
} dsl_deadlist_entry_t;
|
||||
|
||||
void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object);
|
||||
void dsl_deadlist_close(dsl_deadlist_t *dl);
|
||||
uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx);
|
||||
void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx);
|
||||
void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
|
||||
void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
|
||||
uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
|
||||
uint64_t mrs_obj, dmu_tx_t *tx);
|
||||
void dsl_deadlist_space(dsl_deadlist_t *dl,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
void dsl_deadlist_space_range(dsl_deadlist_t *dl,
|
||||
uint64_t mintxg, uint64_t maxtxg,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx);
|
||||
void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DEADLIST_H */
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DIR_H
|
||||
@ -70,7 +69,8 @@ typedef struct dsl_dir_phys {
|
||||
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
|
||||
uint64_t dd_flags;
|
||||
uint64_t dd_used_breakdown[DD_USED_NUM];
|
||||
uint64_t dd_pad[14]; /* pad out to 256 bytes for good measure */
|
||||
uint64_t dd_clones; /* dsl_dir objects */
|
||||
uint64_t dd_pad[13]; /* pad out to 256 bytes for good measure */
|
||||
} dsl_dir_phys_t;
|
||||
|
||||
struct dsl_dir {
|
||||
@ -89,6 +89,8 @@ struct dsl_dir {
|
||||
/* Protected by dd_lock */
|
||||
kmutex_t dd_lock;
|
||||
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
|
||||
timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
|
||||
uint64_t dd_origin_txg;
|
||||
|
||||
/* gross estimate of space used by in-flight tx's */
|
||||
uint64_t dd_tempreserved[TXG_SIZE];
|
||||
@ -125,18 +127,24 @@ void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
|
||||
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
|
||||
void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
|
||||
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
|
||||
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
|
||||
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
|
||||
int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
|
||||
uint64_t quota);
|
||||
int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
|
||||
uint64_t reservation);
|
||||
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
|
||||
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
|
||||
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
|
||||
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
|
||||
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
|
||||
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
|
||||
void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
|
||||
timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
|
||||
|
||||
/* internal reserved dir name */
|
||||
#define MOS_DIR_NAME "$MOS"
|
||||
#define ORIGIN_DIR_NAME "$ORIGIN"
|
||||
#define XLATION_DIR_NAME "$XLATION"
|
||||
#define FREE_DIR_NAME "$FREE"
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_dd(dd, fmt, ...) do { \
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_POOL_H
|
||||
@ -32,6 +31,9 @@
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/bpobj.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -42,12 +44,7 @@ struct dsl_dir;
|
||||
struct dsl_dataset;
|
||||
struct dsl_pool;
|
||||
struct dmu_tx;
|
||||
|
||||
enum scrub_func {
|
||||
SCRUB_FUNC_NONE,
|
||||
SCRUB_FUNC_CLEAN,
|
||||
SCRUB_FUNC_NUMFUNCS
|
||||
};
|
||||
struct dsl_scan;
|
||||
|
||||
/* These macros are for indexing into the zfs_all_blkstats_t. */
|
||||
#define DMU_OT_DEFERRED DMU_OT_NONE
|
||||
@ -75,6 +72,7 @@ typedef struct dsl_pool {
|
||||
struct objset *dp_meta_objset;
|
||||
struct dsl_dir *dp_root_dir;
|
||||
struct dsl_dir *dp_mos_dir;
|
||||
struct dsl_dir *dp_free_dir;
|
||||
struct dsl_dataset *dp_origin_snap;
|
||||
uint64_t dp_root_dir_obj;
|
||||
struct taskq *dp_vnrele_taskq;
|
||||
@ -83,25 +81,18 @@ typedef struct dsl_pool {
|
||||
blkptr_t dp_meta_rootbp;
|
||||
list_t dp_synced_datasets;
|
||||
hrtime_t dp_read_overhead;
|
||||
uint64_t dp_throughput;
|
||||
uint64_t dp_throughput; /* bytes per millisec */
|
||||
uint64_t dp_write_limit;
|
||||
uint64_t dp_tmp_userrefs_obj;
|
||||
bpobj_t dp_free_bpobj;
|
||||
|
||||
struct dsl_scan *dp_scan;
|
||||
|
||||
/* Uses dp_lock */
|
||||
kmutex_t dp_lock;
|
||||
uint64_t dp_space_towrite[TXG_SIZE];
|
||||
uint64_t dp_tempreserved[TXG_SIZE];
|
||||
|
||||
enum scrub_func dp_scrub_func;
|
||||
uint64_t dp_scrub_queue_obj;
|
||||
uint64_t dp_scrub_min_txg;
|
||||
uint64_t dp_scrub_max_txg;
|
||||
zbookmark_t dp_scrub_bookmark;
|
||||
boolean_t dp_scrub_pausing;
|
||||
boolean_t dp_scrub_isresilver;
|
||||
uint64_t dp_scrub_start_time;
|
||||
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
|
||||
boolean_t dp_scrub_restart;
|
||||
|
||||
/* Has its own locking */
|
||||
tx_state_t dp_tx;
|
||||
txg_list_t dp_dirty_datasets;
|
||||
@ -123,29 +114,36 @@ int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
|
||||
void dsl_pool_close(dsl_pool_t *dp);
|
||||
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
|
||||
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
|
||||
void dsl_pool_zil_clean(dsl_pool_t *dp);
|
||||
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
|
||||
int dsl_pool_sync_context(dsl_pool_t *dp);
|
||||
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
|
||||
uint64_t dsl_pool_adjustedfree(dsl_pool_t *dp, boolean_t netfree);
|
||||
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_memory_pressure(dsl_pool_t *dp);
|
||||
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
|
||||
struct dmu_tx *tx);
|
||||
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
|
||||
void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg,
|
||||
const blkptr_t *bpp);
|
||||
int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
|
||||
arc_done_func_t *done, void *private, int priority, int zio_flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
|
||||
arc_done_func_t *done, void *private, int priority, int zio_flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
|
||||
int dsl_pool_scrub_cancel(dsl_pool_t *dp);
|
||||
int dsl_pool_scrub_clean(dsl_pool_t *dp);
|
||||
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_scrub_restart(dsl_pool_t *dp);
|
||||
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
|
||||
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
|
||||
|
||||
extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
|
||||
const char *tag, uint64_t *now, dmu_tx_t *tx);
|
||||
extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
|
||||
const char *tag, dmu_tx_t *tx);
|
||||
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
|
||||
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_PROP_H
|
||||
@ -49,6 +48,25 @@ typedef struct dsl_prop_cb_record {
|
||||
void *cbr_arg;
|
||||
} dsl_prop_cb_record_t;
|
||||
|
||||
typedef struct dsl_props_arg {
|
||||
nvlist_t *pa_props;
|
||||
zprop_source_t pa_source;
|
||||
} dsl_props_arg_t;
|
||||
|
||||
typedef struct dsl_prop_set_arg {
|
||||
const char *psa_name;
|
||||
zprop_source_t psa_source;
|
||||
int psa_intsz;
|
||||
int psa_numints;
|
||||
const void *psa_value;
|
||||
|
||||
/*
|
||||
* Used to handle the special requirements of the quota and reservation
|
||||
* properties.
|
||||
*/
|
||||
uint64_t psa_effective_value;
|
||||
} dsl_prop_setarg_t;
|
||||
|
||||
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
|
||||
@ -59,18 +77,36 @@ int dsl_prop_get(const char *ddname, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_integer(const char *ddname, const char *propname,
|
||||
uint64_t *valuep, char *setpoint);
|
||||
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
|
||||
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
|
||||
int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
|
||||
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int intsz, int numints, void *buf, char *setpoint,
|
||||
boolean_t snapshot);
|
||||
|
||||
dsl_syncfunc_t dsl_props_set_sync;
|
||||
int dsl_prop_set(const char *ddname, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
int dsl_props_set(const char *dsname, nvlist_t *nvl);
|
||||
zprop_source_t source, int intsz, int numints, const void *buf);
|
||||
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
|
||||
void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
dmu_tx_t *tx);
|
||||
|
||||
void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
|
||||
zprop_source_t source, uint64_t *value);
|
||||
int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
|
||||
#ifdef ZFS_DEBUG
|
||||
void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
|
||||
#define DSL_PROP_CHECK_PREDICTION(dd, psa) \
|
||||
dsl_prop_check_prediction((dd), (psa))
|
||||
#else
|
||||
#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */
|
||||
#endif
|
||||
|
||||
/* flag first receive on or after SPA_VERSION_RECVD_PROPS */
|
||||
boolean_t dsl_prop_get_hasrecvd(objset_t *os);
|
||||
void dsl_prop_set_hasrecvd(objset_t *os);
|
||||
void dsl_prop_unset_hasrecvd(objset_t *os);
|
||||
|
||||
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
|
||||
void dsl_prop_nvlist_add_string(nvlist_t *nv,
|
||||
|
108
module/zfs/include/sys/dsl_scan.h
Normal file
108
module/zfs/include/sys/dsl_scan.h
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_SCAN_H
|
||||
#define _SYS_DSL_SCAN_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/bplist.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct objset;
|
||||
struct dsl_dir;
|
||||
struct dsl_dataset;
|
||||
struct dsl_pool;
|
||||
struct dmu_tx;
|
||||
|
||||
/*
|
||||
* All members of this structure must be uint64_t, for byteswap
|
||||
* purposes.
|
||||
*/
|
||||
typedef struct dsl_scan_phys {
|
||||
uint64_t scn_func; /* pool_scan_func_t */
|
||||
uint64_t scn_state; /* dsl_scan_state_t */
|
||||
uint64_t scn_queue_obj;
|
||||
uint64_t scn_min_txg;
|
||||
uint64_t scn_max_txg;
|
||||
uint64_t scn_cur_min_txg;
|
||||
uint64_t scn_cur_max_txg;
|
||||
uint64_t scn_start_time;
|
||||
uint64_t scn_end_time;
|
||||
uint64_t scn_to_examine; /* total bytes to be scanned */
|
||||
uint64_t scn_examined; /* bytes scanned so far */
|
||||
uint64_t scn_to_process;
|
||||
uint64_t scn_processed;
|
||||
uint64_t scn_errors; /* scan I/O error count */
|
||||
uint64_t scn_ddt_class_max;
|
||||
ddt_bookmark_t scn_ddt_bookmark;
|
||||
zbookmark_t scn_bookmark;
|
||||
uint64_t scn_flags; /* dsl_scan_flags_t */
|
||||
} dsl_scan_phys_t;
|
||||
|
||||
#define SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t))
|
||||
|
||||
typedef enum dsl_scan_flags {
|
||||
DSF_VISIT_DS_AGAIN = 1<<0,
|
||||
} dsl_scan_flags_t;
|
||||
|
||||
typedef struct dsl_scan {
|
||||
struct dsl_pool *scn_dp;
|
||||
|
||||
boolean_t scn_pausing;
|
||||
uint64_t scn_restart_txg;
|
||||
uint64_t scn_sync_start_time;
|
||||
zio_t *scn_zio_root;
|
||||
|
||||
/* for debugging / information */
|
||||
uint64_t scn_visited_this_txg;
|
||||
|
||||
dsl_scan_phys_t scn_phys;
|
||||
} dsl_scan_t;
|
||||
|
||||
int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
|
||||
void dsl_scan_fini(struct dsl_pool *dp);
|
||||
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
|
||||
int dsl_scan_cancel(struct dsl_pool *);
|
||||
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
|
||||
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
|
||||
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
|
||||
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
|
||||
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
|
||||
ddt_entry_t *dde, dmu_tx_t *tx);
|
||||
void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
|
||||
struct dmu_tx *tx);
|
||||
boolean_t dsl_scan_active(dsl_scan_t *scn);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_SCAN_H */
|
@ -19,15 +19,12 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_SYNCTASK_H
|
||||
#define _SYS_DSL_SYNCTASK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
@ -38,7 +35,7 @@ extern "C" {
|
||||
struct dsl_pool;
|
||||
|
||||
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
|
||||
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
|
||||
typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
|
||||
|
||||
typedef struct dsl_sync_task {
|
||||
list_node_t dst_node;
|
||||
@ -53,7 +50,6 @@ typedef struct dsl_sync_task_group {
|
||||
txg_node_t dstg_node;
|
||||
list_t dstg_tasks;
|
||||
struct dsl_pool *dstg_pool;
|
||||
cred_t *dstg_cr;
|
||||
uint64_t dstg_txg;
|
||||
int dstg_err;
|
||||
int dstg_space;
|
||||
|
@ -68,6 +68,18 @@ extern "C" {
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS "bad_range_sets"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
|
||||
|
||||
#define FM_EREPORT_FAILMODE_WAIT "wait"
|
||||
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
|
||||
@ -75,6 +87,7 @@ extern "C" {
|
||||
|
||||
#define FM_RESOURCE_REMOVED "removed"
|
||||
#define FM_RESOURCE_AUTOREPLACE "autoreplace"
|
||||
#define FM_RESOURCE_STATECHANGE "statechange"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -20,8 +20,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FM_PROTOCOL_H
|
||||
@ -47,6 +46,7 @@ extern "C" {
|
||||
/* FM event class values */
|
||||
#define FM_EREPORT_CLASS "ereport"
|
||||
#define FM_FAULT_CLASS "fault"
|
||||
#define FM_DEFECT_CLASS "defect"
|
||||
#define FM_RSRC_CLASS "resource"
|
||||
#define FM_LIST_EVENT "list"
|
||||
|
||||
@ -83,6 +83,7 @@ extern "C" {
|
||||
#define FM_SUSPECT_FAULT_LIST "fault-list"
|
||||
#define FM_SUSPECT_FAULT_SZ "fault-list-sz"
|
||||
#define FM_SUSPECT_FAULT_STATUS "fault-status"
|
||||
#define FM_SUSPECT_INJECTED "__injected"
|
||||
#define FM_SUSPECT_MESSAGE "message"
|
||||
#define FM_SUSPECT_RETIRE "retire"
|
||||
#define FM_SUSPECT_RESPONSE "response"
|
||||
@ -122,6 +123,7 @@ extern "C" {
|
||||
#define FM_RSRC_ASRU_REPAIRED "repaired"
|
||||
#define FM_RSRC_ASRU_REPLACED "replaced"
|
||||
#define FM_RSRC_ASRU_ACQUITTED "acquitted"
|
||||
#define FM_RSRC_ASRU_RESOLVED "resolved"
|
||||
#define FM_RSRC_ASRU_UNUSABLE "unusable"
|
||||
#define FM_RSRC_ASRU_EVENT "event"
|
||||
|
||||
@ -170,6 +172,7 @@ extern "C" {
|
||||
|
||||
/* FMRI authority-type member names */
|
||||
#define FM_FMRI_AUTH_CHASSIS "chassis-id"
|
||||
#define FM_FMRI_AUTH_PRODUCT_SN "product-sn"
|
||||
#define FM_FMRI_AUTH_PRODUCT "product-id"
|
||||
#define FM_FMRI_AUTH_DOMAIN "domain-id"
|
||||
#define FM_FMRI_AUTH_SERVER "server-id"
|
||||
@ -243,6 +246,7 @@ extern "C" {
|
||||
|
||||
/* dev scheme member names */
|
||||
#define FM_FMRI_DEV_ID "devid"
|
||||
#define FM_FMRI_DEV_TGTPTLUN0 "target-port-l0id"
|
||||
#define FM_FMRI_DEV_PATH "device-path"
|
||||
|
||||
/* pkg scheme member names */
|
||||
@ -311,7 +315,7 @@ extern int i_fm_payload_set(nvlist_t *, const char *, va_list);
|
||||
extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *,
|
||||
int, ...);
|
||||
extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *,
|
||||
const char *);
|
||||
const char *, const char *);
|
||||
extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *);
|
||||
extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
|
||||
uint8_t *, const char *);
|
||||
@ -320,6 +324,8 @@ extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
|
||||
extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
|
||||
const char *, const char *);
|
||||
extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
|
||||
extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *,
|
||||
nvlist_t *, int, ...);
|
||||
|
||||
extern uint64_t fm_ena_increment(uint64_t);
|
||||
extern uint64_t fm_ena_generate(uint64_t, uchar_t);
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_H
|
||||
@ -36,9 +35,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct metaslab_class metaslab_class_t;
|
||||
typedef struct metaslab_group metaslab_group_t;
|
||||
|
||||
extern space_map_ops_t *zfs_metaslab_ops;
|
||||
|
||||
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
|
||||
@ -46,6 +42,7 @@ extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
|
||||
extern void metaslab_fini(metaslab_t *msp);
|
||||
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
|
||||
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
|
||||
extern void metaslab_sync_reassess(metaslab_group_t *mg);
|
||||
|
||||
#define METASLAB_HINTBP_FAVOR 0x0
|
||||
#define METASLAB_HINTBP_AVOID 0x1
|
||||
@ -57,14 +54,24 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
|
||||
boolean_t now);
|
||||
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
|
||||
|
||||
extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops);
|
||||
extern metaslab_class_t *metaslab_class_create(spa_t *spa,
|
||||
space_map_ops_t *ops);
|
||||
extern void metaslab_class_destroy(metaslab_class_t *mc);
|
||||
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
extern int metaslab_class_validate(metaslab_class_t *mc);
|
||||
|
||||
extern void metaslab_class_space_update(metaslab_class_t *mc,
|
||||
int64_t alloc_delta, int64_t defer_delta,
|
||||
int64_t space_delta, int64_t dspace_delta);
|
||||
extern uint64_t metaslab_class_get_alloc(metaslab_class_t *mc);
|
||||
extern uint64_t metaslab_class_get_space(metaslab_class_t *mc);
|
||||
extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
|
||||
extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
|
||||
|
||||
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
|
||||
vdev_t *vd);
|
||||
extern void metaslab_group_destroy(metaslab_group_t *mg);
|
||||
extern void metaslab_group_activate(metaslab_group_t *mg);
|
||||
extern void metaslab_group_passivate(metaslab_group_t *mg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -37,16 +37,23 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
struct metaslab_class {
|
||||
spa_t *mc_spa;
|
||||
metaslab_group_t *mc_rotor;
|
||||
uint64_t mc_allocated;
|
||||
space_map_ops_t *mc_ops;
|
||||
uint64_t mc_aliquot;
|
||||
uint64_t mc_alloc; /* total allocated space */
|
||||
uint64_t mc_deferred; /* total deferred frees */
|
||||
uint64_t mc_space; /* total space (alloc + free) */
|
||||
uint64_t mc_dspace; /* total deflated space */
|
||||
};
|
||||
|
||||
struct metaslab_group {
|
||||
kmutex_t mg_lock;
|
||||
avl_tree_t mg_metaslab_tree;
|
||||
uint64_t mg_aliquot;
|
||||
uint64_t mg_bonus_area;
|
||||
int64_t mg_bias;
|
||||
int64_t mg_activation_count;
|
||||
metaslab_class_t *mg_class;
|
||||
vdev_t *mg_vd;
|
||||
metaslab_group_t *mg_prev;
|
||||
@ -66,7 +73,9 @@ struct metaslab {
|
||||
space_map_obj_t ms_smo_syncing; /* syncing space map object */
|
||||
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
|
||||
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
|
||||
space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
|
||||
space_map_t ms_map; /* in-core free space map */
|
||||
int64_t ms_deferspace; /* sum of ms_defermap[] space */
|
||||
uint64_t ms_weight; /* weight vs. others in group */
|
||||
metaslab_group_t *ms_group; /* metaslab group */
|
||||
avl_node_t ms_group_node; /* node in metaslab group tree */
|
||||
|
@ -19,15 +19,12 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_REFCOUNT_H
|
||||
#define _SYS_REFCOUNT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/zfs_context.h>
|
||||
@ -91,6 +88,11 @@ typedef struct refcount {
|
||||
atomic_add_64_nv(&(rc)->rc_count, number)
|
||||
#define refcount_remove_many(rc, number, holder) \
|
||||
atomic_add_64_nv(&(rc)->rc_count, -number)
|
||||
#define refcount_transfer(dst, src) { \
|
||||
uint64_t __tmp = (src)->rc_count; \
|
||||
atomic_add_64(&(src)->rc_count, -__tmp); \
|
||||
atomic_add_64(&(dst)->rc_count, __tmp); \
|
||||
}
|
||||
|
||||
#define refcount_init()
|
||||
#define refcount_fini()
|
||||
|
171
module/zfs/include/sys/sa.h
Normal file
171
module/zfs/include/sys/sa.h
Normal file
@ -0,0 +1,171 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SA_H
|
||||
#define _SYS_SA_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
/*
|
||||
* Currently available byteswap functions.
|
||||
* If it all possible new attributes should used
|
||||
* one of the already defined byteswap functions.
|
||||
* If a new byteswap function is added then the
|
||||
* ZPL/Pool version will need to be bumped.
|
||||
*/
|
||||
|
||||
typedef enum sa_bswap_type {
|
||||
SA_UINT64_ARRAY,
|
||||
SA_UINT32_ARRAY,
|
||||
SA_UINT16_ARRAY,
|
||||
SA_UINT8_ARRAY,
|
||||
SA_ACL,
|
||||
} sa_bswap_type_t;
|
||||
|
||||
typedef uint16_t sa_attr_type_t;
|
||||
|
||||
/*
|
||||
* Attribute to register support for.
|
||||
*/
|
||||
typedef struct sa_attr_reg {
|
||||
char *sa_name; /* attribute name */
|
||||
uint16_t sa_length;
|
||||
sa_bswap_type_t sa_byteswap; /* bswap functon enum */
|
||||
sa_attr_type_t sa_attr; /* filled in during registration */
|
||||
} sa_attr_reg_t;
|
||||
|
||||
|
||||
typedef void (sa_data_locator_t)(void **, uint32_t *, uint32_t,
|
||||
boolean_t, void *userptr);
|
||||
|
||||
/*
|
||||
* array of attributes to store.
|
||||
*
|
||||
* This array should be treated as opaque/private data.
|
||||
* The SA_BULK_ADD_ATTR() macro should be used for manipulating
|
||||
* the array.
|
||||
*
|
||||
* When sa_replace_all_by_template() is used the attributes
|
||||
* will be stored in the order defined in the array, except that
|
||||
* the attributes may be split between the bonus and the spill buffer
|
||||
*
|
||||
*/
|
||||
typedef struct sa_bulk_attr {
|
||||
void *sa_data;
|
||||
sa_data_locator_t *sa_data_func;
|
||||
uint16_t sa_length;
|
||||
sa_attr_type_t sa_attr;
|
||||
/* the following are private to the sa framework */
|
||||
void *sa_addr;
|
||||
uint16_t sa_buftype;
|
||||
uint16_t sa_size;
|
||||
} sa_bulk_attr_t;
|
||||
|
||||
|
||||
/*
|
||||
* special macro for adding entries for bulk attr support
|
||||
* bulk - sa_bulk_attr_t
|
||||
* count - integer that will be incremented during each add
|
||||
* attr - attribute to manipulate
|
||||
* func - function for accessing data.
|
||||
* data - pointer to data.
|
||||
* len - length of data
|
||||
*/
|
||||
|
||||
#define SA_ADD_BULK_ATTR(b, idx, attr, func, data, len) \
|
||||
{ \
|
||||
b[idx].sa_attr = attr;\
|
||||
b[idx].sa_data_func = func; \
|
||||
b[idx].sa_data = data; \
|
||||
b[idx++].sa_length = len; \
|
||||
}
|
||||
|
||||
typedef struct sa_os sa_os_t;
|
||||
|
||||
typedef enum sa_handle_type {
|
||||
SA_HDL_SHARED,
|
||||
SA_HDL_PRIVATE
|
||||
} sa_handle_type_t;
|
||||
|
||||
struct sa_handle;
|
||||
typedef void *sa_lookup_tab_t;
|
||||
typedef struct sa_handle sa_handle_t;
|
||||
|
||||
typedef void (sa_update_cb_t)(sa_handle_t *, dmu_tx_t *tx);
|
||||
|
||||
int sa_handle_get(objset_t *, uint64_t, void *userp,
|
||||
sa_handle_type_t, sa_handle_t **);
|
||||
int sa_handle_get_from_db(objset_t *, dmu_buf_t *, void *userp,
|
||||
sa_handle_type_t, sa_handle_t **);
|
||||
void sa_handle_destroy(sa_handle_t *);
|
||||
int sa_buf_hold(objset_t *, uint64_t, void *, dmu_buf_t **);
|
||||
void sa_buf_rele(dmu_buf_t *, void *);
|
||||
int sa_lookup(sa_handle_t *, sa_attr_type_t, void *buf, uint32_t buflen);
|
||||
int sa_update(sa_handle_t *, sa_attr_type_t, void *buf,
|
||||
uint32_t buflen, dmu_tx_t *);
|
||||
int sa_remove(sa_handle_t *, sa_attr_type_t, dmu_tx_t *);
|
||||
int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count);
|
||||
int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count);
|
||||
int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *);
|
||||
int sa_size(sa_handle_t *, sa_attr_type_t, int *);
|
||||
int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
|
||||
uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
|
||||
void sa_object_info(sa_handle_t *, dmu_object_info_t *);
|
||||
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
|
||||
void sa_update_user(sa_handle_t *, sa_handle_t *);
|
||||
void *sa_get_userdata(sa_handle_t *);
|
||||
void sa_set_userp(sa_handle_t *, void *);
|
||||
dmu_buf_t *sa_get_db(sa_handle_t *);
|
||||
uint64_t sa_handle_object(sa_handle_t *);
|
||||
boolean_t sa_attr_would_spill(sa_handle_t *, sa_attr_type_t, int size);
|
||||
void sa_register_update_callback(objset_t *, sa_update_cb_t *);
|
||||
sa_attr_type_t *sa_setup(objset_t *, uint64_t, sa_attr_reg_t *, int);
|
||||
void sa_tear_down(objset_t *);
|
||||
int sa_replace_all_by_template(sa_handle_t *, sa_bulk_attr_t *,
|
||||
int, dmu_tx_t *);
|
||||
int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *,
|
||||
int, dmu_tx_t *);
|
||||
boolean_t sa_enabled(objset_t *);
|
||||
void sa_cache_init();
|
||||
void sa_cache_fini();
|
||||
int sa_set_sa_object(objset_t *, uint64_t);
|
||||
int sa_hdrsize(void *);
|
||||
void sa_handle_lock(sa_handle_t *);
|
||||
void sa_handle_unlock(sa_handle_t *);
|
||||
|
||||
#ifdef _KERNEL
|
||||
int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SA_H */
|
288
module/zfs/include/sys/sa_impl.h
Normal file
288
module/zfs/include/sys/sa_impl.h
Normal file
@ -0,0 +1,288 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SA_IMPL_H
|
||||
#define _SYS_SA_IMPL_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/list.h>
|
||||
|
||||
/*
|
||||
* Array of known attributes and their
|
||||
* various characteristics.
|
||||
*/
|
||||
typedef struct sa_attr_table {
|
||||
sa_attr_type_t sa_attr;
|
||||
uint8_t sa_registered;
|
||||
uint16_t sa_length;
|
||||
sa_bswap_type_t sa_byteswap;
|
||||
char *sa_name;
|
||||
} sa_attr_table_t;
|
||||
|
||||
/*
|
||||
* Zap attribute format for attribute registration
|
||||
*
|
||||
* 64 56 48 40 32 24 16 8 0
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* | unused | len | bswap | attr num |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
*
|
||||
* Zap attribute format for layout information.
|
||||
*
|
||||
* layout information is stored as an array of attribute numbers
|
||||
* The name of the attribute is the layout number (0, 1, 2, ...)
|
||||
*
|
||||
* 16 0
|
||||
* +---- ---+
|
||||
* | attr # |
|
||||
* +--------+
|
||||
* | attr # |
|
||||
* +--- ----+
|
||||
* ......
|
||||
*
|
||||
*/
|
||||
|
||||
#define ATTR_BSWAP(x) BF32_GET(x, 16, 8)
|
||||
#define ATTR_LENGTH(x) BF32_GET(x, 24, 16)
|
||||
#define ATTR_NUM(x) BF32_GET(x, 0, 16)
|
||||
#define ATTR_ENCODE(x, attr, length, bswap) \
|
||||
{ \
|
||||
BF64_SET(x, 24, 16, length); \
|
||||
BF64_SET(x, 16, 8, bswap); \
|
||||
BF64_SET(x, 0, 16, attr); \
|
||||
}
|
||||
|
||||
#define TOC_OFF(x) BF32_GET(x, 0, 23)
|
||||
#define TOC_ATTR_PRESENT(x) BF32_GET(x, 31, 1)
|
||||
#define TOC_LEN_IDX(x) BF32_GET(x, 24, 4)
|
||||
#define TOC_ATTR_ENCODE(x, len_idx, offset) \
|
||||
{ \
|
||||
BF32_SET(x, 31, 1, 1); \
|
||||
BF32_SET(x, 24, 7, len_idx); \
|
||||
BF32_SET(x, 0, 24, offset); \
|
||||
}
|
||||
|
||||
#define SA_LAYOUTS "LAYOUTS"
|
||||
#define SA_REGISTRY "REGISTRY"
|
||||
|
||||
/*
|
||||
* Each unique layout will have their own table
|
||||
* sa_lot (layout_table)
|
||||
*/
|
||||
typedef struct sa_lot {
|
||||
avl_node_t lot_num_node;
|
||||
avl_node_t lot_hash_node;
|
||||
uint64_t lot_num;
|
||||
uint64_t lot_hash;
|
||||
sa_attr_type_t *lot_attrs; /* array of attr #'s */
|
||||
uint32_t lot_var_sizes; /* how many aren't fixed size */
|
||||
uint32_t lot_attr_count; /* total attr count */
|
||||
list_t lot_idx_tab; /* should be only a couple of entries */
|
||||
int lot_instance; /* used with lot_hash to identify entry */
|
||||
} sa_lot_t;
|
||||
|
||||
/* index table of offsets */
|
||||
typedef struct sa_idx_tab {
|
||||
list_node_t sa_next;
|
||||
sa_lot_t *sa_layout;
|
||||
uint16_t *sa_variable_lengths;
|
||||
refcount_t sa_refcount;
|
||||
uint32_t *sa_idx_tab; /* array of offsets */
|
||||
} sa_idx_tab_t;
|
||||
|
||||
/*
|
||||
* Since the offset/index information into the actual data
|
||||
* will usually be identical we can share that information with
|
||||
* all handles that have the exact same offsets.
|
||||
*
|
||||
* You would typically only have a large number of different table of
|
||||
* contents if you had a several variable sized attributes.
|
||||
*
|
||||
* Two AVL trees are used to track the attribute layout numbers.
|
||||
* one is keyed by number and will be consulted when a DMU_OT_SA
|
||||
* object is first read. The second tree is keyed by the hash signature
|
||||
* of the attributes and will be consulted when an attribute is added
|
||||
* to determine if we already have an instance of that layout. Both
|
||||
* of these tree's are interconnected. The only difference is that
|
||||
* when an entry is found in the "hash" tree the list of attributes will
|
||||
* need to be compared against the list of attributes you have in hand.
|
||||
* The assumption is that typically attributes will just be updated and
|
||||
* adding a completely new attribute is a very rare operation.
|
||||
*/
|
||||
struct sa_os {
|
||||
kmutex_t sa_lock;
|
||||
boolean_t sa_need_attr_registration;
|
||||
boolean_t sa_force_spill;
|
||||
uint64_t sa_master_obj;
|
||||
uint64_t sa_reg_attr_obj;
|
||||
uint64_t sa_layout_attr_obj;
|
||||
int sa_num_attrs;
|
||||
sa_attr_table_t *sa_attr_table; /* private attr table */
|
||||
sa_update_cb_t *sa_update_cb;
|
||||
avl_tree_t sa_layout_num_tree; /* keyed by layout number */
|
||||
avl_tree_t sa_layout_hash_tree; /* keyed by layout hash value */
|
||||
int sa_user_table_sz;
|
||||
sa_attr_type_t *sa_user_table; /* user name->attr mapping table */
|
||||
};
|
||||
|
||||
/*
|
||||
* header for all bonus and spill buffers.
|
||||
* The header has a fixed portion with a variable number
|
||||
* of "lengths" depending on the number of variable sized
|
||||
* attribues which are determined by the "layout number"
|
||||
*/
|
||||
|
||||
#define SA_MAGIC 0x2F505A /* ZFS SA */
|
||||
typedef struct sa_hdr_phys {
|
||||
uint32_t sa_magic;
|
||||
uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */
|
||||
uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */
|
||||
/* ... Data follows the lengths. */
|
||||
} sa_hdr_phys_t;
|
||||
|
||||
/*
|
||||
* sa_hdr_phys -> sa_layout_info
|
||||
*
|
||||
* 16 10 0
|
||||
* +--------+-------+
|
||||
* | hdrsz |layout |
|
||||
* +--------+-------+
|
||||
*
|
||||
* Bits 0-10 are the layout number
|
||||
* Bits 11-16 are the size of the header.
|
||||
* The hdrsize is the number * 8
|
||||
*
|
||||
* For example.
|
||||
* hdrsz of 1 ==> 8 byte header
|
||||
* 2 ==> 16 byte header
|
||||
*
|
||||
*/
|
||||
|
||||
#define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10)
|
||||
#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0)
|
||||
#define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
|
||||
{ \
|
||||
BF32_SET_SB(x, 10, 6, 3, 0, size); \
|
||||
BF32_SET(x, 0, 10, num); \
|
||||
}
|
||||
|
||||
typedef enum sa_buf_type {
|
||||
SA_BONUS = 1,
|
||||
SA_SPILL = 2
|
||||
} sa_buf_type_t;
|
||||
|
||||
typedef enum sa_data_op {
|
||||
SA_LOOKUP,
|
||||
SA_UPDATE,
|
||||
SA_ADD,
|
||||
SA_REPLACE,
|
||||
SA_REMOVE
|
||||
} sa_data_op_t;
|
||||
|
||||
/*
|
||||
* Opaque handle used for most sa functions
|
||||
*
|
||||
* This needs to be kept as small as possible.
|
||||
*/
|
||||
|
||||
struct sa_handle {
|
||||
kmutex_t sa_lock;
|
||||
dmu_buf_t *sa_bonus;
|
||||
dmu_buf_t *sa_spill;
|
||||
objset_t *sa_os;
|
||||
void *sa_userp;
|
||||
sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */
|
||||
sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */
|
||||
};
|
||||
|
||||
#define SA_GET_DB(hdl, type) \
|
||||
(dmu_buf_impl_t *)((type == SA_BONUS) ? hdl->sa_bonus : hdl->sa_spill)
|
||||
|
||||
#define SA_GET_HDR(hdl, type) \
|
||||
((sa_hdr_phys_t *)((dmu_buf_impl_t *)(SA_GET_DB(hdl, \
|
||||
type))->db.db_data))
|
||||
|
||||
#define SA_IDX_TAB_GET(hdl, type) \
|
||||
(type == SA_BONUS ? hdl->sa_bonus_tab : hdl->sa_spill_tab)
|
||||
|
||||
#define IS_SA_BONUSTYPE(a) \
|
||||
((a == DMU_OT_SA) ? B_TRUE : B_FALSE)
|
||||
|
||||
#define SA_BONUSTYPE_FROM_DB(db) \
|
||||
(((dmu_buf_impl_t *)db)->db_dnode->dn_bonustype)
|
||||
|
||||
#define SA_BLKPTR_SPACE (DN_MAX_BONUSLEN - sizeof (blkptr_t))
|
||||
|
||||
#define SA_LAYOUT_NUM(x, type) \
|
||||
((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \
|
||||
((SA_HDR_LAYOUT_NUM(x)) == 0)) ? 1 : SA_HDR_LAYOUT_NUM(x))))
|
||||
|
||||
|
||||
#define SA_REGISTERED_LEN(sa, attr) sa->sa_attr_table[attr].sa_length
|
||||
|
||||
#define SA_ATTR_LEN(sa, idx, attr, hdr) ((SA_REGISTERED_LEN(sa, attr) == 0) ?\
|
||||
hdr->sa_lengths[TOC_LEN_IDX(idx->sa_idx_tab[attr])] : \
|
||||
SA_REGISTERED_LEN(sa, attr))
|
||||
|
||||
#define SA_SET_HDR(hdr, num, size) \
|
||||
{ \
|
||||
hdr->sa_magic = SA_MAGIC; \
|
||||
SA_HDR_LAYOUT_INFO_ENCODE(hdr->sa_layout_info, num, size); \
|
||||
}
|
||||
|
||||
#define SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
|
||||
{ \
|
||||
bulk.sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \
|
||||
bulk.sa_buftype = type; \
|
||||
bulk.sa_addr = \
|
||||
(void *)((uintptr_t)TOC_OFF(idx->sa_idx_tab[attr]) + \
|
||||
(uintptr_t)hdr); \
|
||||
}
|
||||
|
||||
#define SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
|
||||
(SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \
|
||||
(tb->lot_var_sizes > 1 ? P2ROUNDUP((tb->lot_var_sizes - 1) * \
|
||||
sizeof (uint16_t), 8) : 0)))
|
||||
|
||||
int sa_add_impl(sa_handle_t *, sa_attr_type_t,
|
||||
uint32_t, sa_data_locator_t, void *, dmu_tx_t *);
|
||||
|
||||
void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
|
||||
int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
|
||||
|
||||
void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
|
||||
int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
|
||||
uint16_t *, sa_hdr_phys_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SA_IMPL_H */
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_H
|
||||
@ -43,8 +42,13 @@ extern "C" {
|
||||
typedef struct spa spa_t;
|
||||
typedef struct vdev vdev_t;
|
||||
typedef struct metaslab metaslab_t;
|
||||
typedef struct metaslab_group metaslab_group_t;
|
||||
typedef struct metaslab_class metaslab_class_t;
|
||||
typedef struct zio zio_t;
|
||||
typedef struct zilog zilog_t;
|
||||
typedef struct spa_aux_vdev spa_aux_vdev_t;
|
||||
typedef struct ddt ddt_t;
|
||||
typedef struct ddt_entry ddt_entry_t;
|
||||
struct dsl_pool;
|
||||
|
||||
/*
|
||||
@ -134,15 +138,15 @@ typedef struct zio_cksum {
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 5 |G| offset3 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
|
||||
* 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 8 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 9 | padding |
|
||||
* 9 | physical birth txg |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* a | birth txg |
|
||||
* a | logical birth txg |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* b | fill count |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
@ -166,25 +170,29 @@ typedef struct zio_cksum {
|
||||
* cksum checksum function
|
||||
* comp compression function
|
||||
* G gang block indicator
|
||||
* E endianness
|
||||
* type DMU object type
|
||||
* B byteorder (endianness)
|
||||
* D dedup
|
||||
* X unused
|
||||
* lvl level of indirection
|
||||
* birth txg transaction group in which the block was born
|
||||
* type DMU object type
|
||||
* phys birth txg of block allocation; zero if same as logical birth txg
|
||||
* log. birth transaction group in which the block was logically born
|
||||
* fill count number of non-zero blocks under this bp
|
||||
* checksum[4] 256-bit checksum of the data this bp describes
|
||||
*/
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
uint64_t blk_pad[3]; /* Extra space for the future */
|
||||
uint64_t blk_birth; /* transaction group at birth */
|
||||
uint64_t blk_fill; /* fill count */
|
||||
zio_cksum_t blk_cksum; /* 256-bit checksum */
|
||||
} blkptr_t;
|
||||
|
||||
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
|
||||
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
|
||||
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
uint64_t blk_pad[2]; /* Extra space for the future */
|
||||
uint64_t blk_phys_birth; /* txg when block was allocated */
|
||||
uint64_t blk_birth; /* transaction group at birth */
|
||||
uint64_t blk_fill; /* fill count */
|
||||
zio_cksum_t blk_cksum; /* 256-bit checksum */
|
||||
} blkptr_t;
|
||||
|
||||
/*
|
||||
* Macros to get and set fields in a bp or DVA.
|
||||
*/
|
||||
@ -208,8 +216,7 @@ typedef struct blkptr {
|
||||
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
|
||||
|
||||
#define BP_GET_LSIZE(bp) \
|
||||
(BP_IS_HOLE(bp) ? 0 : \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
|
||||
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_LSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
@ -218,20 +225,35 @@ typedef struct blkptr {
|
||||
#define BP_SET_PSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
|
||||
|
||||
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
|
||||
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
|
||||
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
|
||||
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
|
||||
|
||||
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
|
||||
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
|
||||
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
|
||||
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
|
||||
|
||||
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
|
||||
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
|
||||
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
|
||||
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
|
||||
|
||||
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
|
||||
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1)
|
||||
#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
|
||||
|
||||
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
|
||||
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
|
||||
|
||||
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
|
||||
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
|
||||
#define BP_PHYSICAL_BIRTH(bp) \
|
||||
((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
|
||||
|
||||
#define BP_SET_BIRTH(bp, logical, physical) \
|
||||
{ \
|
||||
(bp)->blk_birth = (logical); \
|
||||
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
|
||||
}
|
||||
|
||||
#define BP_GET_ASIZE(bp) \
|
||||
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
@ -239,7 +261,7 @@ typedef struct blkptr {
|
||||
|
||||
#define BP_GET_UCSIZE(bp) \
|
||||
((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
|
||||
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
|
||||
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
|
||||
|
||||
#define BP_GET_NDVAS(bp) \
|
||||
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
@ -255,6 +277,12 @@ typedef struct blkptr {
|
||||
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
|
||||
(dva1)->dva_word[0] == (dva2)->dva_word[0])
|
||||
|
||||
#define BP_EQUAL(bp1, bp2) \
|
||||
(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
|
||||
|
||||
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
|
||||
(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
|
||||
((zc1).zc_word[1] - (zc2).zc_word[1]) | \
|
||||
@ -274,7 +302,10 @@ typedef struct blkptr {
|
||||
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
|
||||
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
|
||||
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
|
||||
|
||||
/* BP_IS_RAIDZ(bp) assumes no block compression */
|
||||
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
|
||||
BP_GET_PSIZE(bp))
|
||||
|
||||
#define BP_ZERO(bp) \
|
||||
{ \
|
||||
@ -287,14 +318,12 @@ typedef struct blkptr {
|
||||
(bp)->blk_prop = 0; \
|
||||
(bp)->blk_pad[0] = 0; \
|
||||
(bp)->blk_pad[1] = 0; \
|
||||
(bp)->blk_pad[2] = 0; \
|
||||
(bp)->blk_phys_birth = 0; \
|
||||
(bp)->blk_birth = 0; \
|
||||
(bp)->blk_fill = 0; \
|
||||
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
|
||||
}
|
||||
|
||||
#define BLK_FILL_ALREADY_FREED (-1ULL)
|
||||
|
||||
/*
|
||||
* Note: the byteorder is either 0 or -1, both of which are palindromes.
|
||||
* This simplifies the endianness handling a bit.
|
||||
@ -309,17 +338,81 @@ typedef struct blkptr {
|
||||
|
||||
#define BP_SPRINTF_LEN 320
|
||||
|
||||
/*
|
||||
* This macro allows code sharing between zfs, libzpool, and mdb.
|
||||
* 'func' is either snprintf() or mdb_snprintf().
|
||||
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
|
||||
*/
|
||||
#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \
|
||||
{ \
|
||||
static const char *copyname[] = \
|
||||
{ "zero", "single", "double", "triple" }; \
|
||||
int size = BP_SPRINTF_LEN; \
|
||||
int len = 0; \
|
||||
int copies = 0; \
|
||||
\
|
||||
if (bp == NULL) { \
|
||||
len = func(buf + len, size - len, "<NULL>"); \
|
||||
} else if (BP_IS_HOLE(bp)) { \
|
||||
len = func(buf + len, size - len, "<hole>"); \
|
||||
} else { \
|
||||
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
|
||||
const dva_t *dva = &bp->blk_dva[d]; \
|
||||
if (DVA_IS_VALID(dva)) \
|
||||
copies++; \
|
||||
len += func(buf + len, size - len, \
|
||||
"DVA[%d]=<%llu:%llx:%llx>%c", d, \
|
||||
(u_longlong_t)DVA_GET_VDEV(dva), \
|
||||
(u_longlong_t)DVA_GET_OFFSET(dva), \
|
||||
(u_longlong_t)DVA_GET_ASIZE(dva), \
|
||||
ws); \
|
||||
} \
|
||||
if (BP_IS_GANG(bp) && \
|
||||
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
|
||||
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
|
||||
copies--; \
|
||||
len += func(buf + len, size - len, \
|
||||
"[L%llu %s] %s %s %s %s %s %s%c" \
|
||||
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
|
||||
"cksum=%llx:%llx:%llx:%llx", \
|
||||
(u_longlong_t)BP_GET_LEVEL(bp), \
|
||||
type, \
|
||||
checksum, \
|
||||
compress, \
|
||||
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
|
||||
BP_IS_GANG(bp) ? "gang" : "contiguous", \
|
||||
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
|
||||
copyname[copies], \
|
||||
ws, \
|
||||
(u_longlong_t)BP_GET_LSIZE(bp), \
|
||||
(u_longlong_t)BP_GET_PSIZE(bp), \
|
||||
(u_longlong_t)bp->blk_birth, \
|
||||
(u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
|
||||
(u_longlong_t)bp->blk_fill, \
|
||||
ws, \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[0], \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[1], \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[2], \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[3]); \
|
||||
} \
|
||||
ASSERT(len < size); \
|
||||
}
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#define BP_GET_BUFC_TYPE(bp) \
|
||||
(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
|
||||
ARC_BUFC_METADATA : ARC_BUFC_DATA);
|
||||
/*
|
||||
* Routines found in spa.c
|
||||
*/
|
||||
|
||||
typedef enum spa_import_type {
|
||||
SPA_IMPORT_EXISTING,
|
||||
SPA_IMPORT_ASSEMBLE
|
||||
} spa_import_type_t;
|
||||
|
||||
/* state manipulation functions */
|
||||
extern int spa_open(const char *pool, spa_t **, void *tag);
|
||||
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
|
||||
nvlist_t *policy, nvlist_t **config);
|
||||
extern int spa_get_stats(const char *pool, nvlist_t **config,
|
||||
char *altroot, size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
@ -338,6 +431,8 @@ extern void spa_async_suspend(spa_t *spa);
|
||||
extern void spa_async_resume(spa_t *spa);
|
||||
extern spa_t *spa_inject_addref(char *pool);
|
||||
extern void spa_inject_delref(spa_t *spa);
|
||||
extern void spa_scan_stat_init(spa_t *spa);
|
||||
extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
|
||||
|
||||
#define SPA_ASYNC_CONFIG_UPDATE 0x01
|
||||
#define SPA_ASYNC_REMOVE 0x02
|
||||
@ -345,6 +440,14 @@ extern void spa_inject_delref(spa_t *spa);
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x08
|
||||
#define SPA_ASYNC_RESILVER 0x10
|
||||
#define SPA_ASYNC_AUTOEXPAND 0x20
|
||||
#define SPA_ASYNC_REMOVE_DONE 0x40
|
||||
#define SPA_ASYNC_REMOVE_STOP 0x80
|
||||
|
||||
/*
|
||||
* Controls the behavior of spa_vdev_remove().
|
||||
*/
|
||||
#define SPA_REMOVE_UNSPARE 0x01
|
||||
#define SPA_REMOVE_DONE 0x02
|
||||
|
||||
/* device manipulation */
|
||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
|
||||
@ -353,8 +456,11 @@ extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
|
||||
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
|
||||
int replace_done);
|
||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
extern boolean_t spa_vdev_remove_active(spa_t *spa);
|
||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
|
||||
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
|
||||
nvlist_t *props, boolean_t exp);
|
||||
|
||||
/* spare state (which is global across all pools) */
|
||||
extern void spa_spare_add(vdev_t *vd);
|
||||
@ -368,15 +474,23 @@ extern void spa_l2cache_remove(vdev_t *vd);
|
||||
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
|
||||
extern void spa_l2cache_activate(vdev_t *vd);
|
||||
extern void spa_l2cache_drop(spa_t *spa);
|
||||
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
|
||||
|
||||
/* scrubbing */
|
||||
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
|
||||
/* scanning */
|
||||
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
|
||||
extern int spa_scan_stop(spa_t *spa);
|
||||
|
||||
/* spa syncing */
|
||||
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
|
||||
extern void spa_sync_allpools(void);
|
||||
|
||||
/*
|
||||
* DEFERRED_FREE must be large enough that regular blocks are not
|
||||
* deferred. XXX so can't we change it back to 1?
|
||||
*/
|
||||
#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */
|
||||
#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */
|
||||
#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */
|
||||
|
||||
/* spa namespace global mutex */
|
||||
extern kmutex_t spa_namespace_lock;
|
||||
|
||||
@ -394,7 +508,6 @@ extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
||||
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||
int getstats);
|
||||
extern void spa_config_update(spa_t *spa, int what);
|
||||
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
|
||||
|
||||
/*
|
||||
* Miscellaneous SPA routines in spa_misc.c
|
||||
@ -402,7 +515,7 @@ extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
|
||||
|
||||
/* Namespace manipulation */
|
||||
extern spa_t *spa_lookup(const char *name);
|
||||
extern spa_t *spa_add(const char *name, const char *altroot);
|
||||
extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
|
||||
extern void spa_remove(spa_t *spa);
|
||||
extern spa_t *spa_next(spa_t *prev);
|
||||
|
||||
@ -411,6 +524,7 @@ extern void spa_open_ref(spa_t *spa, void *tag);
|
||||
extern void spa_close(spa_t *spa, void *tag);
|
||||
extern boolean_t spa_refcount_zero(spa_t *spa);
|
||||
|
||||
#define SCL_NONE 0x00
|
||||
#define SCL_CONFIG 0x01
|
||||
#define SCL_STATE 0x02
|
||||
#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
|
||||
@ -430,12 +544,30 @@ extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
||||
|
||||
/* Pool vdev add/remove lock */
|
||||
extern uint64_t spa_vdev_enter(spa_t *spa);
|
||||
extern uint64_t spa_vdev_config_enter(spa_t *spa);
|
||||
extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||
int error, char *tag);
|
||||
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
|
||||
|
||||
/* Pool vdev state change lock */
|
||||
extern void spa_vdev_state_enter(spa_t *spa);
|
||||
extern void spa_vdev_state_enter(spa_t *spa, int oplock);
|
||||
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
|
||||
|
||||
/* Log state */
|
||||
typedef enum spa_log_state {
|
||||
SPA_LOG_UNKNOWN = 0, /* unknown log state */
|
||||
SPA_LOG_MISSING, /* missing log(s) */
|
||||
SPA_LOG_CLEAR, /* clear the log(s) */
|
||||
SPA_LOG_GOOD, /* log(s) are good */
|
||||
} spa_log_state_t;
|
||||
|
||||
extern spa_log_state_t spa_get_log_state(spa_t *spa);
|
||||
extern void spa_set_log_state(spa_t *spa, spa_log_state_t state);
|
||||
extern int spa_offline_log(spa_t *spa);
|
||||
|
||||
/* Log claim callback */
|
||||
extern void spa_claim_notify(zio_t *zio);
|
||||
|
||||
/* Accessor functions */
|
||||
extern boolean_t spa_shutting_down(spa_t *spa);
|
||||
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
|
||||
@ -447,18 +579,26 @@ extern char *spa_name(spa_t *spa);
|
||||
extern uint64_t spa_guid(spa_t *spa);
|
||||
extern uint64_t spa_last_synced_txg(spa_t *spa);
|
||||
extern uint64_t spa_first_txg(spa_t *spa);
|
||||
extern uint64_t spa_syncing_txg(spa_t *spa);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern pool_state_t spa_state(spa_t *spa);
|
||||
extern spa_load_state_t spa_load_state(spa_t *spa);
|
||||
extern uint64_t spa_freeze_txg(spa_t *spa);
|
||||
extern uint64_t spa_get_alloc(spa_t *spa);
|
||||
extern uint64_t spa_get_space(spa_t *spa);
|
||||
extern uint64_t spa_get_dspace(spa_t *spa);
|
||||
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
|
||||
extern uint64_t spa_get_dspace(spa_t *spa);
|
||||
extern void spa_update_dspace(spa_t *spa);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern boolean_t spa_deflate(spa_t *spa);
|
||||
extern metaslab_class_t *spa_normal_class(spa_t *spa);
|
||||
extern metaslab_class_t *spa_log_class(spa_t *spa);
|
||||
extern int spa_max_replication(spa_t *spa);
|
||||
extern int spa_prev_software_version(spa_t *spa);
|
||||
extern int spa_busy(void);
|
||||
extern uint8_t spa_get_failmode(spa_t *spa);
|
||||
extern boolean_t spa_suspended(spa_t *spa);
|
||||
extern uint64_t spa_bootfs(spa_t *spa);
|
||||
extern uint64_t spa_delegation(spa_t *spa);
|
||||
extern objset_t *spa_meta_objset(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern int spa_rename(const char *oldname, const char *newname);
|
||||
@ -466,18 +606,24 @@ extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
|
||||
extern char *spa_strdup(const char *);
|
||||
extern void spa_strfree(char *);
|
||||
extern uint64_t spa_get_random(uint64_t range);
|
||||
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
|
||||
extern uint64_t spa_generate_guid(spa_t *spa);
|
||||
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
extern void spa_evict_all(void);
|
||||
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
|
||||
boolean_t l2cache);
|
||||
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
|
||||
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
|
||||
extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
|
||||
extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
|
||||
extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
|
||||
extern boolean_t spa_has_slogs(spa_t *spa);
|
||||
extern boolean_t spa_is_root(spa_t *spa);
|
||||
extern boolean_t spa_writeable(spa_t *spa);
|
||||
extern void spa_rewind_data_to_nvlist(spa_t *spa, nvlist_t *to);
|
||||
|
||||
extern int spa_mode(spa_t *spa);
|
||||
extern uint64_t strtonum(const char *str, char **nptr);
|
||||
|
||||
/* history logging */
|
||||
typedef enum history_log_type {
|
||||
@ -487,10 +633,11 @@ typedef enum history_log_type {
|
||||
} history_log_type_t;
|
||||
|
||||
typedef struct history_arg {
|
||||
const char *ha_history_str;
|
||||
char *ha_history_str;
|
||||
history_log_type_t ha_log_type;
|
||||
history_internal_events_t ha_event;
|
||||
char ha_zone[MAXPATHLEN];
|
||||
char *ha_zone;
|
||||
uid_t ha_uid;
|
||||
} history_arg_t;
|
||||
|
||||
extern char *spa_his_ievent_table[];
|
||||
@ -500,17 +647,17 @@ extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
|
||||
char *his_buf);
|
||||
extern int spa_history_log(spa_t *spa, const char *his_buf,
|
||||
history_log_type_t what);
|
||||
extern void spa_history_internal_log(history_internal_events_t event,
|
||||
spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
|
||||
extern void spa_history_log_internal(history_internal_events_t event,
|
||||
spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
|
||||
extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
|
||||
|
||||
/* error handling */
|
||||
struct zbookmark;
|
||||
struct zio;
|
||||
extern void spa_log_error(spa_t *spa, struct zio *zio);
|
||||
extern void spa_log_error(spa_t *spa, zio_t *zio);
|
||||
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
|
||||
struct zio *zio, uint64_t stateoroffset, uint64_t length);
|
||||
zio_t *zio, uint64_t stateoroffset, uint64_t length);
|
||||
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
|
||||
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd);
|
||||
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
|
||||
extern uint64_t spa_get_errlog_size(spa_t *spa);
|
||||
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
|
||||
@ -541,7 +688,7 @@ extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
|
||||
#define dprintf_bp(bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
|
||||
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
|
||||
sprintf_blkptr(__blkbuf, (bp)); \
|
||||
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
|
@ -19,8 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_IMPL_H
|
||||
@ -36,6 +35,7 @@
|
||||
#include <sys/avl.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/bpobj.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -78,19 +78,33 @@ typedef struct spa_config_dirent {
|
||||
char *scd_path;
|
||||
} spa_config_dirent_t;
|
||||
|
||||
typedef enum spa_log_state {
|
||||
SPA_LOG_UNKNOWN = 0, /* unknown log state */
|
||||
SPA_LOG_MISSING, /* missing log(s) */
|
||||
SPA_LOG_CLEAR, /* clear the log(s) */
|
||||
SPA_LOG_GOOD, /* log(s) are good */
|
||||
} spa_log_state_t;
|
||||
|
||||
enum zio_taskq_type {
|
||||
ZIO_TASKQ_ISSUE = 0,
|
||||
ZIO_TASKQ_ISSUE_HIGH,
|
||||
ZIO_TASKQ_INTERRUPT,
|
||||
ZIO_TASKQ_INTERRUPT_HIGH,
|
||||
ZIO_TASKQ_TYPES
|
||||
};
|
||||
|
||||
/*
|
||||
* State machine for the zpool-pooname process. The states transitions
|
||||
* are done as follows:
|
||||
*
|
||||
* From To Routine
|
||||
* PROC_NONE -> PROC_CREATED spa_activate()
|
||||
* PROC_CREATED -> PROC_ACTIVE spa_thread()
|
||||
* PROC_ACTIVE -> PROC_DEACTIVATE spa_deactivate()
|
||||
* PROC_DEACTIVATE -> PROC_GONE spa_thread()
|
||||
* PROC_GONE -> PROC_NONE spa_deactivate()
|
||||
*/
|
||||
typedef enum spa_proc_state {
|
||||
SPA_PROC_NONE, /* spa_proc = &p0, no process created */
|
||||
SPA_PROC_CREATED, /* spa_activate() has proc, is waiting */
|
||||
SPA_PROC_ACTIVE, /* taskqs created, spa_proc set */
|
||||
SPA_PROC_DEACTIVATE, /* spa_deactivate() requests process exit */
|
||||
SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */
|
||||
} spa_proc_state_t;
|
||||
|
||||
struct spa {
|
||||
/*
|
||||
* Fields protected by spa_namespace_lock.
|
||||
@ -99,6 +113,7 @@ struct spa {
|
||||
avl_node_t spa_avl; /* node in spa_namespace_avl */
|
||||
nvlist_t *spa_config; /* last synced config */
|
||||
nvlist_t *spa_config_syncing; /* currently syncing config */
|
||||
nvlist_t *spa_config_splitting; /* config for splitting */
|
||||
uint64_t spa_config_txg; /* txg of last config change */
|
||||
int spa_sync_pass; /* iterate-to-convergence */
|
||||
pool_state_t spa_state; /* pool state */
|
||||
@ -113,6 +128,8 @@ struct spa {
|
||||
uint64_t spa_first_txg; /* first txg after spa_open() */
|
||||
uint64_t spa_final_txg; /* txg of export/destroy */
|
||||
uint64_t spa_freeze_txg; /* freeze pool at this txg */
|
||||
uint64_t spa_load_max_txg; /* best initial ub_txg */
|
||||
uint64_t spa_claim_max_txg; /* highest claimed birth txg */
|
||||
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
|
||||
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
|
||||
vdev_t *spa_root_vdev; /* top-level vdev container */
|
||||
@ -122,21 +139,24 @@ struct spa {
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
uint64_t spa_config_object; /* MOS object for pool config */
|
||||
uint64_t spa_config_generation; /* config generation number */
|
||||
uint64_t spa_syncing_txg; /* txg currently syncing */
|
||||
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
|
||||
bplist_t spa_sync_bplist; /* deferred-free bplist */
|
||||
bpobj_t spa_deferred_bpobj; /* deferred-free bplist */
|
||||
bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */
|
||||
uberblock_t spa_ubsync; /* last synced uberblock */
|
||||
uberblock_t spa_uberblock; /* current uberblock */
|
||||
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
|
||||
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
||||
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_errors; /* scrub I/O error count */
|
||||
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
|
||||
uint8_t spa_scrub_active; /* active or suspended? */
|
||||
uint8_t spa_scrub_type; /* type of scrub we're doing */
|
||||
uint8_t spa_scrub_finished; /* indicator to rotate logs */
|
||||
uint8_t spa_scrub_started; /* started since last boot */
|
||||
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
|
||||
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
|
||||
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
|
||||
kmutex_t spa_async_lock; /* protect async state */
|
||||
kthread_t *spa_async_thread; /* thread doing async task */
|
||||
int spa_async_suspended; /* async tasks suspended */
|
||||
@ -144,7 +164,14 @@ struct spa {
|
||||
uint16_t spa_async_tasks; /* async task mask */
|
||||
char *spa_root; /* alternate root directory */
|
||||
uint64_t spa_ena; /* spa-wide ereport ENA */
|
||||
boolean_t spa_last_open_failed; /* true if last open faled */
|
||||
int spa_last_open_failed; /* error if last open failed */
|
||||
uint64_t spa_last_ubsync_txg; /* "best" uberblock txg */
|
||||
uint64_t spa_last_ubsync_txg_ts; /* timestamp from that ub */
|
||||
uint64_t spa_load_txg; /* ub txg that loaded */
|
||||
uint64_t spa_load_txg_ts; /* timestamp from that ub */
|
||||
uint64_t spa_load_meta_errors; /* verify metadata err count */
|
||||
uint64_t spa_load_data_errors; /* verify data err count */
|
||||
uint64_t spa_verify_min_txg; /* start txg of verify scrub */
|
||||
kmutex_t spa_errlog_lock; /* error log lock */
|
||||
uint64_t spa_errlog_last; /* last error log object */
|
||||
uint64_t spa_errlog_scrub; /* scrub error log object */
|
||||
@ -166,11 +193,27 @@ struct spa {
|
||||
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||
uint8_t spa_suspended; /* pool is suspended */
|
||||
uint8_t spa_claiming; /* pool is doing zil_claim() */
|
||||
boolean_t spa_is_root; /* pool is root */
|
||||
int spa_minref; /* num refs when first opened */
|
||||
int spa_mode; /* FREAD | FWRITE */
|
||||
spa_log_state_t spa_log_state; /* log state */
|
||||
uint64_t spa_autoexpand; /* lun expansion on/off */
|
||||
ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
|
||||
uint64_t spa_ddt_stat_object; /* DDT statistics */
|
||||
uint64_t spa_dedup_ditto; /* dedup ditto threshold */
|
||||
uint64_t spa_dedup_checksum; /* default dedup checksum */
|
||||
uint64_t spa_dspace; /* dspace in normal class */
|
||||
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
|
||||
kmutex_t spa_proc_lock; /* protects spa_proc* */
|
||||
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
|
||||
spa_proc_state_t spa_proc_state; /* see definition */
|
||||
struct proc *spa_proc; /* "zpool-poolname" process */
|
||||
uint64_t spa_did; /* if procp != p0, did of t1 */
|
||||
boolean_t spa_autoreplace; /* autoreplace set in open */
|
||||
int spa_vdev_locks; /* locks grabbed */
|
||||
uint64_t spa_creation_version; /* version at pool creation */
|
||||
uint64_t spa_prev_software_version;
|
||||
/*
|
||||
* spa_refcnt & spa_config_lock must be the last elements
|
||||
* because refcount_t changes size based on compilation options.
|
||||
@ -183,12 +226,6 @@ struct spa {
|
||||
|
||||
extern const char *spa_config_path;
|
||||
|
||||
#define BOOTFS_COMPRESS_VALID(compress) \
|
||||
((compress) == ZIO_COMPRESS_LZJB || \
|
||||
((compress) == ZIO_COMPRESS_ON && \
|
||||
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
|
||||
(compress) == ZIO_COMPRESS_OFF)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -77,6 +77,7 @@ struct space_map_ops {
|
||||
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
uint64_t (*smop_max)(space_map_t *sm);
|
||||
boolean_t (*smop_fragmented)(space_map_t *sm);
|
||||
};
|
||||
|
||||
/*
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user