Update core ZFS code from build 121 to build 141.

This commit is contained in:
Brian Behlendorf
2010-05-28 13:45:14 -07:00
parent 6119cb885a
commit 428870ff73
174 changed files with 35763 additions and 14592 deletions
+12 -21
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ARC_H
@@ -48,7 +47,8 @@ arc_done_func_t arc_getbuf_func;
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next;
krwlock_t b_lock;
kmutex_t b_evict_lock;
krwlock_t b_data_lock;
void *b_data;
arc_evict_func_t *b_efunc;
void *b_private;
@@ -87,10 +87,13 @@ arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
arc_buf_contents_t type);
arc_buf_t *arc_loan_buf(spa_t *spa, int size);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
zbookmark_t *zb);
int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
@@ -99,28 +102,16 @@ void arc_buf_thaw(arc_buf_t *buf);
int arc_referenced(arc_buf_t *buf);
#endif
typedef struct writeprops {
dmu_object_type_t wp_type;
uint8_t wp_level;
uint8_t wp_copies;
uint8_t wp_dncompress, wp_oscompress;
uint8_t wp_dnchecksum, wp_oschecksum;
} writeprops_t;
void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp);
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_done_func_t *done, void *private, int priority, int flags,
uint32_t *arc_flags, const zbookmark_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
int zio_flags, const zbookmark_t *zb);
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
arc_done_func_t *ready, arc_done_func_t *done, void *private,
int priority, int zio_flags, const zbookmark_t *zb);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);
+14 -46
View File
@@ -19,68 +19,36 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_BPLIST_H
#define _SYS_BPLIST_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct bplist_phys {
/*
* This is the bonus buffer for the dead lists. The object's
* contents is an array of bpl_entries blkptr_t's, representing
* a total of bpl_bytes physical space.
*/
uint64_t bpl_entries;
uint64_t bpl_bytes;
uint64_t bpl_comp;
uint64_t bpl_uncomp;
} bplist_phys_t;
#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t))
typedef struct bplist_q {
blkptr_t bpq_blk;
void *bpq_next;
} bplist_q_t;
typedef struct bplist_entry {
blkptr_t bpe_blk;
list_node_t bpe_node;
} bplist_entry_t;
typedef struct bplist {
kmutex_t bpl_lock;
objset_t *bpl_mos;
uint64_t bpl_object;
uint8_t bpl_blockshift;
uint8_t bpl_bpshift;
uint8_t bpl_havecomp;
bplist_q_t *bpl_queue;
bplist_phys_t *bpl_phys;
dmu_buf_t *bpl_dbuf;
dmu_buf_t *bpl_cached_dbuf;
list_t bpl_list;
} bplist_t;
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
extern void bplist_close(bplist_t *bpl);
extern boolean_t bplist_empty(bplist_t *bpl);
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
extern int bplist_space(bplist_t *bpl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
extern int bplist_space_birthrange(bplist_t *bpl,
uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
typedef int bplist_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
void bplist_create(bplist_t *bpl);
void bplist_destroy(bplist_t *bpl);
void bplist_append(bplist_t *bpl, const blkptr_t *bp);
void bplist_iterate(bplist_t *bpl, bplist_itor_t *func,
void *arg, dmu_tx_t *tx);
#ifdef __cplusplus
}
+91
View File
@@ -0,0 +1,91 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_BPOBJ_H
#define _SYS_BPOBJ_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * This is the bonus buffer for the dead lists.  The object's contents
 * is an array of bpo_num_blkptrs blkptr_t's, representing a total of
 * bpo_bytes physical space.
 */
typedef struct bpobj_phys {
	uint64_t	bpo_num_blkptrs;	/* entries in the blkptr_t array */
	uint64_t	bpo_bytes;		/* total physical space */
	uint64_t	bpo_comp;
	uint64_t	bpo_uncomp;
	uint64_t	bpo_subobjs;
	uint64_t	bpo_num_subobjs;
} bpobj_phys_t;

/*
 * Byte sizes covering the first two and the first four uint64_t fields of
 * bpobj_phys_t.  NOTE(review): presumably the bonus sizes of older on-disk
 * layouts -- confirm against the implementation.
 */
#define	BPOBJ_SIZE_V0	(2 * sizeof (uint64_t))
#define	BPOBJ_SIZE_V1	(4 * sizeof (uint64_t))
/*
 * In-core handle for a bpobj.
 *
 * NOTE(review): the role of each field is not established by this header;
 * the hedged remarks below are inferred from names and types only.
 */
typedef struct bpobj {
	kmutex_t	bpo_lock;
	objset_t	*bpo_os;
	uint64_t	bpo_object;
	int		bpo_epb;
	uint8_t		bpo_havecomp;	/* presumably: bpo_comp/bpo_uncomp valid */
	uint8_t		bpo_havesubobj;	/* presumably: bpo_subobjs valid */
	bpobj_phys_t	*bpo_phys;
	dmu_buf_t	*bpo_dbuf;
	dmu_buf_t	*bpo_cached_dbuf;
} bpobj_t;
/*
 * Callback signature accepted by bpobj_iterate() and bpobj_iterate_nofree().
 * NOTE(review): presumably called with the arg and tx that were handed to
 * the iterate routine, plus one block pointer per call -- confirm.
 */
typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);

/* Object lifetime. */
uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx);
void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);

/* Handle lifetime. */
int bpobj_open(bpobj_t *bpo, objset_t *mos, uint64_t object);
void bpobj_close(bpobj_t *bpo);

/*
 * Iteration.  The two callback-based variants take the same arguments;
 * the parameters of bpobj_iterate_nofree() are named to match
 * bpobj_iterate().
 */
int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx);
int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg,
    dmu_tx_t *tx);
int bpobj_iterate_dbg(bpobj_t *bpo, uint64_t *itorp, blkptr_t *bp);

/* Adding entries. */
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx);

/* Space accounting; results are returned through the pointer arguments. */
int bpobj_space(bpobj_t *bpo,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_BPOBJ_H */
+11 -6
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DBUF_H
@@ -38,7 +37,6 @@
extern "C" {
#endif
#define DB_BONUS_BLKID (-1ULL)
#define IN_DMU_SYNC 2
/*
@@ -75,7 +73,6 @@ typedef enum dbuf_states {
DB_EVICTING
} dbuf_states_t;
struct objset_impl;
struct dnode;
struct dmu_tx;
@@ -134,6 +131,7 @@ typedef struct dbuf_dirty_record {
arc_buf_t *dr_data;
blkptr_t dr_overridden_by;
override_states_t dr_override_state;
uint8_t dr_copies;
} dl;
} dt;
} dbuf_dirty_record_t;
@@ -148,7 +146,7 @@ typedef struct dmu_buf_impl {
dmu_buf_t db;
/* the objset we belong to */
struct objset_impl *db_objset;
struct objset *db_objset;
/*
* the dnode we belong to (NULL when evicted)
@@ -242,6 +240,10 @@ uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
void dbuf_create_bonus(struct dnode *dn);
int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
void dbuf_spill_hold(struct dnode *dn, dmu_buf_impl_t **dbp, void *tag);
void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
@@ -255,6 +257,7 @@ void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
@@ -266,6 +269,7 @@ void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
void dbuf_clear(dmu_buf_impl_t *db);
void dbuf_evict(dmu_buf_impl_t *db);
@@ -273,6 +277,7 @@ void dbuf_evict(dmu_buf_impl_t *db);
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
void dbuf_release_bp(dmu_buf_impl_t *db);
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
struct dmu_tx *);
@@ -324,7 +329,7 @@ _NOTE(CONSTCOND) } while (0)
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
sprintf_blkptr(__blkbuf, bp); \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
+246
View File
@@ -0,0 +1,246 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DDT_H
#define _SYS_DDT_H
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * On-disk DDT formats, in the desired search order (newest version first).
 * DDT_TYPES is the number of formats and is used as an array dimension.
 */
enum ddt_type {
	DDT_TYPE_ZAP,
	DDT_TYPES
};
/*
 * DDT classes, in the desired search order (highest replication level first).
 * DDT_CLASSES is the number of classes and is used as an array dimension.
 */
enum ddt_class {
	DDT_CLASS_DITTO,
	DDT_CLASS_DUPLICATE,
	DDT_CLASS_UNIQUE,
	DDT_CLASSES
};
/* Numerically equal to DDT_TYPE_ZAP, the only format in enum ddt_type. */
#define	DDT_TYPE_CURRENT		0

/*
 * Two complementary masks that together cover one byte: the top bit and
 * the low seven bits.  NOTE(review): presumably applied to a one-byte
 * compression header (byte order flag + compression function) -- confirm.
 */
#define	DDT_COMPRESS_BYTEORDER_MASK	0x80
#define	DDT_COMPRESS_FUNCTION_MASK	0x7f
/*
 * On-disk ddt entry: key (name) and physical storage (value).
 * This is the key half; the value half is ddt_phys_t.
 */
typedef struct ddt_key {
	zio_cksum_t	ddk_cksum;	/* 256-bit block checksum */
	uint64_t	ddk_prop;	/* LSIZE, PSIZE, compression */
} ddt_key_t;
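/*
* ddk_prop layout (one column per byte, most significant byte first):
*
* +-------+-------+-------+-------+-------+-------+-------+-------+
* |   0   |   0   |   0   | comp  |     PSIZE     |     LSIZE     |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* LSIZE is bits 0-15, PSIZE is bits 16-31, comp is bits 32-39;
* bits 40-63 are zero.
*/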
/*
 * Accessors for the fields packed into ddt_key_t.ddk_prop.
 *
 * LSIZE is a 16-bit field starting at bit 0 and PSIZE a 16-bit field
 * starting at bit 16; both go through the BF64_*_SB forms with
 * SPA_MINBLOCKSHIFT and 1 as the extra arguments.  The compression field
 * is 8 bits starting at bit 32.
 * NOTE(review): assumes BF64_* take (word, low bit, length, ...) -- the
 * macros are defined elsewhere; confirm.
 */
#define	DDK_GET_LSIZE(ddk)	\
	BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_LSIZE(ddk, x)	\
	BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	DDK_GET_PSIZE(ddk)	\
	BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_PSIZE(ddk, x)	\
	BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	DDK_GET_COMPRESS(ddk)		BF64_GET((ddk)->ddk_prop, 32, 8)
#define	DDK_SET_COMPRESS(ddk, x)	BF64_SET((ddk)->ddk_prop, 32, 8, x)

/* Size of a ddt_key_t expressed in 64-bit words. */
#define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
/*
 * Physical storage (value) half of a ddt entry: up to SPA_DVAS_PER_BP
 * DVAs plus a reference count and a birth value.
 */
typedef struct ddt_phys {
	dva_t		ddp_dva[SPA_DVAS_PER_BP];
	uint64_t	ddp_refcnt;
	uint64_t	ddp_phys_birth;
} ddt_phys_t;
/*
 * Index into the dde_phys[] / dde_lead_zio[] arrays of struct ddt_entry.
 * DDT_PHYS_TYPES is the number of slots, not a slot itself.
 */
enum ddt_phys_type {
	DDT_PHYS_DITTO,		/* 0 */
	DDT_PHYS_SINGLE,	/* 1 */
	DDT_PHYS_DOUBLE,	/* 2 */
	DDT_PHYS_TRIPLE,	/* 3 */
	DDT_PHYS_TYPES
};
/*
 * In-core ddt entry.
 *
 * Holds the on-disk key together with one ddt_phys_t and one lead zio
 * pointer per enum ddt_phys_type slot.
 */
struct ddt_entry {
	ddt_key_t	dde_key;
	ddt_phys_t	dde_phys[DDT_PHYS_TYPES];
	zio_t		*dde_lead_zio[DDT_PHYS_TYPES];
	void		*dde_repair_data;
	enum ddt_type	dde_type;
	enum ddt_class	dde_class;
	uint8_t		dde_loading;
	uint8_t		dde_loaded;
	kcondvar_t	dde_cv;
	avl_node_t	dde_node;	/* AVL linkage */
};
/*
 * In-core ddt.
 *
 * The two-dimensional arrays carry one element per (ddt_type, ddt_class)
 * combination.
 */
struct ddt {
	kmutex_t	ddt_lock;
	avl_tree_t	ddt_tree;
	avl_tree_t	ddt_repair_tree;
	enum zio_checksum ddt_checksum;
	spa_t		*ddt_spa;
	objset_t	*ddt_os;
	uint64_t	ddt_stat_object;
	uint64_t	ddt_object[DDT_TYPES][DDT_CLASSES];
	ddt_histogram_t	ddt_histogram[DDT_TYPES][DDT_CLASSES];
	ddt_histogram_t	ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
	ddt_object_t	ddt_object_stats[DDT_TYPES][DDT_CLASSES];
	avl_node_t	ddt_node;	/* AVL linkage */
};
/*
 * In-core and on-disk bookmark for DDT walks.
 * Four 64-bit words: class, type, checksum and a cursor.
 */
typedef struct ddt_bookmark {
	uint64_t	ddb_class;
	uint64_t	ddb_type;
	uint64_t	ddb_checksum;
	uint64_t	ddb_cursor;
} ddt_bookmark_t;
/*
 * Ops vector to access a specific DDT object type.
 *
 * Each member operates on an (objset, object number) pair; the entry-based
 * operations additionally take the ddt_entry_t involved, and the mutating
 * ones take the transaction.
 */
typedef struct ddt_ops {
	char ddt_op_name[32];

	/* Object lifetime. */
	int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
	    boolean_t prehash);
	int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);

	/* Per-entry access. */
	int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
	void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
	    ddt_entry_t *dde);
	int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
	    dmu_tx_t *tx);
	int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
	    dmu_tx_t *tx);

	/* Whole-object traversal and counting. */
	int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
	    uint64_t *walk);
	uint64_t (*ddt_op_count)(objset_t *os, uint64_t object);
} ddt_ops_t;
/*
 * NOTE(review): presumably the buffer size callers must provide for the
 * name argument of ddt_object_name() -- confirm against the implementation.
 */
#define	DDT_NAMELEN	80

/*
 * Per-object accessors, addressed by (ddt, type, class).
 *
 * The class parameter is spelled "cls" because "class" is a C++ keyword
 * and this header is wrapped in extern "C" for C++ consumers.  Parameter
 * names in prototypes are not part of the interface, so callers and the
 * definitions are unaffected.
 */
extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
    enum ddt_class cls, char *name);
extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
    enum ddt_class cls, uint64_t *walk, ddt_entry_t *dde);
extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type,
    enum ddt_class cls);
extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
    enum ddt_class cls, dmu_object_info_t *doi);
extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
    enum ddt_class cls);
/*
 * DDT function prototypes.  Function declarations have external linkage
 * by default, so the storage-class keyword is omitted.
 */

/* Block pointer and key construction. */
void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg);
void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
    const ddt_phys_t *ddp, blkptr_t *bp);
void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);

/* ddt_phys_t manipulation. */
void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
void ddt_phys_clear(ddt_phys_t *ddp);
void ddt_phys_addref(ddt_phys_t *ddp);
void ddt_phys_decref(ddt_phys_t *ddp);
void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
    uint64_t txg);
ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);

/* Statistics and histograms. */
void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);

/* Pool-wide dedup reporting. */
void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
uint64_t ddt_get_dedup_dspace(spa_t *spa);
uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);

/* Ditto copies. */
int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
    ddt_phys_t *ddp_willref);
int ddt_ditto_copies_present(ddt_entry_t *dde);

/* Entry (de)compression. */
size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);

/* Table selection, locking and entry lookup. */
ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
void ddt_enter(ddt_t *ddt);
void ddt_exit(ddt_t *ddt);
ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
    const blkptr_t *bp);

/* Repair. */
ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);

/* Comparison routine over two opaque pointers. */
int ddt_entry_compare(const void *x1, const void *x2);

/* Pool-level lifecycle. */
void ddt_create(spa_t *spa);
int ddt_load(spa_t *spa);
void ddt_unload(spa_t *spa);
void ddt_sync(spa_t *spa, uint64_t txg);
int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
/*
 * Update one entry in the object addressed by (ddt, type, class).
 * The class parameter is spelled "cls" because "class" is a C++ keyword
 * and this header is wrapped in extern "C" for C++ consumers; prototype
 * parameter names are not part of the interface.
 */
extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
    enum ddt_class cls, ddt_entry_t *dde, dmu_tx_t *tx);

/* Ops vector instance declared here and defined elsewhere. */
extern const ddt_ops_t ddt_zap_ops;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DDT_H */
+129 -56
View File
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_DMU_H
#define _SYS_DMU_H
@@ -38,12 +39,14 @@
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/time.h>
#ifdef __cplusplus
extern "C" {
#endif
struct uio;
struct xuio;
struct page;
struct vnode;
struct spa;
@@ -59,8 +62,9 @@ struct drr_end;
struct zbookmark;
struct spa;
struct nvlist;
struct objset_impl;
struct arc_buf;
struct zio_prop;
struct sa_handle;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
@@ -73,8 +77,8 @@ typedef enum dmu_object_type {
DMU_OT_OBJECT_ARRAY, /* UINT64 */
DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
DMU_OT_BPLIST, /* UINT64 */
DMU_OT_BPLIST_HDR, /* UINT64 */
DMU_OT_BPOBJ, /* UINT64 */
DMU_OT_BPOBJ_HDR, /* UINT64 */
/* spa: */
DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
DMU_OT_SPACE_MAP, /* UINT64 */
@@ -114,10 +118,22 @@ typedef enum dmu_object_type {
DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
DMU_OT_NEXT_CLONES, /* ZAP */
DMU_OT_SCRUB_QUEUE, /* ZAP */
DMU_OT_SCAN_QUEUE, /* ZAP */
DMU_OT_USERGROUP_USED, /* ZAP */
DMU_OT_USERGROUP_QUOTA, /* ZAP */
DMU_OT_USERREFS, /* ZAP */
DMU_OT_DDT_ZAP, /* ZAP */
DMU_OT_DDT_STATS, /* ZAP */
DMU_OT_SA, /* System attr */
DMU_OT_SA_MASTER_NODE, /* ZAP */
DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */
DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */
DMU_OT_SCAN_XLATE, /* ZAP */
DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */
DMU_OT_DEADLIST, /* ZAP */
DMU_OT_DEADLIST_HDR, /* UINT64 */
DMU_OT_DSL_CLONES, /* ZAP */
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
DMU_OT_NUMTYPES
} dmu_object_type_t;
@@ -140,16 +156,6 @@ void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
#define DS_MODE_NOHOLD 0 /* internal use only */
#define DS_MODE_USER 1 /* simple access, no special needs */
#define DS_MODE_OWNER 2 /* the "main" access, e.g. a mount */
#define DS_MODE_TYPE_MASK 0x3
#define DS_MODE_TYPE(x) ((x) & DS_MODE_TYPE_MASK)
#define DS_MODE_READONLY 0x8
#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY)
#define DS_MODE_INCONSISTENT 0x10
#define DS_MODE_IS_INCONSISTENT(x) ((x) & DS_MODE_INCONSISTENT)
#define DS_FIND_SNAPSHOTS (1<<0)
#define DS_FIND_CHILDREN (1<<1)
@@ -162,27 +168,35 @@ void zfs_znode_byteswap(void *buf, size_t size);
#define DMU_USERUSED_OBJECT (-1ULL)
#define DMU_GROUPUSED_OBJECT (-2ULL)
#define DMU_DEADLIST_OBJECT (-3ULL)
/*
 * Artificial blkids for the bonus buffer and for spill blocks.
 * Both are unsigned long long values taken from the top of the range.
 */
#define	DMU_BONUS_BLKID		(-1ULL)
#define	DMU_SPILL_BLKID		(-2ULL)
/*
* Public routines to create, destroy, open, and close objsets.
*/
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
@@ -201,7 +215,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
#define DMU_POOL_ERRLOG_LAST "errlog_last"
#define DMU_POOL_SPARES "spares"
@@ -209,19 +223,12 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_HISTORY "history"
#define DMU_POOL_PROPS "pool_props"
#define DMU_POOL_L2CACHE "l2cache"
/* 4x8 zbookmark_t */
#define DMU_POOL_SCRUB_BOOKMARK "scrub_bookmark"
/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
#define DMU_POOL_SCRUB_QUEUE "scrub_queue"
/* 1x8 txg */
#define DMU_POOL_SCRUB_MIN_TXG "scrub_min_txg"
/* 1x8 txg */
#define DMU_POOL_SCRUB_MAX_TXG "scrub_max_txg"
/* 1x4 enum scrub_func */
#define DMU_POOL_SCRUB_FUNC "scrub_func"
/* 1x8 count */
#define DMU_POOL_SCRUB_ERRORS "scrub_errors"
#define DMU_POOL_TMP_USERREFS "tmp_userrefs"
#define DMU_POOL_DDT "DDT-%s-%s-%s"
#define DMU_POOL_DDT_STATS "DDT-statistics"
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
/*
* Allocate an object from this objset. The range of object numbers
@@ -306,11 +313,14 @@ void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
dmu_tx_t *tx);
/*
* Decide how many copies of a given block we should make. Can be from
* 1 to SPA_DVAS_PER_BP.
* Decide how to write a block: checksum, compression, number of copies, etc.
*/
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
dmu_object_type_t ot);
/*
 * Single-bit flags that can be OR-ed together.
 * NOTE(review): presumably the values accepted by the "wp" argument of
 * dmu_write_policy() -- confirm against the implementation.
 */
#define	WP_NOFILL	0x1
#define	WP_DMU_SYNC	0x2
#define	WP_SPILL	0x4
void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
struct zio_prop *zp);
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
@@ -324,6 +334,17 @@ int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
/*
* Special spill buffer support used by "SA" framework
*/
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
/*
* Obtain the DMU buffer from the specified object which contains the
@@ -340,7 +361,7 @@ int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
* The object number must be a valid, allocated object number.
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **);
void *tag, dmu_buf_t **, int flags);
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
void dmu_buf_rele(dmu_buf_t *db, void *tag);
uint64_t dmu_buf_refcount(dmu_buf_t *db);
@@ -437,11 +458,34 @@ void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
/*
* To register a commit callback, dmu_tx_callback_register() must be called.
*
* dcb_data is a pointer to caller private data that is passed on as a
* callback parameter. The caller is responsible for properly allocating and
* freeing it.
*
* When registering a callback, the transaction must be already created, but
* it cannot be committed or aborted. It can be assigned to a txg or not.
*
* The callback will be called after the transaction has been safely written
* to stable storage and will also be called if the dmu_tx is aborted.
* If there is any error which prevents the transaction from being committed to
* disk, the callback will be called with a value of error != 0.
*/
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
void *dcb_data);
/*
* Free up the data blocks for a defined range of a file. If size is
* zero, the range from offset to end-of-file is freed.
@@ -469,12 +513,23 @@ void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
size_t n);
int dmu_xuio_cnt(struct xuio *uio);
struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
void dmu_xuio_clear(struct xuio *uio, int i);
/*
 * Declared with (void) so these are real prototypes: an empty parameter
 * list in C leaves the arguments unchecked.
 * NOTE(review): presumably these bump the xuio write-buffer "copied" and
 * "nocopy" counters; the definitions are not visible here -- confirm.
 */
void xuio_stat_wbuf_copied(void);
void xuio_stat_wbuf_nocopy(void);
extern int zfs_prefetch_disable;
@@ -485,19 +540,19 @@ void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
uint64_t len);
typedef struct dmu_object_info {
/* All sizes are in bytes. */
/* All sizes are in bytes unless otherwise indicated. */
uint32_t doi_data_block_size;
uint32_t doi_metadata_block_size;
uint64_t doi_bonus_size;
dmu_object_type_t doi_type;
dmu_object_type_t doi_bonus_type;
uint64_t doi_bonus_size;
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
uint8_t doi_checksum;
uint8_t doi_compress;
uint8_t doi_pad[5];
/* Values below are number of 512-byte blocks. */
uint64_t doi_physical_blks; /* data + metadata */
uint64_t doi_max_block_offset;
uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */
uint64_t doi_max_offset;
uint64_t doi_fill_count; /* number of non-empty blocks */
} dmu_object_info_t;
typedef void arc_byteswap_func_t(void *buf, size_t size);
@@ -566,6 +621,11 @@ void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
*/
uint64_t dmu_objset_fsid_guid(objset_t *os);
/*
* Get the [cm]time for an objset's snapshot dir
*/
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
int dmu_objset_is_snapshot(objset_t *os);
extern struct spa *dmu_objset_spa(objset_t *os);
@@ -575,6 +635,8 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
extern uint64_t dmu_objset_syncprop(objset_t *os);
extern uint64_t dmu_objset_logbias(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
@@ -582,9 +644,8 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
dmu_tx_t *tx);
typedef int objset_used_cb_t(dmu_object_type_t bonustype,
void *bonus, uint64_t *userp, uint64_t *groupp);
extern void dmu_objset_register_type(dmu_objset_type_t ost,
objset_used_cb_t *cb);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
@@ -605,9 +666,20 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
* storage when the write completes this new data does not become a
* permanent part of the file until the associated transaction commits.
*/
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
int dmu_sync(struct zio *zio, dmu_buf_t *db,
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
/*
 * Argument bundle for the {zfs,zvol,ztest}_get_done() callbacks.
 * Every member is a pointer; nothing is stored by value.
 */
typedef struct zgd {
	struct zilog	*zgd_zilog;
	struct blkptr	*zgd_bp;
	dmu_buf_t	*zgd_db;
	struct rl	*zgd_rl;
	void		*zgd_private;	/* opaque pointer */
} zgd_t;
typedef void dmu_sync_cb_t(zgd_t *arg, int error);
int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd);
/*
* Find the next hole or data block in file starting at *off
@@ -642,11 +714,12 @@ typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_real_ds;
struct drr_begin *drc_drrb;
char *drc_tosnap;
char *drc_top_ds;
boolean_t drc_newfs;
boolean_t drc_force;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
int dmu_recv_end(dmu_recv_cookie_t *drc);
+35 -3
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -210,8 +210,7 @@ extern "C" {
*
* ds_lock
* protects:
* ds_user_ptr
* ds_user_evict_func
* ds_objset
* ds_open_refcount
* ds_snapname
* ds_phys accounting
@@ -233,6 +232,39 @@ extern "C" {
struct objset;
struct dmu_pool;
/*
 * Bookkeeping that pairs an array of arc_buf pointers with an iovec array.
 * NOTE(review): field roles below are inferred from their names and types
 * only; confirm against the code that fills this structure in.
 */
typedef struct dmu_xuio {
int next; /* presumably the index of the next slot to use -- TODO confirm */
int cnt; /* presumably the number of slots in bufs/iovp -- TODO confirm */
struct arc_buf **bufs; /* array of arc_buf pointers */
iovec_t *iovp; /* iovec array */
} dmu_xuio_t;
/*
 * Set of named kstat counters describing xuio buffer loaning.  Each member
 * is one kstat_named_t; the counters are updated through the
 * XUIOSTAT_INCR()/XUIOSTAT_BUMP() macros, which name a member of this struct.
 */
typedef struct xuio_stats {
/* loaned yet not returned arc_buf */
kstat_named_t xuiostat_onloan_rbuf;
kstat_named_t xuiostat_onloan_wbuf;
/* whether a copy is made when loaning out a read buffer */
kstat_named_t xuiostat_rbuf_copied;
kstat_named_t xuiostat_rbuf_nocopy;
/* whether a copy is made when assigning a write buffer */
kstat_named_t xuiostat_wbuf_copied;
kstat_named_t xuiostat_wbuf_nocopy;
} xuio_stats_t;
/*
 * The counter instance used by XUIOSTAT_INCR()/XUIOSTAT_BUMP().
 *
 * The initializers are positional: each { name, type } pair must stay in the
 * same order as the corresponding member of xuio_stats_t.
 *
 * NOTE(review): this is a `static` object defined in a header, so every
 * translation unit that includes the header gets its own private copy of the
 * counters -- confirm that this is intended.
 */
static xuio_stats_t xuio_stats = {
{ "onloan_read_buf", KSTAT_DATA_UINT64 },
{ "onloan_write_buf", KSTAT_DATA_UINT64 },
{ "read_buf_copied", KSTAT_DATA_UINT64 },
{ "read_buf_nocopy", KSTAT_DATA_UINT64 },
{ "write_buf_copied", KSTAT_DATA_UINT64 },
{ "write_buf_nocopy", KSTAT_DATA_UINT64 }
};
/*
 * XUIOSTAT_INCR(stat, val) adds val to the ui64 value of the named counter
 * in xuio_stats by calling atomic_add_64().  `stat` is used as a member name,
 * so it must be one of the fields of xuio_stats_t (e.g. xuiostat_onloan_rbuf).
 * XUIOSTAT_BUMP(stat) is shorthand for adding 1.
 */
#define XUIOSTAT_INCR(stat, val) \
atomic_add_64(&xuio_stats.stat.value.ui64, (val))
#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
#ifdef __cplusplus
}
#endif
+44 -29
View File
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_DMU_OBJSET_H
#define _SYS_DMU_OBJSET_H
@@ -33,6 +34,7 @@
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/zil.h>
#include <sys/sa.h>
#ifdef __cplusplus
extern "C" {
@@ -40,11 +42,13 @@ extern "C" {
struct dsl_dataset;
struct dmu_tx;
struct objset_impl;
#define OBJSET_PHYS_SIZE 2048
#define OBJSET_OLD_PHYS_SIZE 1024
#define OBJSET_BUF_HAS_USERUSED(buf) \
(arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE)
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
typedef struct objset_phys {
@@ -59,11 +63,6 @@ typedef struct objset_phys {
} objset_phys_t;
struct objset {
struct objset_impl *os;
int os_mode;
};
typedef struct objset_impl {
/* Immutable: */
struct dsl_dataset *os_dsl_dataset;
spa_t *os_spa;
@@ -73,12 +72,17 @@ typedef struct objset_impl {
dnode_t *os_userused_dnode;
dnode_t *os_groupused_dnode;
zilog_t *os_zil;
objset_t os;
uint8_t os_checksum; /* can change, under dsl_dir's locks */
uint8_t os_compress; /* can change, under dsl_dir's locks */
uint8_t os_copies; /* can change, under dsl_dir's locks */
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
/* can change, under dsl_dir's locks: */
uint8_t os_checksum;
uint8_t os_compress;
uint8_t os_copies;
uint8_t os_dedup_checksum;
uint8_t os_dedup_verify;
uint8_t os_logbias;
uint8_t os_primary_cache;
uint8_t os_secondary_cache;
uint8_t os_sync;
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
@@ -101,8 +105,12 @@ typedef struct objset_impl {
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
} objset_impl_t;
/* SA layout/attribute registration */
sa_os_t *os_sa;
};
#define DMU_META_OBJSET 0
#define DMU_META_DNODE_OBJECT 0
#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
@@ -111,14 +119,18 @@ typedef struct objset_impl {
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
/* called from zpl */
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@@ -126,23 +138,26 @@ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
int dmu_objset_find_spa(spa_t *spa, const char *name,
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
int dmu_objset_prefetch(char *name, void *arg);
int dmu_objset_prefetch(const char *name, void *arg);
void dmu_objset_byteswap(void *buf, size_t size);
int dmu_objset_evict_dbufs(objset_t *os);
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
objset_impl_t **osip);
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
objset_t **osp);
void dmu_objset_evict(objset_t *os);
void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
+11 -7
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DMU_TRAVERSE_H
@@ -36,19 +35,24 @@ extern "C" {
struct dnode_phys;
struct dsl_dataset;
struct zilog;
struct arc_buf;
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp,
void *arg);
#define TRAVERSE_PRE (1<<0)
#define TRAVERSE_POST (1<<1)
#define TRAVERSE_PREFETCH_METADATA (1<<2)
#define TRAVERSE_PREFETCH_DATA (1<<3)
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4)
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int flags, blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
#ifdef __cplusplus
}
+12 -3
View File
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/dmu.h>
#include <sys/txg.h>
@@ -59,6 +57,7 @@ struct dmu_tx {
txg_handle_t tx_txgh;
void *tx_tempreserve_cookie;
struct dmu_tx_hold *tx_needassign_txh;
list_t tx_callbacks; /* list of dmu_tx_callback_t on this dmu_tx */
uint8_t tx_anyobj;
int tx_err;
#ifdef ZFS_DEBUG
@@ -78,6 +77,7 @@ enum dmu_tx_hold_type {
THT_FREE,
THT_ZAP,
THT_SPACE,
THT_SPILL,
THT_NUMTYPES
};
@@ -98,6 +98,11 @@ typedef struct dmu_tx_hold {
#endif
} dmu_tx_hold_t;
/*
 * One callback record attached to a dmu_tx.  It holds the same
 * function/data pair that dmu_tx_callback_register() accepts, plus the list
 * linkage used to chain it on the transaction's tx_callbacks list.
 */
typedef struct dmu_tx_callback {
list_node_t dcb_node; /* linked to tx_callbacks list */
dmu_tx_callback_func_t *dcb_func; /* caller function pointer */
void *dcb_data; /* caller private data */
} dmu_tx_callback_t;
/*
* These routines are defined in dmu.h, and are called by the user.
@@ -109,6 +114,10 @@ void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
void *dcb_data);
void dmu_tx_do_callbacks(list_t *cb_list, int error);
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
*/
+4 -3
View File
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _DFETCH_H
#define _DFETCH_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
@@ -63,6 +61,9 @@ typedef struct zfetch {
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
} zfetch_t;
void zfetch_init(void);
void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
+36 -9
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DNODE_H
@@ -62,6 +61,18 @@ extern "C" {
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
/*
* dnode id flags
*
* Note: a file will never have its ids moved from bonus to spill,
* and only in a crypto environment would they be on spill.
*/
#define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2
#define DN_ID_OLD_EXIST 0x4
#define DN_ID_NEW_EXIST 0x8
/*
* Derived constants.
*/
@@ -70,10 +81,12 @@ extern "C" {
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
#define DN_KILL_SPILLBLK (1)
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
#define DNODES_PER_LEVEL (1ULL << DNODES_PER_LEVEL_SHIFT)
/* The +2 here is a cheesy way to round up */
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
@@ -88,7 +101,7 @@ extern "C" {
/*
 * EPB(blkshift, typeshift) evaluates to 2^(blkshift - typeshift) -- presumably
 * the number of 2^typeshift-byte entries that fit in a 2^blkshift-byte block.
 *
 * Both parameters are parenthesized so that expression arguments such as
 * EPB(shift, a + b) are subtracted as a whole rather than being torn apart by
 * operator precedence.  The result has type int, so (blkshift - typeshift)
 * must be non-negative and smaller than the width of int.
 */
#define EPB(blkshift, typeshift) (1 << ((blkshift) - (typeshift)))
struct dmu_buf_impl;
struct objset_impl;
struct objset;
struct zio;
enum dnode_dirtycontext {
@@ -101,6 +114,9 @@ enum dnode_dirtycontext {
#define DNODE_FLAG_USED_BYTES (1<<0)
#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
/* Does dnode have a SA spill blkptr in bonus? */
#define DNODE_FLAG_SPILL_BLKPTR (1<<2)
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
@@ -121,7 +137,8 @@ typedef struct dnode_phys {
uint64_t dn_pad3[4];
blkptr_t dn_blkptr[1];
uint8_t dn_bonus[DN_MAX_BONUSLEN];
uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)];
blkptr_t dn_spill;
} dnode_phys_t;
typedef struct dnode {
@@ -136,7 +153,7 @@ typedef struct dnode {
list_node_t dn_link;
/* immutable: */
struct objset_impl *dn_objset;
struct objset *dn_objset;
uint64_t dn_object;
struct dmu_buf_impl *dn_dbuf;
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
@@ -161,6 +178,8 @@ typedef struct dnode {
uint8_t dn_next_nblkptr[TXG_SIZE];
uint8_t dn_next_nlevels[TXG_SIZE];
uint8_t dn_next_indblkshift[TXG_SIZE];
uint8_t dn_next_bonustype[TXG_SIZE];
uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */
uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
@@ -185,12 +204,17 @@ typedef struct dnode {
kmutex_t dn_dbufs_mtx;
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
boolean_t dn_have_spill; /* have spill or are spilling */
/* parent IO for current sync write */
zio_t *dn_zio;
/* used in syncing context */
dnode_phys_t *dn_oldphys;
uint64_t dn_oldused; /* old phys used bytes */
uint64_t dn_oldflags; /* old phys dn_flags */
uint64_t dn_olduid, dn_oldgid;
uint64_t dn_newuid, dn_newgid;
int dn_id_flags;
/* holds prefetch structure */
struct zfetch dn_zfetch;
@@ -202,14 +226,17 @@ typedef struct free_range {
uint64_t fr_nblks;
} free_range_t;
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
uint64_t object);
void dnode_special_close(dnode_t *dn);
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
int dnode_hold(struct objset_impl *dd, uint64_t object,
void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx);
void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
int dnode_hold(struct objset *dd, uint64_t object,
void *ref, dnode_t **dnp);
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
+29 -33
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_DATASET_H
@@ -33,6 +32,7 @@
#include <sys/bplist.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#ifdef __cplusplus
extern "C" {
@@ -42,8 +42,6 @@ struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
@@ -85,7 +83,7 @@ typedef struct dsl_dataset_phys {
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
uint64_t ds_used_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
@@ -115,10 +113,10 @@ typedef struct dsl_dataset {
/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
uint64_t ds_origin_txg;
/* has internal locking: */
bplist_t ds_deadlist;
dsl_deadlist_t ds_deadlist;
bplist_t ds_pending_deadlist;
/* to protect against multiple concurrent incremental recv */
kmutex_t ds_recvlock;
@@ -132,8 +130,7 @@ typedef struct dsl_dataset {
* Protected by ds_lock:
*/
kmutex_t ds_lock;
void *ds_user_ptr;
dsl_dataset_evict_func_t *ds_user_evict_func;
objset_t *ds_objset;
uint64_t ds_userrefs;
/*
@@ -165,7 +162,7 @@ struct dsl_ds_destroyarg {
boolean_t need_prep; /* do we need to retry due to EBUSY? */
};
#define dsl_dataset_is_snapshot(ds) \
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
@@ -174,17 +171,17 @@ struct dsl_ds_destroyarg {
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **);
int dsl_dataset_own(const char *name, int flags, void *owner,
dsl_dataset_t **dsp);
int dsl_dataset_own(const char *name, boolean_t inconsistentok,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
int flags, void *owner, dsl_dataset_t **);
boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
void *owner);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
void *tag);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
@@ -195,21 +192,18 @@ dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
boolean_t recursive);
boolean_t recursive, boolean_t temphold);
int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
boolean_t recursive);
int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
char *htag);
int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func);
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
@@ -219,10 +213,12 @@ boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx);
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx, boolean_t async);
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
@@ -238,13 +234,13 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
dmu_tx_t *tx);
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
dmu_tx_t *tx);
int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
uint64_t quota);
dsl_syncfunc_t dsl_dataset_set_quota_sync;
int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
int dsl_destroy_inconsistent(const char *dsname, void *arg);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
+87
View File
@@ -0,0 +1,87 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_DEADLIST_H
#define _SYS_DSL_DEADLIST_H
#include <sys/bpobj.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dmu_buf;
struct dsl_dataset;
/*
 * Fixed-size record of three 64-bit counters followed by padding:
 * 3 + 37 = 40 uint64_t words = 320 bytes in total.
 * NOTE(review): the counters presumably hold the used / compressed /
 * uncompressed totals that dsl_deadlist_space() reports -- confirm.
 */
typedef struct dsl_deadlist_phys {
uint64_t dl_used;
uint64_t dl_comp;
uint64_t dl_uncomp;
uint64_t dl_pad[37]; /* pad out to 320 bytes for future expansion */
} dsl_deadlist_phys_t;
/*
 * In-core handle for a deadlist; this is the object every dsl_deadlist_*()
 * routine declared in this header operates on.
 */
typedef struct dsl_deadlist {
/* objset / object number pair, as passed to dsl_deadlist_open() */
objset_t *dl_os;
uint64_t dl_object;
/*
 * NOTE(review): presumably a tree of dsl_deadlist_entry_t (which embeds an
 * avl_node_t), with dl_havetree recording whether it has been built --
 * confirm against the implementation.
 */
avl_tree_t dl_tree;
boolean_t dl_havetree;
/* DMU buffer and dsl_deadlist_phys_t pointer -- presumably dl_phys points into dl_dbuf; TODO confirm */
struct dmu_buf *dl_dbuf;
dsl_deadlist_phys_t *dl_phys;
kmutex_t dl_lock;
/* if it's the old on-disk format: */
bpobj_t dl_bpobj;
boolean_t dl_oldfmt;
} dsl_deadlist_t;
/*
 * One AVL-linkable deadlist entry: a txg value plus a bpobj.
 * NOTE(review): presumably these are the nodes of dsl_deadlist_t.dl_tree,
 * keyed by dle_mintxg (cf. the mintxg argument of dsl_deadlist_add_key() and
 * dsl_deadlist_remove_key()) -- confirm.
 */
typedef struct dsl_deadlist_entry {
avl_node_t dle_node; /* AVL tree linkage */
uint64_t dle_mintxg;
bpobj_t dle_bpobj;
} dsl_deadlist_entry_t;
void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object);
void dsl_deadlist_close(dsl_deadlist_t *dl);
uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx);
void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx);
void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx);
void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
uint64_t mrs_obj, dmu_tx_t *tx);
void dsl_deadlist_space(dsl_deadlist_t *dl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
void dsl_deadlist_space_range(dsl_deadlist_t *dl,
uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx);
void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DEADLIST_H */
+13 -5
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_DIR_H
@@ -70,7 +69,8 @@ typedef struct dsl_dir_phys {
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
uint64_t dd_flags;
uint64_t dd_used_breakdown[DD_USED_NUM];
uint64_t dd_pad[14]; /* pad out to 256 bytes for good measure */
uint64_t dd_clones; /* dsl_dir objects */
uint64_t dd_pad[13]; /* pad out to 256 bytes for good measure */
} dsl_dir_phys_t;
struct dsl_dir {
@@ -89,6 +89,8 @@ struct dsl_dir {
/* Protected by dd_lock */
kmutex_t dd_lock;
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
uint64_t dd_origin_txg;
/* gross estimate of space used by in-flight tx's */
uint64_t dd_tempreserved[TXG_SIZE];
@@ -125,18 +127,24 @@ void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
#define ORIGIN_DIR_NAME "$ORIGIN"
#define XLATION_DIR_NAME "$XLATION"
#define FREE_DIR_NAME "$FREE"
#ifdef ZFS_DEBUG
#define dprintf_dd(dd, fmt, ...) do { \
+30 -32
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_POOL_H
@@ -32,6 +31,9 @@
#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/dnode.h>
#include <sys/ddt.h>
#include <sys/arc.h>
#include <sys/bpobj.h>
#ifdef __cplusplus
extern "C" {
@@ -42,12 +44,7 @@ struct dsl_dir;
struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
enum scrub_func {
SCRUB_FUNC_NONE,
SCRUB_FUNC_CLEAN,
SCRUB_FUNC_NUMFUNCS
};
struct dsl_scan;
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
@@ -75,6 +72,7 @@ typedef struct dsl_pool {
struct objset *dp_meta_objset;
struct dsl_dir *dp_root_dir;
struct dsl_dir *dp_mos_dir;
struct dsl_dir *dp_free_dir;
struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj;
struct taskq *dp_vnrele_taskq;
@@ -83,25 +81,18 @@ typedef struct dsl_pool {
blkptr_t dp_meta_rootbp;
list_t dp_synced_datasets;
hrtime_t dp_read_overhead;
uint64_t dp_throughput;
uint64_t dp_throughput; /* bytes per millisec */
uint64_t dp_write_limit;
uint64_t dp_tmp_userrefs_obj;
bpobj_t dp_free_bpobj;
struct dsl_scan *dp_scan;
/* Uses dp_lock */
kmutex_t dp_lock;
uint64_t dp_space_towrite[TXG_SIZE];
uint64_t dp_tempreserved[TXG_SIZE];
enum scrub_func dp_scrub_func;
uint64_t dp_scrub_queue_obj;
uint64_t dp_scrub_min_txg;
uint64_t dp_scrub_max_txg;
zbookmark_t dp_scrub_bookmark;
boolean_t dp_scrub_pausing;
boolean_t dp_scrub_isresilver;
uint64_t dp_scrub_start_time;
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
boolean_t dp_scrub_restart;
/* Has its own locking */
tx_state_t dp_tx;
txg_list_t dp_dirty_datasets;
@@ -123,29 +114,36 @@ int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_zil_clean(dsl_pool_t *dp);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
uint64_t dsl_pool_adjustedfree(dsl_pool_t *dp, boolean_t netfree);
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_memory_pressure(dsl_pool_t *dp);
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg,
const blkptr_t *bpp);
int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
int dsl_pool_scrub_cancel(dsl_pool_t *dp);
int dsl_pool_scrub_clean(dsl_pool_t *dp);
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_scrub_restart(dsl_pool_t *dp);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx);
extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, dmu_tx_t *tx);
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
#ifdef __cplusplus
}
#endif
+43 -7
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_PROP_H
@@ -49,6 +48,25 @@ typedef struct dsl_prop_cb_record {
void *cbr_arg;
} dsl_prop_cb_record_t;
/*
 * An nvlist of properties together with a single zprop_source_t.
 * NOTE(review): presumably the argument block for dsl_props_set_sync, with
 * pa_source applying to every property in pa_props -- confirm.
 */
typedef struct dsl_props_arg {
nvlist_t *pa_props;
zprop_source_t pa_source;
} dsl_props_arg_t;
/*
 * Description of one property being set: its name, its source, and its value.
 * NOTE(review): psa_intsz / psa_numints / psa_value presumably follow the same
 * (intsz, numints, buf) convention as dsl_prop_set() -- confirm.
 *
 * Note that the struct tag is dsl_prop_set_arg while the typedef name is
 * dsl_prop_setarg_t (no underscore before "arg").
 */
typedef struct dsl_prop_set_arg {
const char *psa_name;
zprop_source_t psa_source;
int psa_intsz;
int psa_numints;
const void *psa_value;
/*
* Used to handle the special requirements of the quota and reservation
* properties.
*/
uint64_t psa_effective_value;
} dsl_prop_setarg_t;
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
@@ -59,18 +77,36 @@ int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int intsz, int numints, void *buf, char *setpoint,
boolean_t snapshot);
dsl_syncfunc_t dsl_props_set_sync;
int dsl_prop_set(const char *ddname, const char *propname,
int intsz, int numints, const void *buf);
int dsl_props_set(const char *dsname, nvlist_t *nvl);
zprop_source_t source, int intsz, int numints, const void *buf);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
cred_t *cr, dmu_tx_t *tx);
dmu_tx_t *tx);
void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value);
int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
/*
 * DSL_PROP_CHECK_PREDICTION(dd, psa) forwards to dsl_prop_check_prediction()
 * when ZFS_DEBUG is defined.  In non-debug builds it expands to nothing, so
 * its arguments are not evaluated and must not carry required side effects.
 */
#ifdef ZFS_DEBUG
void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#define DSL_PROP_CHECK_PREDICTION(dd, psa) \
dsl_prop_check_prediction((dd), (psa))
#else
#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */
#endif
/* flag first receive on or after SPA_VERSION_RECVD_PROPS */
boolean_t dsl_prop_get_hasrecvd(objset_t *os);
void dsl_prop_set_hasrecvd(objset_t *os);
void dsl_prop_unset_hasrecvd(objset_t *os);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,
+108
View File
@@ -0,0 +1,108 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
#define _SYS_DSL_SCAN_H
#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/ddt.h>
#include <sys/bplist.h>
#ifdef __cplusplus
extern "C" {
#endif
struct objset;
struct dsl_dir;
struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
/*
* All members of this structure must be uint64_t, for byteswap
* purposes.
*/
typedef struct dsl_scan_phys {
uint64_t scn_func; /* pool_scan_func_t */
uint64_t scn_state; /* dsl_scan_state_t */
uint64_t scn_queue_obj;
uint64_t scn_min_txg;
uint64_t scn_max_txg;
uint64_t scn_cur_min_txg;
uint64_t scn_cur_max_txg;
uint64_t scn_start_time;
uint64_t scn_end_time;
uint64_t scn_to_examine; /* total bytes to be scanned */
uint64_t scn_examined; /* bytes scanned so far */
uint64_t scn_to_process;
uint64_t scn_processed;
uint64_t scn_errors; /* scan I/O error count */
uint64_t scn_ddt_class_max;
/*
 * NOTE(review): the two bookmark members below are struct types rather
 * than plain uint64_t.  Presumably each consists solely of uint64_t
 * words, so the "all members must be uint64_t" rule stated above this
 * structure still holds -- confirm against their definitions.
 */
ddt_bookmark_t scn_ddt_bookmark;
zbookmark_t scn_bookmark;
uint64_t scn_flags; /* dsl_scan_flags_t */
} dsl_scan_phys_t;
/* Size of dsl_scan_phys_t expressed as a count of uint64_t words. */
#define SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t))
/* Bit flags carried in dsl_scan_phys_t.scn_flags. */
typedef enum dsl_scan_flags {
	DSF_VISIT_DS_AGAIN = (1 << 0)
} dsl_scan_flags_t;
/*
 * Scan state wrapper: pairs a pool pointer (scn_dp) and a handful of
 * run-time fields with an embedded dsl_scan_phys_t (scn_phys).
 * NOTE(review): presumably this is the in-memory counterpart of the
 * persistent dsl_scan_phys_t -- confirm.
 */
typedef struct dsl_scan {
struct dsl_pool *scn_dp;
boolean_t scn_pausing;
uint64_t scn_restart_txg;
uint64_t scn_sync_start_time;
zio_t *scn_zio_root;
/* for debugging / information */
uint64_t scn_visited_this_txg;
dsl_scan_phys_t scn_phys;
} dsl_scan_t;
/*
 * Scan entry points.
 *
 * NOTE(review): the transaction argument is spelled dmu_tx_t in some
 * prototypes and struct dmu_tx in others.  This header only forward-declares
 * struct dmu_tx, so the dmu_tx_t typedef (as well as pool_scan_func_t and
 * ddt_entry_t) must be supplied by the headers included above -- confirm.
 */
int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_SCAN_H */
+2 -6
View File
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_SYNCTASK_H
#define _SYS_DSL_SYNCTASK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/txg.h>
#include <sys/zfs_context.h>
@@ -38,7 +35,7 @@ extern "C" {
struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
typedef struct dsl_sync_task {
list_node_t dst_node;
@@ -53,7 +50,6 @@ typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
cred_t *dstg_cr;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;
+13
View File
@@ -68,6 +68,18 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS "bad_range_sets"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
#define FM_EREPORT_FAILMODE_WAIT "wait"
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
@@ -75,6 +87,7 @@ extern "C" {
#define FM_RESOURCE_REMOVED "removed"
#define FM_RESOURCE_AUTOREPLACE "autoreplace"
#define FM_RESOURCE_STATECHANGE "statechange"
#ifdef __cplusplus
}
+9 -3
View File
@@ -20,8 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_FM_PROTOCOL_H
@@ -47,6 +46,7 @@ extern "C" {
/* FM event class values */
#define FM_EREPORT_CLASS "ereport"
#define FM_FAULT_CLASS "fault"
#define FM_DEFECT_CLASS "defect"
#define FM_RSRC_CLASS "resource"
#define FM_LIST_EVENT "list"
@@ -83,6 +83,7 @@ extern "C" {
#define FM_SUSPECT_FAULT_LIST "fault-list"
#define FM_SUSPECT_FAULT_SZ "fault-list-sz"
#define FM_SUSPECT_FAULT_STATUS "fault-status"
#define FM_SUSPECT_INJECTED "__injected"
#define FM_SUSPECT_MESSAGE "message"
#define FM_SUSPECT_RETIRE "retire"
#define FM_SUSPECT_RESPONSE "response"
@@ -122,6 +123,7 @@ extern "C" {
#define FM_RSRC_ASRU_REPAIRED "repaired"
#define FM_RSRC_ASRU_REPLACED "replaced"
#define FM_RSRC_ASRU_ACQUITTED "acquitted"
#define FM_RSRC_ASRU_RESOLVED "resolved"
#define FM_RSRC_ASRU_UNUSABLE "unusable"
#define FM_RSRC_ASRU_EVENT "event"
@@ -170,6 +172,7 @@ extern "C" {
/* FMRI authority-type member names */
#define FM_FMRI_AUTH_CHASSIS "chassis-id"
#define FM_FMRI_AUTH_PRODUCT_SN "product-sn"
#define FM_FMRI_AUTH_PRODUCT "product-id"
#define FM_FMRI_AUTH_DOMAIN "domain-id"
#define FM_FMRI_AUTH_SERVER "server-id"
@@ -243,6 +246,7 @@ extern "C" {
/* dev scheme member names */
#define FM_FMRI_DEV_ID "devid"
#define FM_FMRI_DEV_TGTPTLUN0 "target-port-l0id"
#define FM_FMRI_DEV_PATH "device-path"
/* pkg scheme member names */
@@ -311,7 +315,7 @@ extern int i_fm_payload_set(nvlist_t *, const char *, va_list);
extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *,
int, ...);
extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *,
const char *);
const char *, const char *);
extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *);
extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
uint8_t *, const char *);
@@ -320,6 +324,8 @@ extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
const char *, const char *);
extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *,
nvlist_t *, int, ...);
extern uint64_t fm_ena_increment(uint64_t);
extern uint64_t fm_ena_generate(uint64_t, uchar_t);
+15 -8
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@@ -36,9 +35,6 @@
extern "C" {
#endif
typedef struct metaslab_class metaslab_class_t;
typedef struct metaslab_group metaslab_group_t;
extern space_map_ops_t *zfs_metaslab_ops;
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
@@ -46,6 +42,7 @@ extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_reassess(metaslab_group_t *mg);
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
@@ -57,14 +54,24 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops);
extern metaslab_class_t *metaslab_class_create(spa_t *spa,
space_map_ops_t *ops);
extern void metaslab_class_destroy(metaslab_class_t *mc);
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
extern int metaslab_class_validate(metaslab_class_t *mc);
extern void metaslab_class_space_update(metaslab_class_t *mc,
int64_t alloc_delta, int64_t defer_delta,
int64_t space_delta, int64_t dspace_delta);
extern uint64_t metaslab_class_get_alloc(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_space(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
vdev_t *vd);
extern void metaslab_group_destroy(metaslab_group_t *mg);
extern void metaslab_group_activate(metaslab_group_t *mg);
extern void metaslab_group_passivate(metaslab_group_t *mg);
#ifdef __cplusplus
}
+10 -1
View File
@@ -37,16 +37,23 @@ extern "C" {
#endif
struct metaslab_class {
spa_t *mc_spa;
metaslab_group_t *mc_rotor;
uint64_t mc_allocated;
space_map_ops_t *mc_ops;
uint64_t mc_aliquot;
uint64_t mc_alloc; /* total allocated space */
uint64_t mc_deferred; /* total deferred frees */
uint64_t mc_space; /* total space (alloc + free) */
uint64_t mc_dspace; /* total deflated space */
};
struct metaslab_group {
kmutex_t mg_lock;
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
uint64_t mg_bonus_area;
int64_t mg_bias;
int64_t mg_activation_count;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
metaslab_group_t *mg_prev;
@@ -66,7 +73,9 @@ struct metaslab {
space_map_obj_t ms_smo_syncing; /* syncing space map object */
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
space_map_t ms_map; /* in-core free space map */
int64_t ms_deferspace; /* sum of ms_defermap[] space */
uint64_t ms_weight; /* weight vs. others in group */
metaslab_group_t *ms_group; /* metaslab group */
avl_node_t ms_group_node; /* node in metaslab group tree */
+6 -4
View File
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_REFCOUNT_H
#define _SYS_REFCOUNT_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
@@ -91,6 +88,11 @@ typedef struct refcount {
atomic_add_64_nv(&(rc)->rc_count, number)
#define refcount_remove_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, -number)
/*
 * Move the whole of src's current count over to dst: the count is read
 * once into a temporary, subtracted from src and added to dst.
 * NOTE(review): the read and the two atomic_add_64() calls are separate
 * steps, so the transfer as a whole is not atomic with respect to
 * concurrent updates of src -- confirm that callers serialize this.
 * src is evaluated twice and dst once; pass side-effect-free expressions.
 */
#define refcount_transfer(dst, src) { \
uint64_t __tmp = (src)->rc_count; \
atomic_add_64(&(src)->rc_count, -__tmp); \
atomic_add_64(&(dst)->rc_count, __tmp); \
}
#define refcount_init()
#define refcount_fini()
+171
View File
@@ -0,0 +1,171 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SA_H
#define _SYS_SA_H
#include <sys/dmu.h>
/*
* Currently available byteswap functions.
* If at all possible, new attributes should use
* one of the already defined byteswap functions.
* If a new byteswap function is added then the
* ZPL/Pool version will need to be bumped.
*/
/* Selects which byteswap routine applies to an attribute's data. */
typedef enum sa_bswap_type {
	SA_UINT64_ARRAY = 0,
	SA_UINT32_ARRAY = 1,
	SA_UINT16_ARRAY = 2,
	SA_UINT8_ARRAY = 3,
	SA_ACL = 4
} sa_bswap_type_t;
typedef uint16_t sa_attr_type_t;
/*
* Attribute to register support for.
*/
typedef struct sa_attr_reg {
char *sa_name; /* attribute name */
uint16_t sa_length; /* NOTE(review): presumably 0 == variable length; confirm */
sa_bswap_type_t sa_byteswap; /* bswap function enum */
sa_attr_type_t sa_attr; /* filled in during registration */
} sa_attr_reg_t;
typedef void (sa_data_locator_t)(void **, uint32_t *, uint32_t,
boolean_t, void *userptr);
/*
* Array of attributes to store.
*
* This array should be treated as opaque/private data.
* The SA_ADD_BULK_ATTR() macro should be used for manipulating
* the array.
*
* When sa_replace_all_by_template() is used the attributes
* will be stored in the order defined in the array, except that
* the attributes may be split between the bonus and the spill buffer.
*
*/
typedef struct sa_bulk_attr {
void *sa_data; /* pointer to data */
sa_data_locator_t *sa_data_func; /* function for accessing data */
uint16_t sa_length; /* length of data */
sa_attr_type_t sa_attr; /* attribute to manipulate */
/* the following are private to the sa framework */
void *sa_addr;
uint16_t sa_buftype;
uint16_t sa_size;
} sa_bulk_attr_t;
/*
 * Special macro for adding entries for bulk attr support.
 *
 *	b    - sa_bulk_attr_t array
 *	idx  - integer lvalue naming the slot to fill in; it is
 *	       post-incremented, so afterwards it counts the entries added
 *	attr - attribute to manipulate
 *	func - function for accessing data
 *	data - pointer to data
 *	len  - length of data
 *
 * Every argument is parenthesized in the expansion so that compound
 * expressions (for example "base + 1" as the array) expand correctly.
 * idx appears four times and must therefore be free of side effects.
 * The expansion remains a brace-enclosed block, exactly as before, so
 * existing call sites are unaffected.
 */
#define	SA_ADD_BULK_ATTR(b, idx, attr, func, data, len) \
{ \
	(b)[(idx)].sa_attr = (attr); \
	(b)[(idx)].sa_data_func = (func); \
	(b)[(idx)].sa_data = (data); \
	(b)[(idx)++].sa_length = (len); \
}
typedef struct sa_os sa_os_t;
/* Handle kind passed to sa_handle_get() and sa_handle_get_from_db(). */
typedef enum sa_handle_type {
	SA_HDL_SHARED = 0,
	SA_HDL_PRIVATE = 1
} sa_handle_type_t;
struct sa_handle;
typedef void *sa_lookup_tab_t;
typedef struct sa_handle sa_handle_t;
typedef void (sa_update_cb_t)(sa_handle_t *, dmu_tx_t *tx);
/*
 * Public SA function declarations.
 *
 * The extern "C" guard now encloses the declarations; previously it was an
 * empty block placed after them, which left every function with C++ linkage
 * when this header was included from C++.  sa_cache_init() and
 * sa_cache_fini() are declared with (void) so that they are real prototypes
 * in C rather than declarations with unspecified parameters.
 */
#ifdef	__cplusplus
extern "C" {
#endif

int sa_handle_get(objset_t *, uint64_t, void *userp,
    sa_handle_type_t, sa_handle_t **);
int sa_handle_get_from_db(objset_t *, dmu_buf_t *, void *userp,
    sa_handle_type_t, sa_handle_t **);
void sa_handle_destroy(sa_handle_t *);
int sa_buf_hold(objset_t *, uint64_t, void *, dmu_buf_t **);
void sa_buf_rele(dmu_buf_t *, void *);
int sa_lookup(sa_handle_t *, sa_attr_type_t, void *buf, uint32_t buflen);
int sa_update(sa_handle_t *, sa_attr_type_t, void *buf,
    uint32_t buflen, dmu_tx_t *);
int sa_remove(sa_handle_t *, sa_attr_type_t, dmu_tx_t *);
int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count);
int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count);
int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *);
int sa_size(sa_handle_t *, sa_attr_type_t, int *);
int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
    uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
void sa_object_info(sa_handle_t *, dmu_object_info_t *);
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
void sa_update_user(sa_handle_t *, sa_handle_t *);
void *sa_get_userdata(sa_handle_t *);
void sa_set_userp(sa_handle_t *, void *);
dmu_buf_t *sa_get_db(sa_handle_t *);
uint64_t sa_handle_object(sa_handle_t *);
boolean_t sa_attr_would_spill(sa_handle_t *, sa_attr_type_t, int size);
void sa_register_update_callback(objset_t *, sa_update_cb_t *);
sa_attr_type_t *sa_setup(objset_t *, uint64_t, sa_attr_reg_t *, int);
void sa_tear_down(objset_t *);
int sa_replace_all_by_template(sa_handle_t *, sa_bulk_attr_t *,
    int, dmu_tx_t *);
int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *,
    int, dmu_tx_t *);
boolean_t sa_enabled(objset_t *);
void sa_cache_init(void);
void sa_cache_fini(void);
int sa_set_sa_object(objset_t *, uint64_t);
int sa_hdrsize(void *);
void sa_handle_lock(sa_handle_t *);
void sa_handle_unlock(sa_handle_t *);

#ifdef _KERNEL
int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *);
#endif

#ifdef	__cplusplus
}
#endif
#endif /* _SYS_SA_H */
+288
View File
@@ -0,0 +1,288 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SA_IMPL_H
#define _SYS_SA_IMPL_H
#include <sys/dmu.h>
#include <sys/refcount.h>
#include <sys/list.h>
/*
* Array of known attributes and their
* various characteristics.
*/
typedef struct sa_attr_table {
sa_attr_type_t sa_attr;
uint8_t sa_registered; /* NOTE(review): presumably non-zero once registered; confirm */
uint16_t sa_length; /* 0 makes SA_ATTR_LEN() read the length from the header */
sa_bswap_type_t sa_byteswap;
char *sa_name;
} sa_attr_table_t;
/*
* Zap attribute format for attribute registration
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* | unused | len | bswap | attr num |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Zap attribute format for layout information.
*
* layout information is stored as an array of attribute numbers
* The name of the attribute is the layout number (0, 1, 2, ...)
*
* 16 0
* +---- ---+
* | attr # |
* +--------+
* | attr # |
* +--- ----+
* ......
*
*/
/*
 * Accessors for the registration word drawn above: attr number in
 * bits 0-15, bswap in bits 16-23, length in bits 24-39.
 *
 * NOTE(review): the getters use the BF32_* helpers while ATTR_ENCODE uses
 * BF64_*.  ATTR_LENGTH reads bits that extend past bit 31, so x is
 * presumably expected to be a 64-bit value -- confirm against the BF32_GET
 * definition.  ATTR_ENCODE expands to a brace-enclosed block, not an
 * expression.
 */
#define ATTR_BSWAP(x) BF32_GET(x, 16, 8)
#define ATTR_LENGTH(x) BF32_GET(x, 24, 16)
#define ATTR_NUM(x) BF32_GET(x, 0, 16)
#define ATTR_ENCODE(x, attr, length, bswap) \
{ \
BF64_SET(x, 24, 16, length); \
BF64_SET(x, 16, 8, bswap); \
BF64_SET(x, 0, 16, attr); \
}
/*
 * Accessors for one 32-bit table-of-contents entry: bit 31 is the
 * "present" flag, the length index starts at bit 24 and the offset
 * starts at bit 0.
 *
 * NOTE(review): the decode widths do not match the encode widths --
 * TOC_OFF reads 23 bits but the offset is written with 24, and
 * TOC_LEN_IDX reads 4 bits but len_idx is written with 7.  Confirm
 * whether the narrower getters are intentional.
 */
#define TOC_OFF(x) BF32_GET(x, 0, 23)
#define TOC_ATTR_PRESENT(x) BF32_GET(x, 31, 1)
#define TOC_LEN_IDX(x) BF32_GET(x, 24, 4)
#define TOC_ATTR_ENCODE(x, len_idx, offset) \
{ \
BF32_SET(x, 31, 1, 1); \
BF32_SET(x, 24, 7, len_idx); \
BF32_SET(x, 0, 24, offset); \
}
#define SA_LAYOUTS "LAYOUTS"
#define SA_REGISTRY "REGISTRY"
/*
* Each unique layout will have their own table
* sa_lot (layout_table)
*/
typedef struct sa_lot {
avl_node_t lot_num_node; /* presumably linkage for the by-number tree; confirm */
avl_node_t lot_hash_node; /* presumably linkage for the by-hash tree; confirm */
uint64_t lot_num;
uint64_t lot_hash;
sa_attr_type_t *lot_attrs; /* array of attr #'s */
uint32_t lot_var_sizes; /* how many aren't fixed size */
uint32_t lot_attr_count; /* total attr count */
list_t lot_idx_tab; /* should be only a couple of entries */
int lot_instance; /* used with lot_hash to identify entry */
} sa_lot_t;
/* index table of offsets */
typedef struct sa_idx_tab {
list_node_t sa_next; /* presumably linkage on sa_lot_t.lot_idx_tab; confirm */
sa_lot_t *sa_layout;
uint16_t *sa_variable_lengths;
refcount_t sa_refcount;
/* indexed by attribute number; entries are decoded with the TOC_* macros */
uint32_t *sa_idx_tab; /* array of offsets */
} sa_idx_tab_t;
/*
* Since the offset/index information into the actual data
* will usually be identical we can share that information with
* all handles that have the exact same offsets.
*
* You would typically only have a large number of different tables of
* contents if you had several variable-sized attributes.
*
* Two AVL trees are used to track the attribute layout numbers.
* One is keyed by number and will be consulted when a DMU_OT_SA
* object is first read. The second tree is keyed by the hash signature
* of the attributes and will be consulted when an attribute is added
* to determine if we already have an instance of that layout. Both
* of these trees are interconnected. The only difference is that
* when an entry is found in the "hash" tree the list of attributes will
* need to be compared against the list of attributes you have in hand.
* The assumption is that typically attributes will just be updated and
* adding a completely new attribute is a very rare operation.
*/
/*
 * Definition behind the sa_os_t typedef: the attribute table plus the two
 * layout trees (keyed by number and by hash).
 * NOTE(review): presumably one instance exists per objset -- confirm.
 */
struct sa_os {
kmutex_t sa_lock;
boolean_t sa_need_attr_registration;
boolean_t sa_force_spill;
uint64_t sa_master_obj;
uint64_t sa_reg_attr_obj;
uint64_t sa_layout_attr_obj;
int sa_num_attrs;
sa_attr_table_t *sa_attr_table; /* private attr table */
sa_update_cb_t *sa_update_cb;
avl_tree_t sa_layout_num_tree; /* keyed by layout number */
avl_tree_t sa_layout_hash_tree; /* keyed by layout hash value */
int sa_user_table_sz;
sa_attr_type_t *sa_user_table; /* user name->attr mapping table */
};
/*
* Header for all bonus and spill buffers.
* The header has a fixed portion with a variable number
* of "lengths" depending on the number of variable-sized
* attributes, which is determined by the "layout number".
*/
#define SA_MAGIC 0x2F505A /* ZFS SA */
/*
 * The struct declares a single sa_lengths[] slot; per the field comments,
 * further lengths and then the attribute data follow it.
 * SA_HDR_SIZE_MATCH_LAYOUT() accounts for that first slot by adding space
 * for only (lot_var_sizes - 1) extra lengths on top of sizeof (this struct).
 */
typedef struct sa_hdr_phys {
uint32_t sa_magic;
uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */
uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */
/* ... Data follows the lengths. */
} sa_hdr_phys_t;
/*
 * sa_hdr_phys -> sa_layout_info
 *
 * 16      10       0
 * +--------+-------+
 * | hdrsz  |layout |
 * +--------+-------+
 *
 * Bits 0-9 are the layout number (10 bits).
 * Bits 10-15 are the size of the header (6 bits).
 * The hdrsize is the number * 8
 *
 * For example.
 * hdrsz of 1 ==> 8 byte header
 *          2 ==> 16 byte header
 *
 * SA_HDR_SIZE decodes the same 6-bit field that
 * SA_HDR_LAYOUT_INFO_ENCODE writes.  It previously asked for 16 bits,
 * which gave the same result only because sa_layout_info is 16 bits wide.
 * The hdr argument is parenthesized so that any pointer expression may be
 * passed.  SA_HDR_LAYOUT_INFO_ENCODE expands to a brace-enclosed block.
 */
#define	SA_HDR_LAYOUT_NUM(hdr) BF32_GET((hdr)->sa_layout_info, 0, 10)
#define	SA_HDR_SIZE(hdr) BF32_GET_SB((hdr)->sa_layout_info, 10, 6, 3, 0)
#define	SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
{ \
	BF32_SET_SB(x, 10, 6, 3, 0, size); \
	BF32_SET(x, 0, 10, num); \
}
/* Names which of a handle's two buffers is meant (see SA_GET_DB). */
typedef enum sa_buf_type {
	SA_BONUS = 1,	/* selects sa_handle.sa_bonus */
	SA_SPILL	/* == 2; selects sa_handle.sa_spill */
} sa_buf_type_t;
/* Operation codes for attribute data. */
typedef enum sa_data_op {
	SA_LOOKUP = 0,
	SA_UPDATE = 1,
	SA_ADD = 2,
	SA_REPLACE = 3,
	SA_REMOVE = 4
} sa_data_op_t;
/*
* Opaque handle used for most sa functions
*
* This needs to be kept as small as possible.
*/
struct sa_handle {
kmutex_t sa_lock;
dmu_buf_t *sa_bonus; /* buffer selected by SA_BONUS in SA_GET_DB() */
dmu_buf_t *sa_spill; /* buffer selected by SA_SPILL in SA_GET_DB() */
objset_t *sa_os;
void *sa_userp;
sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */
sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */
};
/*
 * Buffer selection helpers.  "type" is an sa_buf_type_t: SA_BONUS picks
 * the bonus side of the handle, anything else picks the spill side.
 *
 *	SA_GET_DB	- the chosen buffer, cast to dmu_buf_impl_t *
 *	SA_GET_HDR	- that buffer's db.db_data, cast to sa_hdr_phys_t *
 *	SA_IDX_TAB_GET	- the matching index table pointer
 *
 * Each expansion is fully parenthesized (arguments and result), so
 * SA_GET_DB(...)->field and compound handle expressions expand correctly.
 * hdl may be evaluated once only on the selected arm; type is evaluated
 * once.
 */
#define	SA_GET_DB(hdl, type)	\
	((dmu_buf_impl_t *)(((type) == SA_BONUS) ? \
	(hdl)->sa_bonus : (hdl)->sa_spill))
#define	SA_GET_HDR(hdl, type) \
	((sa_hdr_phys_t *)(SA_GET_DB(hdl, type)->db.db_data))
#define	SA_IDX_TAB_GET(hdl, type) \
	(((type) == SA_BONUS) ? (hdl)->sa_bonus_tab : (hdl)->sa_spill_tab)
/*
 * IS_SA_BONUSTYPE(a)	  - B_TRUE when a equals DMU_OT_SA, else B_FALSE.
 * SA_BONUSTYPE_FROM_DB(db) - dn_bonustype of the dnode behind db, where db
 *			    is treated as a dmu_buf_impl_t *.
 * SA_BLKPTR_SPACE	  - DN_MAX_BONUSLEN less the size of one blkptr_t.
 * SA_LAYOUT_NUM(x, type) - 0 when type is not the SA bonus type; otherwise
 *			    the layout number stored in header x, with a
 *			    stored value of 0 reported as 1.
 *
 * Arguments are parenthesized in the expansions so that compound
 * expressions (e.g. "t & 0xff") compare as a whole.  SA_LAYOUT_NUM no
 * longer re-tests the bonus type on the arm that is only reached when the
 * test already succeeded; the result is unchanged.
 */
#define	IS_SA_BONUSTYPE(a)	\
	(((a) == DMU_OT_SA) ? B_TRUE : B_FALSE)
#define	SA_BONUSTYPE_FROM_DB(db) \
	(((dmu_buf_impl_t *)(db))->db_dnode->dn_bonustype)
#define	SA_BLKPTR_SPACE	(DN_MAX_BONUSLEN - sizeof (blkptr_t))
#define	SA_LAYOUT_NUM(x, type) \
	(!IS_SA_BONUSTYPE(type) ? 0 : \
	((SA_HDR_LAYOUT_NUM(x) == 0) ? 1 : SA_HDR_LAYOUT_NUM(x)))
/*
 * Registered (fixed) length of attr from sa's attribute table.
 * NOTE(review): neither the arguments nor the expansion are parenthesized;
 * pass plain identifiers only.
 */
#define SA_REGISTERED_LEN(sa, attr) sa->sa_attr_table[attr].sa_length
/*
 * Length of attr: the registered length when that is non-zero, otherwise
 * the entry of hdr->sa_lengths[] selected by the length index stored in the
 * attribute's idx->sa_idx_tab[] entry.
 */
#define SA_ATTR_LEN(sa, idx, attr, hdr) ((SA_REGISTERED_LEN(sa, attr) == 0) ?\
hdr->sa_lengths[TOC_LEN_IDX(idx->sa_idx_tab[attr])] : \
SA_REGISTERED_LEN(sa, attr))
/*
 * Stamp hdr with SA_MAGIC and encode layout number and header size into
 * sa_layout_info.  Expands to a brace-enclosed block, not an expression.
 */
#define SA_SET_HDR(hdr, num, size) \
{ \
hdr->sa_magic = SA_MAGIC; \
SA_HDR_LAYOUT_INFO_ENCODE(hdr->sa_layout_info, num, size); \
}
/*
 * Fill in the framework-private fields of bulk (an sa_bulk_attr_t lvalue):
 * sa_size from SA_ATTR_LEN, sa_buftype from type, and sa_addr as hdr plus
 * the offset stored in the attribute's index-table entry.
 * NOTE(review): the hdl parameter is not used by the expansion.
 */
#define SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
{ \
bulk.sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \
bulk.sa_buftype = type; \
bulk.sa_addr = \
(void *)((uintptr_t)TOC_OFF(idx->sa_idx_tab[attr]) + \
(uintptr_t)hdr); \
}
/*
 * True when the size encoded in hdr equals what layout tb requires:
 * sizeof (sa_hdr_phys_t), which already holds one length slot, plus room
 * for the remaining (lot_var_sizes - 1) uint16_t lengths rounded up to a
 * multiple of 8 bytes.
 */
#define SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
(SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \
(tb->lot_var_sizes > 1 ? P2ROUNDUP((tb->lot_var_sizes - 1) * \
sizeof (uint16_t), 8) : 0)))
/*
 * SA-internal function declarations.
 *
 * The extern "C" guard now encloses the declarations; previously it was an
 * empty block placed after them, which left every function with C++ linkage
 * when this header was included from C++.  The locator parameter of
 * sa_add_impl() is written as an explicit function pointer, which is the
 * type the bare function-type parameter was already adjusted to.
 */
#ifdef	__cplusplus
extern "C" {
#endif

int sa_add_impl(sa_handle_t *, sa_attr_type_t,
    uint32_t, sa_data_locator_t *, void *, dmu_tx_t *);
void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
    uint16_t *, sa_hdr_phys_t *);

#ifdef	__cplusplus
}
#endif
#endif /* _SYS_SA_IMPL_H */
+203 -56
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_SPA_H
@@ -43,8 +42,13 @@ extern "C" {
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct metaslab_group metaslab_group_t;
typedef struct metaslab_class metaslab_class_t;
typedef struct zio zio_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
struct dsl_pool;
/*
@@ -134,15 +138,15 @@ typedef struct zio_cksum {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
* 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | padding |
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | birth txg |
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -166,25 +170,29 @@ typedef struct zio_cksum {
* cksum checksum function
* comp compression function
* G gang block indicator
* E endianness
* type DMU object type
* B byteorder (endianness)
* D dedup
* X unused
* lvl level of indirection
* birth txg transaction group in which the block was born
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
* log. birth transaction group in which the block was logically born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
typedef struct blkptr {
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[3]; /* Extra space for the future */
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
typedef struct blkptr {
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[2]; /* Extra space for the future */
uint64_t blk_phys_birth; /* txg when block was allocated */
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
/*
* Macros to get and set fields in a bp or DVA.
*/
@@ -208,8 +216,7 @@ typedef struct blkptr {
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
(BP_IS_HOLE(bp) ? 0 : \
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_LSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
@@ -218,20 +225,35 @@ typedef struct blkptr {
#define BP_SET_PSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1)
#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
/*
 * Physical birth txg of a block pointer: blk_phys_birth when that field is
 * non-zero, otherwise the logical birth txg held in blk_birth.
 */
#define	BP_PHYSICAL_BIRTH(bp)	\
	((bp)->blk_phys_birth != 0 ? \
	(bp)->blk_phys_birth : (bp)->blk_birth)

/*
 * Record both birth txgs.  blk_phys_birth is stored as 0 whenever the
 * physical txg equals the logical one, which is the encoding
 * BP_PHYSICAL_BIRTH() undoes.  Expands to a brace-enclosed block.
 */
#define	BP_SET_BIRTH(bp, logical, physical)	\
{	\
	(bp)->blk_birth = (logical);	\
	(bp)->blk_phys_birth =	\
	    ((logical) == (physical) ? 0 : (physical));	\
}
#define BP_GET_ASIZE(bp) \
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
@@ -239,7 +261,7 @@ typedef struct blkptr {
#define BP_GET_UCSIZE(bp) \
((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
@@ -255,6 +277,12 @@ typedef struct blkptr {
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
(dva1)->dva_word[0] == (dva2)->dva_word[0])
#define BP_EQUAL(bp1, bp2) \
(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
((zc1).zc_word[1] - (zc2).zc_word[1]) | \
@@ -274,7 +302,10 @@ typedef struct blkptr {
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
/* BP_IS_RAIDZ(bp) assumes no block compression */
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
BP_GET_PSIZE(bp))
#define BP_ZERO(bp) \
{ \
@@ -287,14 +318,12 @@ typedef struct blkptr {
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
(bp)->blk_pad[2] = 0; \
(bp)->blk_phys_birth = 0; \
(bp)->blk_birth = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
#define BLK_FILL_ALREADY_FREED (-1ULL)
/*
* Note: the byteorder is either 0 or -1, both of which are palindromes.
* This simplifies the endianness handling a bit.
@@ -309,17 +338,81 @@ typedef struct blkptr {
#define BP_SPRINTF_LEN 320
/*
 * This macro allows code sharing between zfs, libzpool, and mdb.
 *
 * It formats a human-readable description of block pointer 'bp' into
 * 'buf'.  The output size is fixed at BP_SPRINTF_LEN, so 'buf' must be
 * at least that large; the macro asserts that the formatted length stayed
 * below that limit.
 *
 * 'func' is either snprintf() or mdb_snprintf().
 * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line;
 *	it is emitted with %c after each DVA and between summary sections.
 * 'bp' may be NULL (prints "<NULL>") or a hole (prints "<hole>").
 * 'type', 'checksum' and 'compress' are strings printed with %s.
 *
 * The expansion is a bare brace block that declares its own locals
 * (including a loop variable declared in the for statement), and the
 * arguments are used unparenthesized and more than once: pass simple
 * identifiers only.
 */
#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
int size = BP_SPRINTF_LEN; \
int len = 0; \
int copies = 0; \
\
if (bp == NULL) { \
len = func(buf + len, size - len, "<NULL>"); \
} else if (BP_IS_HOLE(bp)) { \
len = func(buf + len, size - len, "<hole>"); \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \
if (DVA_IS_VALID(dva)) \
copies++; \
len += func(buf + len, size - len, \
"DVA[%d]=<%llu:%llx:%llx>%c", d, \
(u_longlong_t)DVA_GET_VDEV(dva), \
(u_longlong_t)DVA_GET_OFFSET(dva), \
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \
len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
compress, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
copyname[copies], \
ws, \
(u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)BP_GET_PSIZE(bp), \
(u_longlong_t)bp->blk_birth, \
(u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
(u_longlong_t)bp->blk_fill, \
ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \
(u_longlong_t)bp->blk_cksum.zc_word[1], \
(u_longlong_t)bp->blk_cksum.zc_word[2], \
(u_longlong_t)bp->blk_cksum.zc_word[3]); \
} \
ASSERT(len < size); \
}
#include <sys/dmu.h>
/*
 * Select the ARC buffer contents type for a block pointer: blocks above
 * level 0, and blocks whose object type is flagged ot_metadata in dmu_ot[],
 * yield ARC_BUFC_METADATA; everything else yields ARC_BUFC_DATA.
 *
 * The expansion is a plain parenthesized expression with no trailing
 * semicolon, so it can be used inside larger expressions and as a
 * function argument.  'bp' is evaluated more than once.
 */
#define BP_GET_BUFC_TYPE(bp) \
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
/*
* Routines found in spa.c
*/
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
SPA_IMPORT_ASSEMBLE
} spa_import_type_t;
/* state manipulation functions */
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config,
char *altroot, size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
@@ -338,6 +431,8 @@ extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
#define SPA_ASYNC_CONFIG_UPDATE 0x01
#define SPA_ASYNC_REMOVE 0x02
@@ -345,6 +440,14 @@ extern void spa_inject_delref(spa_t *spa);
#define SPA_ASYNC_RESILVER_DONE 0x08
#define SPA_ASYNC_RESILVER 0x10
#define SPA_ASYNC_AUTOEXPAND 0x20
#define SPA_ASYNC_REMOVE_DONE 0x40
#define SPA_ASYNC_REMOVE_STOP 0x80
/*
* Controls the behavior of spa_vdev_remove().
*/
#define SPA_REMOVE_UNSPARE 0x01
#define SPA_REMOVE_DONE 0x02
/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
@@ -353,8 +456,11 @@ extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
int replace_done);
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern boolean_t spa_vdev_remove_active(spa_t *spa);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
nvlist_t *props, boolean_t exp);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
@@ -368,15 +474,23 @@ extern void spa_l2cache_remove(vdev_t *vd);
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
extern void spa_l2cache_activate(vdev_t *vd);
extern void spa_l2cache_drop(spa_t *spa);
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
/* scrubbing */
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
/*
* DEFERRED_FREE must be large enough that regular blocks are not
* deferred. XXX so can't we change it back to 1?
*/
#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */
#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */
#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
@@ -394,7 +508,6 @@ extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern void spa_config_update(spa_t *spa, int what);
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/*
* Miscellaneous SPA routines in spa_misc.c
@@ -402,7 +515,7 @@ extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/* Namespace manipulation */
extern spa_t *spa_lookup(const char *name);
extern spa_t *spa_add(const char *name, const char *altroot);
extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
extern void spa_remove(spa_t *spa);
extern spa_t *spa_next(spa_t *prev);
@@ -411,6 +524,7 @@ extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_NONE 0x00
#define SCL_CONFIG 0x01
#define SCL_STATE 0x02
#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
@@ -430,12 +544,30 @@ extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
/* Pool vdev add/remove lock */
extern uint64_t spa_vdev_enter(spa_t *spa);
extern uint64_t spa_vdev_config_enter(spa_t *spa);
extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
int error, char *tag);
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
/* Pool vdev state change lock */
extern void spa_vdev_state_enter(spa_t *spa);
extern void spa_vdev_state_enter(spa_t *spa, int oplock);
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
/*
 * Log state.  This is the value type returned by spa_get_log_state() and
 * accepted by spa_set_log_state().  SPA_LOG_UNKNOWN is explicitly 0; the
 * remaining states follow consecutively.
 */
typedef enum spa_log_state {
SPA_LOG_UNKNOWN = 0, /* unknown log state */
SPA_LOG_MISSING, /* missing log(s) */
SPA_LOG_CLEAR, /* clear the log(s) */
SPA_LOG_GOOD, /* log(s) are good */
} spa_log_state_t;
extern spa_log_state_t spa_get_log_state(spa_t *spa);
extern void spa_set_log_state(spa_t *spa, spa_log_state_t state);
extern int spa_offline_log(spa_t *spa);
/* Log claim callback */
extern void spa_claim_notify(zio_t *zio);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
@@ -447,18 +579,26 @@ extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_syncing_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
extern spa_load_state_t spa_load_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);
extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_get_dspace(spa_t *spa);
extern void spa_update_dspace(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern boolean_t spa_deflate(spa_t *spa);
extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_prev_software_version(spa_t *spa);
extern int spa_busy(void);
extern uint8_t spa_get_failmode(spa_t *spa);
extern boolean_t spa_suspended(spa_t *spa);
extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
/* Miscellaneous support routines */
extern int spa_rename(const char *oldname, const char *newname);
@@ -466,18 +606,24 @@ extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
extern uint64_t spa_generate_guid(spa_t *spa);
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
extern void spa_evict_all(void);
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
boolean_t l2cache);
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
extern boolean_t spa_has_slogs(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
extern boolean_t spa_writeable(spa_t *spa);
extern void spa_rewind_data_to_nvlist(spa_t *spa, nvlist_t *to);
extern int spa_mode(spa_t *spa);
extern uint64_t strtonum(const char *str, char **nptr);
/* history logging */
typedef enum history_log_type {
@@ -487,10 +633,11 @@ typedef enum history_log_type {
} history_log_type_t;
typedef struct history_arg {
const char *ha_history_str;
char *ha_history_str;
history_log_type_t ha_log_type;
history_internal_events_t ha_event;
char ha_zone[MAXPATHLEN];
char *ha_zone;
uid_t ha_uid;
} history_arg_t;
extern char *spa_his_ievent_table[];
@@ -500,17 +647,17 @@ extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
extern int spa_history_log(spa_t *spa, const char *his_buf,
history_log_type_t what);
extern void spa_history_internal_log(history_internal_events_t event,
spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
extern void spa_history_log_internal(history_internal_events_t event,
spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
/* error handling */
struct zbookmark;
struct zio;
extern void spa_log_error(spa_t *spa, struct zio *zio);
extern void spa_log_error(spa_t *spa, zio_t *zio);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t stateoroffset, uint64_t length);
zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
@@ -541,7 +688,7 @@ extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
sprintf_blkptr(__blkbuf, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
+56 -19
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -36,6 +35,7 @@
#include <sys/avl.h>
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
#ifdef __cplusplus
extern "C" {
@@ -78,19 +78,33 @@ typedef struct spa_config_dirent {
char *scd_path;
} spa_config_dirent_t;
typedef enum spa_log_state {
SPA_LOG_UNKNOWN = 0, /* unknown log state */
SPA_LOG_MISSING, /* missing log(s) */
SPA_LOG_CLEAR, /* clear the log(s) */
SPA_LOG_GOOD, /* log(s) are good */
} spa_log_state_t;
/*
 * Kinds of zio taskq.  The enumerators are numbered consecutively from 0,
 * so ZIO_TASKQ_TYPES evaluates to 4, the number of enumerators before it
 * (presumably used as an array bound -- confirm at the use sites).
 */
enum zio_taskq_type {
ZIO_TASKQ_ISSUE = 0,
ZIO_TASKQ_ISSUE_HIGH,
ZIO_TASKQ_INTERRUPT,
ZIO_TASKQ_INTERRUPT_HIGH,
ZIO_TASKQ_TYPES
};
/*
 * State machine for the zpool-poolname process.  The state transitions
 * are done as follows (state names below omit the SPA_ prefix used by
 * the enumerators):
 *
 * From To Routine
 * PROC_NONE -> PROC_CREATED spa_activate()
 * PROC_CREATED -> PROC_ACTIVE spa_thread()
 * PROC_ACTIVE -> PROC_DEACTIVATE spa_deactivate()
 * PROC_DEACTIVATE -> PROC_GONE spa_thread()
 * PROC_GONE -> PROC_NONE spa_deactivate()
 */
typedef enum spa_proc_state {
SPA_PROC_NONE, /* spa_proc = &p0, no process created */
SPA_PROC_CREATED, /* spa_activate() has proc, is waiting */
SPA_PROC_ACTIVE, /* taskqs created, spa_proc set */
SPA_PROC_DEACTIVATE, /* spa_deactivate() requests process exit */
SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */
} spa_proc_state_t;
struct spa {
/*
* Fields protected by spa_namespace_lock.
@@ -99,6 +113,7 @@ struct spa {
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
nvlist_t *spa_config_splitting; /* config for splitting */
uint64_t spa_config_txg; /* txg of last config change */
int spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
@@ -113,6 +128,8 @@ struct spa {
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
uint64_t spa_freeze_txg; /* freeze pool at this txg */
uint64_t spa_load_max_txg; /* best initial ub_txg */
uint64_t spa_claim_max_txg; /* highest claimed birth txg */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
@@ -122,21 +139,24 @@ struct spa {
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_config_generation; /* config generation number */
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
bplist_t spa_sync_bplist; /* deferred-free bplist */
bpobj_t spa_deferred_bpobj; /* deferred-free bplist */
bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
uint64_t spa_scrub_errors; /* scrub I/O error count */
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
uint8_t spa_scrub_active; /* active or suspended? */
uint8_t spa_scrub_type; /* type of scrub we're doing */
uint8_t spa_scrub_finished; /* indicator to rotate logs */
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */
@@ -144,7 +164,14 @@ struct spa {
uint16_t spa_async_tasks; /* async task mask */
char *spa_root; /* alternate root directory */
uint64_t spa_ena; /* spa-wide ereport ENA */
boolean_t spa_last_open_failed; /* true if last open faled */
int spa_last_open_failed; /* error if last open failed */
uint64_t spa_last_ubsync_txg; /* "best" uberblock txg */
uint64_t spa_last_ubsync_txg_ts; /* timestamp from that ub */
uint64_t spa_load_txg; /* ub txg that loaded */
uint64_t spa_load_txg_ts; /* timestamp from that ub */
uint64_t spa_load_meta_errors; /* verify metadata err count */
uint64_t spa_load_data_errors; /* verify data err count */
uint64_t spa_verify_min_txg; /* start txg of verify scrub */
kmutex_t spa_errlog_lock; /* error log lock */
uint64_t spa_errlog_last; /* last error log object */
uint64_t spa_errlog_scrub; /* scrub error log object */
@@ -166,11 +193,27 @@ struct spa {
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
kcondvar_t spa_suspend_cv; /* notification of resume */
uint8_t spa_suspended; /* pool is suspended */
uint8_t spa_claiming; /* pool is doing zil_claim() */
boolean_t spa_is_root; /* pool is root */
int spa_minref; /* num refs when first opened */
int spa_mode; /* FREAD | FWRITE */
spa_log_state_t spa_log_state; /* log state */
uint64_t spa_autoexpand; /* lun expansion on/off */
ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
uint64_t spa_ddt_stat_object; /* DDT statistics */
uint64_t spa_dedup_ditto; /* dedup ditto threshold */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
spa_proc_state_t spa_proc_state; /* see definition */
struct proc *spa_proc; /* "zpool-poolname" process */
uint64_t spa_did; /* if procp != p0, did of t1 */
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
uint64_t spa_prev_software_version;
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
@@ -183,12 +226,6 @@ struct spa {
extern const char *spa_config_path;
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
((compress) == ZIO_COMPRESS_ON && \
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
(compress) == ZIO_COMPRESS_OFF)
#ifdef __cplusplus
}
#endif
+1
View File
@@ -77,6 +77,7 @@ struct space_map_ops {
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
uint64_t (*smop_max)(space_map_t *sm);
boolean_t (*smop_fragmented)(space_map_t *sm);
};
/*
+6 -5
View File
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_TXG_H
#define _SYS_TXG_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/spa.h>
#include <sys/zfs_context.h>
@@ -41,6 +39,9 @@ extern "C" {
#define TXG_INITIAL TXG_SIZE /* initial txg */
/*
 * Index derived from the current txg by masking with TXG_MASK.
 * NOTE: this macro takes no argument; it expands to an expression that
 * references an identifier named 'txg', which must be in scope at the
 * point of use.
 */
#define TXG_IDX (txg & TXG_MASK)
/* Number of txgs worth of frees we defer adding to in-core spacemaps */
#define TXG_DEFER_SIZE 2
#define TXG_WAIT 1ULL
#define TXG_NOWAIT 2ULL
@@ -71,8 +72,7 @@ extern void txg_sync_stop(struct dsl_pool *dp);
extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
extern void txg_rele_to_quiesce(txg_handle_t *txghp);
extern void txg_rele_to_sync(txg_handle_t *txghp);
extern void txg_suspend(struct dsl_pool *dp);
extern void txg_resume(struct dsl_pool *dp);
extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks);
/*
* Delay the caller by the specified number of ticks or until
@@ -117,6 +117,7 @@ extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
+4 -2
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,13 +37,13 @@ struct tx_cpu {
kmutex_t tc_lock;
kcondvar_t tc_cv[TXG_SIZE];
uint64_t tc_count[TXG_SIZE];
list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
char tc_pad[16];
};
typedef struct tx_state {
tx_cpu_t *tx_cpu; /* protects right to enter txg */
kmutex_t tx_sync_lock; /* protects tx_state_t */
krwlock_t tx_suspend;
uint64_t tx_open_txg; /* currently open txg id */
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
uint64_t tx_syncing_txg; /* currently syncing txg id */
@@ -64,6 +64,8 @@ typedef struct tx_state {
kthread_t *tx_sync_thread;
kthread_t *tx_quiesce_thread;
taskq_t *tx_commit_cb_taskq; /* commit callback taskq */
} tx_state_t;
#ifdef __cplusplus
+3 -7
View File
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,19 +19,16 @@
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UBERBLOCK_H
#define _SYS_UBERBLOCK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/spa.h>
#include <sys/vdev.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#ifdef __cplusplus
extern "C" {
+4 -7
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_UBERBLOCK_IMPL_H
@@ -32,11 +31,6 @@
extern "C" {
#endif
/*
* For zdb use and debugging purposes only
*/
extern uint64_t ub_max_txg;
/*
* The uberblock version is incremented whenever an incompatible on-disk
* format change is made to the SPA, DMU, or ZAP.
@@ -57,6 +51,9 @@ struct uberblock {
uint64_t ub_guid_sum; /* sum of all vdev guids */
uint64_t ub_timestamp; /* UTC time of last sync */
blkptr_t ub_rootbp; /* MOS objset_phys_t */
/* highest SPA_VERSION supported by software that wrote this txg */
uint64_t ub_software_version;
};
#ifdef __cplusplus
+23 -12
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_VDEV_H
@@ -47,7 +46,8 @@ typedef enum vdev_dtl_type {
extern boolean_t zfs_nocacheflush;
extern int vdev_open(vdev_t *);
extern void vdev_open_children(vdev_t *vd);
extern void vdev_open_children(vdev_t *);
extern boolean_t vdev_uses_zvols(vdev_t *);
extern int vdev_validate(vdev_t *);
extern void vdev_close(vdev_t *);
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
@@ -69,28 +69,31 @@ extern boolean_t vdev_dtl_required(vdev_t *vd);
extern boolean_t vdev_resilver_needed(vdev_t *vd,
uint64_t *minp, uint64_t *maxp);
extern void vdev_hold(vdev_t *);
extern void vdev_rele(vdev_t *);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
extern void vdev_clear_stats(vdev_t *vd);
extern void vdev_stat_update(zio_t *zio, uint64_t psize);
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
boolean_t complete);
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
extern void vdev_scan_stat_init(vdev_t *vd);
extern void vdev_propagate_state(vdev_t *vd);
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
vdev_aux_t aux);
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
int64_t alloc_delta, boolean_t update_root);
extern void vdev_space_update(vdev_t *vd,
int64_t alloc_delta, int64_t defer_delta, int64_t space_delta);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
extern int vdev_fault(spa_t *spa, uint64_t guid);
extern int vdev_degrade(spa_t *spa, uint64_t guid);
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
extern int vdev_degrade(spa_t *spa, uint64_t guid, vdev_aux_t aux);
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
vdev_state_t *);
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
@@ -121,8 +124,15 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
/*
 * Flags for the 'flags' argument of vdev_config_generate().  Each value
 * occupies its own bit, so several flags can be OR-ed together.
 * NOTE(review): the meaning of each flag is inferred from its name only;
 * confirm against vdev_config_generate() before relying on it.
 */
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
VDEV_CONFIG_L2CACHE = 1 << 1,
VDEV_CONFIG_REMOVING = 1 << 2
} vdev_config_flag_t;
extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
boolean_t getstats, vdev_config_flag_t flags);
/*
* Label routines
@@ -138,7 +148,8 @@ typedef enum {
VDEV_LABEL_REPLACE, /* replace an existing device */
VDEV_LABEL_SPARE, /* add a new hot spare */
VDEV_LABEL_REMOVE, /* remove an existing device */
VDEV_LABEL_L2CACHE /* add an L2ARC cache device */
VDEV_LABEL_L2CACHE, /* add an L2ARC cache device */
VDEV_LABEL_SPLIT /* generating new label for split-off dev */
} vdev_labeltype_t;
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
+24 -5
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_VDEV_IMPL_H
@@ -62,6 +61,8 @@ typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef int vdev_io_start_func_t(zio_t *zio);
typedef void vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
typedef void vdev_hold_func_t(vdev_t *vd);
typedef void vdev_rele_func_t(vdev_t *vd);
typedef struct vdev_ops {
vdev_open_func_t *vdev_op_open;
@@ -70,6 +71,8 @@ typedef struct vdev_ops {
vdev_io_start_func_t *vdev_op_io_start;
vdev_io_done_func_t *vdev_op_io_done;
vdev_state_change_func_t *vdev_op_state_change;
vdev_hold_func_t *vdev_op_hold;
vdev_rele_func_t *vdev_op_rele;
char vdev_op_type[16];
boolean_t vdev_op_leaf;
} vdev_ops_t;
@@ -112,6 +115,7 @@ struct vdev {
uint64_t vdev_id; /* child number in vdev parent */
uint64_t vdev_guid; /* unique ID for this vdev */
uint64_t vdev_guid_sum; /* self guid + all child guids */
uint64_t vdev_orig_guid; /* orig. guid prior to remove */
uint64_t vdev_asize; /* allocatable device capacity */
uint64_t vdev_min_asize; /* min acceptable asize */
uint64_t vdev_ashift; /* block alignment shift */
@@ -120,6 +124,8 @@ struct vdev {
vdev_ops_t *vdev_ops; /* vdev operations */
spa_t *vdev_spa; /* spa for this vdev */
void *vdev_tsd; /* type-specific data */
vnode_t *vdev_name_vp; /* vnode for pathname */
vnode_t *vdev_devid_vp; /* vnode for devid */
vdev_t *vdev_top; /* top-level vdev */
vdev_t *vdev_parent; /* parent vdev */
vdev_t **vdev_child; /* array of children */
@@ -127,8 +133,10 @@ struct vdev {
space_map_t vdev_dtl[DTL_TYPES]; /* in-core dirty time logs */
vdev_stat_t vdev_stat; /* virtual device statistics */
boolean_t vdev_expanding; /* expand the vdev? */
boolean_t vdev_reopening; /* reopen in progress? */
int vdev_open_error; /* error on last open */
kthread_t *vdev_open_thread; /* thread opening children */
uint64_t vdev_crtxg; /* txg when top-level was added */
/*
* Top-level vdev state.
@@ -143,10 +151,12 @@ struct vdev {
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
boolean_t vdev_remove_wanted; /* async remove wanted? */
boolean_t vdev_probe_wanted; /* async probe wanted? */
uint64_t vdev_removing; /* device is being removed? */
list_node_t vdev_config_dirty_node; /* config dirty list */
list_node_t vdev_state_dirty_node; /* state dirty list */
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
uint64_t vdev_islog; /* is an intent log device */
uint64_t vdev_ishole; /* is a hole in the namespace */
/*
* Leaf vdev state.
@@ -170,6 +180,8 @@ struct vdev {
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
boolean_t vdev_splitting; /* split or repair in progress */
boolean_t vdev_delayed_close; /* delayed device close? */
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
uint8_t vdev_detached; /* device detached? */
uint8_t vdev_cant_read; /* vdev is failing all reads */
@@ -180,6 +192,7 @@ struct vdev {
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
/*
* For DTrace to work in userland (libzpool) context, these fields must
@@ -193,6 +206,8 @@ struct vdev {
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
};
#define VDEV_RAIDZ_MAXPARITY 3
/* Size of one padding area: 8 << 10 = 8192 bytes. */
#define VDEV_PAD_SIZE (8 << 10)
/*
 * 2 padding areas (vl_pad1 and vl_pad2) to skip.
 * The replacement list is parenthesized so the macro behaves as a single
 * value next to operators such as '/', '%' or unary minus.
 */
#define VDEV_SKIP_SIZE (VDEV_PAD_SIZE * 2)
@@ -208,8 +223,8 @@ struct vdev {
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
typedef struct vdev_phys {
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
zio_block_tail_t vp_zbt;
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)];
zio_eck_t vp_zbt;
} vdev_phys_t;
typedef struct vdev_label {
@@ -244,10 +259,13 @@ typedef struct vdev_label {
#define VDEV_ALLOC_SPARE 2
#define VDEV_ALLOC_L2CACHE 3
#define VDEV_ALLOC_ROOTPOOL 4
#define VDEV_ALLOC_SPLIT 5
/*
* Allocate or free a vdev
*/
extern vdev_t *vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid,
vdev_ops_t *ops);
extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
vdev_t *parent, uint_t id, int alloctype);
extern void vdev_free(vdev_t *vd);
@@ -264,7 +282,7 @@ extern void vdev_remove_parent(vdev_t *cvd);
/*
* vdev sync load and sync
*/
extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
extern void vdev_load_log_state(vdev_t *nvd, vdev_t *ovd);
extern void vdev_load(vdev_t *vd);
extern void vdev_sync(vdev_t *vd, uint64_t txg);
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
@@ -280,6 +298,7 @@ extern vdev_ops_t vdev_raidz_ops;
extern vdev_ops_t vdev_disk_ops;
extern vdev_ops_t vdev_file_ops;
extern vdev_ops_t vdev_missing_ops;
extern vdev_ops_t vdev_hole_ops;
extern vdev_ops_t vdev_spare_ops;
/*
+63 -4
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZAP_H
@@ -101,6 +100,18 @@ typedef enum matchtype
MT_FIRST
} matchtype_t;
/*
 * Flags for the 'flags' argument of zap_create_flags().  Each value
 * occupies its own bit, so flags can be OR-ed together.
 */
typedef enum zap_flags {
/* Use 64-bit hash value (serialized cursors will always use 64-bits) */
ZAP_FLAG_HASH64 = 1 << 0,
/* Key is binary, not string (zap_add_uint64() can be used) */
ZAP_FLAG_UINT64_KEY = 1 << 1,
/*
* First word of key (which must be an array of uint64) is
* already randomly distributed.
*/
ZAP_FLAG_PRE_HASHED_KEY = 1 << 2,
} zap_flags_t;
/*
* Create a new zapobj with no attributes and return its object number.
* MT_EXACT will cause the zap object to only support MT_EXACT lookups,
@@ -118,6 +129,9 @@ uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
@@ -180,6 +194,11 @@ int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf,
matchtype_t mt, char *realname, int rn_len,
boolean_t *normalization_conflictp);
int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints);
int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
int add, uint64_t *towrite, uint64_t *tooverwrite);
@@ -190,9 +209,12 @@ int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
* If an attribute with the given name already exists, the call will
* fail and return EEXIST.
*/
int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
int zap_add(objset_t *ds, uint64_t zapobj, const char *key,
int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
/*
* Set the attribute with the given name to the given value. If an
@@ -204,6 +226,9 @@ int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
*/
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
/*
* Get the length (in integers) and the integer size of the specified
@@ -214,6 +239,8 @@ int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
*/
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t *integer_size, uint64_t *num_integers);
int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t *integer_size, uint64_t *num_integers);
/*
* Remove the specified attribute.
@@ -224,6 +251,8 @@ int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
matchtype_t mt, dmu_tx_t *tx);
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx);
/*
* Returns (in *count) the number of attributes in the specified zap
@@ -231,7 +260,6 @@ int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
*/
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
/*
* Returns (in name) the name of the entry whose (value & mask)
* (za_first_integer) is value, or ENOENT if not found. The string
@@ -248,6 +276,14 @@ int zap_value_search(objset_t *os, uint64_t zapobj,
*/
int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
/* Same as zap_join, but set the values to 'value'. */
int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
uint64_t value, dmu_tx_t *tx);
/* Same as zap_join, but add together any duplicated entries. */
int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
dmu_tx_t *tx);
/*
* Manipulate entries where the name + value are the "same" (the name is
* a stringified version of the value).
@@ -255,6 +291,23 @@ int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
dmu_tx_t *tx);
/* Here the key is an int and the value is a different int. */
int zap_add_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t value, dmu_tx_t *tx);
int zap_lookup_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t *valuep);
/*
* The name is a stringified version of key; increment its value by
* delta. Zero values will be zap_remove()-ed.
*/
int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
dmu_tx_t *tx);
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
dmu_tx_t *tx);
struct zap;
struct zap_leaf;
@@ -264,6 +317,7 @@ typedef struct zap_cursor {
struct zap *zc_zap;
struct zap_leaf *zc_leaf;
uint64_t zc_zapobj;
uint64_t zc_serialized;
uint64_t zc_hash;
uint32_t zc_cd;
} zap_cursor_t;
@@ -314,6 +368,11 @@ void zap_cursor_advance(zap_cursor_t *zc);
*/
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
/*
* Advance the cursor to the attribute having the given key.
*/
int zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt);
/*
* Initialize a zap cursor pointing to the position recorded by
* zap_cursor_serialize (in the "serialized" argument). You can also
+19 -9
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZAP_IMPL_H
@@ -40,13 +39,13 @@ extern int fzap_default_block_shift;
#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)
#define ZAP_MAXCD (uint32_t)(-1)
#define ZAP_HASHBITS 28
#define MZAP_ENT_LEN 64
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
#define ZAP_NEED_CD (-1U)
typedef struct mzap_ent_phys {
uint64_t mze_value;
uint32_t mze_cd;
@@ -67,9 +66,11 @@ typedef struct mzap_ent {
avl_node_t mze_node;
int mze_chunkid;
uint64_t mze_hash;
mzap_ent_phys_t mze_phys;
uint32_t mze_cd; /* copy from mze_phys->mze_cd */
} mzap_ent_t;
#define MZE_PHYS(zap, mze) \
(&(zap)->zap_m.zap_phys->mz_chunk[(mze)->mze_chunkid])
/*
* The (fat) zap is stored in one object. It is an array of
@@ -127,6 +128,7 @@ typedef struct zap_phys {
uint64_t zap_num_entries; /* number of entries */
uint64_t zap_salt; /* salt to stir into hash function */
uint64_t zap_normflags; /* flags for u8_textprep_str() */
uint64_t zap_flags; /* zap_flags_t */
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
@@ -168,10 +170,13 @@ typedef struct zap {
typedef struct zap_name {
zap_t *zn_zap;
const char *zn_name_orij;
int zn_key_intlen;
const void *zn_key_orig;
int zn_key_orig_numints;
const void *zn_key_norm;
int zn_key_norm_numints;
uint64_t zn_hash;
matchtype_t zn_matchtype;
const char *zn_name_norm;
char zn_normbuf[ZAP_MAXNAMELEN];
} zap_name_t;
@@ -183,8 +188,11 @@ int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap);
void zap_evict(dmu_buf_t *db, void *vmzap);
zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap);
uint32_t zap_maxcd(zap_t *zap);
uint64_t zap_getflags(zap_t *zap);
#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
@@ -193,6 +201,7 @@ int fzap_count(zap_t *zap, uint64_t *count);
int fzap_lookup(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers, void *buf,
char *realname, int rn_len, boolean_t *normalization_conflictp);
void fzap_prefetch(zap_name_t *zn);
int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
uint64_t *tooverwrite);
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
@@ -209,7 +218,8 @@ void zap_put_leaf(struct zap_leaf *l);
int fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
const void *val, uint32_t cd, dmu_tx_t *tx);
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
int fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn);
#ifdef __cplusplus
}
+18 -17
View File
@@ -19,20 +19,21 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZAP_LEAF_H
#define _SYS_ZAP_LEAF_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zap.h>
#ifdef __cplusplus
extern "C" {
#endif
struct zap;
struct zap_name;
struct zap_stats;
#define ZAP_LEAF_MAGIC 0x2AB1EAF
@@ -129,12 +130,12 @@ typedef struct zap_leaf_phys {
typedef union zap_leaf_chunk {
struct zap_leaf_entry {
uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
uint8_t le_int_size; /* size of ints */
uint8_t le_value_intlen; /* size of value's ints */
uint16_t le_next; /* next entry in hash chain */
uint16_t le_name_chunk; /* first chunk of the name */
uint16_t le_name_length; /* bytes in name, incl null */
uint16_t le_name_numints; /* ints in name (incl null) */
uint16_t le_value_chunk; /* first chunk of the value */
uint16_t le_value_length; /* value length in ints */
uint16_t le_value_numints; /* value length in ints */
uint32_t le_cd; /* collision differentiator */
uint64_t le_hash; /* hash value of the name */
} l_entry;
@@ -177,7 +178,7 @@ typedef struct zap_entry_handle {
* value must equal zap_hash(name).
*/
extern int zap_leaf_lookup(zap_leaf_t *l,
zap_name_t *zn, zap_entry_handle_t *zeh);
struct zap_name *zn, zap_entry_handle_t *zeh);
/*
* Return a handle to the entry with this hash+cd, or the entry with the
@@ -193,10 +194,10 @@ extern int zap_leaf_lookup_closest(zap_leaf_t *l,
* num_integers in the attribute.
*/
extern int zap_entry_read(const zap_entry_handle_t *zeh,
uint8_t integer_size, uint64_t num_integers, void *buf);
uint8_t integer_size, uint64_t num_integers, void *buf);
extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
uint16_t buflen, char *buf);
extern int zap_entry_read_name(struct zap *zap, const zap_entry_handle_t *zeh,
uint16_t buflen, char *buf);
/*
* Replace the value of an existing entry.
@@ -204,7 +205,7 @@ extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
* zap_entry_update may fail if it runs out of space (ENOSPC).
*/
extern int zap_entry_update(zap_entry_handle_t *zeh,
uint8_t integer_size, uint64_t num_integers, const void *buf);
uint8_t integer_size, uint64_t num_integers, const void *buf);
/*
* Remove an entry.
@@ -216,17 +217,16 @@ extern void zap_entry_remove(zap_entry_handle_t *zeh);
* belong in this leaf (according to its hash value). Fills in the
* entry handle on success. Returns 0 on success or ENOSPC on failure.
*/
extern int zap_entry_create(zap_leaf_t *l,
const char *name, uint64_t h, uint32_t cd,
uint8_t integer_size, uint64_t num_integers, const void *buf,
zap_entry_handle_t *zeh);
extern int zap_entry_create(zap_leaf_t *l, struct zap_name *zn, uint32_t cd,
uint8_t integer_size, uint64_t num_integers, const void *buf,
zap_entry_handle_t *zeh);
/*
* Return true if there are additional entries with the same normalized
* form.
*/
extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
zap_name_t *zn, const char *name, zap_t *zap);
struct zap_name *zn, const char *name, struct zap *zap);
/*
* Other stuff.
@@ -235,7 +235,8 @@ extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
extern void zap_leaf_stats(struct zap *zap, zap_leaf_t *l,
struct zap_stats *zs);
#ifdef __cplusplus
}
+31 -6
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_FS_ZFS_ACL_H
@@ -33,6 +32,7 @@
#include <sys/acl.h>
#include <sys/dmu.h>
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#ifdef __cplusplus
extern "C" {
@@ -106,12 +106,18 @@ typedef struct zfs_acl_phys_v0 {
#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
/*
* Size of ACL count is always 2 bytes.
* Necessary for dealing with both V0 ACL and V1 ACL layout
*/
#define ZFS_ACL_COUNT_SIZE (sizeof (uint16_t))
typedef struct zfs_acl_phys {
uint64_t z_acl_extern_obj; /* ext acl pieces */
uint32_t z_acl_size; /* Number of bytes in ACL */
uint16_t z_acl_version; /* acl version */
uint16_t z_acl_count; /* ace count */
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
} zfs_acl_phys_t;
typedef struct acl_ops {
@@ -146,21 +152,26 @@ typedef struct zfs_acl_node {
void *z_allocdata; /* pointer to kmem allocated memory */
size_t z_allocsize; /* Size of blob in bytes */
size_t z_size; /* length of ACL data */
int z_ace_count; /* number of ACEs in this acl node */
uint64_t z_ace_count; /* number of ACEs in this acl node */
int z_ace_idx; /* ace iterator positioned on */
} zfs_acl_node_t;
typedef struct zfs_acl {
int z_acl_count; /* Number of ACEs */
uint64_t z_acl_count; /* Number of ACEs */
size_t z_acl_bytes; /* Number of bytes in ACL */
uint_t z_version; /* version of ACL */
void *z_next_ace; /* pointer to next ACE */
int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
uint64_t z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
list_t z_acl; /* chunks of ACE data */
acl_ops_t z_ops; /* ACL operations */
} zfs_acl_t;
/*
 * Pairs an ACL with one of its ACL nodes.
 * NOTE(review): presumably the state handed to zfs_acl_data_locator()
 * through its trailing void * argument -- confirm against zfs_acl.c.
 */
typedef struct acl_locator_cb {
zfs_acl_t *cb_aclp;
zfs_acl_node_t *cb_acl_node;
} zfs_acl_locator_cb_t;
#define ACL_DATA_ALLOCED 0x1
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
@@ -174,6 +185,10 @@ typedef struct zfs_acl_ids {
struct zfs_fuid_info *z_fuidp; /* for tracking fuids for log */
} zfs_acl_ids_t;
/*
 * ACL accessors that branch on the znode's z_is_sa field.
 *
 * ZFS_EXTERNAL_ACL(zp):  0 when z_is_sa is set, otherwise the result of
 *	zfs_external_acl(zp).
 * ZNODE_ACL_VERSION(zp): ZFS_ACL_VERSION_FUID when z_is_sa is set,
 *	otherwise the result of zfs_znode_acl_version(zp).
 *
 * The argument is parenthesized before being dereferenced so that any
 * pointer expression (e.g. &znode or ptr + 1) expands correctly.  Note
 * that zp is still evaluated twice when z_is_sa is clear.
 */
#define	ZFS_EXTERNAL_ACL(zp) \
	((zp)->z_is_sa ? 0 : zfs_external_acl(zp))
#define	ZNODE_ACL_VERSION(zp) \
	((zp)->z_is_sa ? ZFS_ACL_VERSION_FUID : zfs_znode_acl_version(zp))
/*
* Property values for acl_mode and acl_inherit.
*
@@ -215,6 +230,16 @@ void zfs_acl_free(zfs_acl_t *);
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, cred_t *,
struct zfs_fuid_info **, zfs_acl_t **);
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *);
uint64_t zfs_external_acl(struct znode *);
int zfs_znode_acl_version(struct znode *);
int zfs_acl_size(struct znode *, int *);
zfs_acl_t *zfs_acl_alloc(int);
zfs_acl_node_t *zfs_acl_node_alloc(size_t);
void zfs_acl_xform(struct znode *, zfs_acl_t *, cred_t *);
void zfs_acl_data_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
uint64_t zfs_mode_compute(uint64_t, zfs_acl_t *,
uint64_t *, uint64_t, uint64_t);
int zfs_acl_chown_setattr(struct znode *);
#endif
+1
View File
@@ -62,6 +62,7 @@ extern "C" {
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/util.h>
#include <sys/sunddi.h>
#define CPU_SEQID (CPU->cpu_seqid)
+2 -2
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZFS_CTLDIR_H
@@ -49,6 +48,7 @@ void zfsctl_destroy(zfsvfs_t *);
vnode_t *zfsctl_root(znode_t *);
void zfsctl_init(void);
void zfsctl_fini(void);
boolean_t zfsctl_is_node(vnode_t *);
int zfsctl_rename_snapshot(const char *from, const char *to);
int zfsctl_destroy_snapshot(const char *snapname, int force);
+11 -4
View File
@@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZFS_DEBUG_H
#define _SYS_ZFS_DEBUG_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
@@ -68,6 +65,16 @@ extern void __dprintf(const char *file, const char *func,
extern void zfs_panic_recover(const char *fmt, ...);
/*
 * One debug message record: a list linkage node, a timestamp, and the
 * message text.  zdm_msg is declared with a single element, but per its
 * own comment the record is allocated with variable length, so the text
 * runs past the declared end of the structure.
 * NOTE(review): presumably these records are what zfs_dbgmsg() produces --
 * confirm against the implementation.
 */
typedef struct zfs_dbgmsg {
list_node_t zdm_node;
time_t zdm_timestamp;
char zdm_msg[1]; /* variable length allocation */
} zfs_dbgmsg_t;
extern void zfs_dbgmsg_init(void);
extern void zfs_dbgmsg_fini(void);
extern void zfs_dbgmsg(const char *fmt, ...);
#ifdef __cplusplus
}
#endif
+3 -3
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,11 +42,11 @@ extern "C" {
#define ZRENAMING 0x0010 /* znode is being renamed */
#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
#define ZHAVELOCK 0x0080 /* z_name_lock is already held */
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
#define IS_XATTR 0x02 /* create an extended attribute node */
#define IS_REPLAY 0x04 /* we are replaying intent log */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);
@@ -57,7 +57,7 @@ extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
pathname_t *);
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
uint_t, znode_t **, int, zfs_acl_ids_t *);
uint_t, znode_t **, zfs_acl_ids_t *);
extern void zfs_rmnode(znode_t *);
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
+3 -1
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -100,6 +100,8 @@ typedef struct zfs_fuid_info {
#ifdef _KERNEL
struct znode;
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
extern void zfs_fuid_node_add(zfs_fuid_info_t **, const char *, uint32_t,
uint64_t, uint64_t, zfs_fuid_type_t);
extern void zfs_fuid_destroy(zfsvfs_t *);
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
cred_t *, zfs_fuid_info_t **);
+108 -7
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -30,6 +30,7 @@
#include <sys/dmu.h>
#include <sys/zio.h>
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#ifdef _KERNEL
#include <sys/nvpair.h>
@@ -45,26 +46,86 @@ extern "C" {
#define ZFS_SNAPDIR_HIDDEN 0
#define ZFS_SNAPDIR_VISIBLE 1
#define DMU_BACKUP_STREAM_VERSION (1ULL)
#define DMU_BACKUP_HEADER_VERSION (2ULL)
/*
* Field manipulation macros for the drr_versioninfo field of the
* send stream header.
*/
/*
* Header types for zfs send streams.
*/
typedef enum drr_headertype {
	/* Values are read and written via DMU_{GET,SET}_STREAM_HDRTYPE(). */
	DMU_SUBSTREAM = 1,
	DMU_COMPOUNDSTREAM = 2
} drr_headertype_t;
#define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2)
#define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x)
#define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 30)
#define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 30, x)
/*
* Feature flags for zfs send streams (flags in drr_versioninfo)
*/
#define	DMU_BACKUP_FEATURE_DEDUP	(1 << 0)
#define	DMU_BACKUP_FEATURE_DEDUPPROPS	(1 << 1)
#define	DMU_BACKUP_FEATURE_SA_SPILL	(1 << 2)

/*
 * Union of every backup feature flag defined above.
 */
#define	DMU_BACKUP_FEATURE_MASK \
	(DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS | \
	DMU_BACKUP_FEATURE_SA_SPILL)

/* True when the flag word x has no bits set outside the supported mask. */
#define	DMU_STREAM_SUPPORTED(x)	(((x) & ~DMU_BACKUP_FEATURE_MASK) == 0)
/*
* The drr_versioninfo field of the dmu_replay_record has the
* following layout:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* | reserved | feature-flags |C|S|
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* The low order two bits indicate the header type: SUBSTREAM (0x1)
* or COMPOUNDSTREAM (0x2). Using two bits for this is historical:
* this field used to be a version number, where the two version types
* were 1 and 2. Using two bits for this allows earlier versions of
* the code to be able to recognize send streams that don't use any
* of the features indicated by feature flags.
*/
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
#define DRR_FLAG_CLONE (1<<0)
#define DRR_FLAG_CI_DATA (1<<1)
/*
* flags in the drr_checksumflags field in the DRR_WRITE and
* DRR_WRITE_BYREF blocks
*/
#define DRR_CHECKSUM_DEDUP (1<<0)
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
/*
* zfs ioctl command structure
*/
typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
DRR_SPILL, DRR_NUMTYPES
} drr_type;
uint32_t drr_payloadlen;
union {
struct drr_begin {
uint64_t drr_magic;
uint64_t drr_version;
uint64_t drr_versioninfo; /* was drr_version */
uint64_t drr_creation_time;
dmu_objset_type_t drr_type;
uint32_t drr_flags;
@@ -74,6 +135,7 @@ typedef struct dmu_replay_record {
} drr_begin;
struct drr_end {
zio_cksum_t drr_checksum;
uint64_t drr_toguid;
} drr_end;
struct drr_object {
uint64_t drr_object;
@@ -81,14 +143,16 @@ typedef struct dmu_replay_record {
dmu_object_type_t drr_bonustype;
uint32_t drr_blksz;
uint32_t drr_bonuslen;
uint8_t drr_checksum;
uint8_t drr_checksumtype;
uint8_t drr_compress;
uint8_t drr_pad[6];
uint64_t drr_toguid;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
uint64_t drr_firstobj;
uint64_t drr_numobjs;
uint64_t drr_toguid;
} drr_freeobjects;
struct drr_write {
uint64_t drr_object;
@@ -96,13 +160,42 @@ typedef struct dmu_replay_record {
uint32_t drr_pad;
uint64_t drr_offset;
uint64_t drr_length;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
uint8_t drr_checksumflags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
/* content follows */
} drr_write;
struct drr_free {
uint64_t drr_object;
uint64_t drr_offset;
uint64_t drr_length;
uint64_t drr_toguid;
} drr_free;
struct drr_write_byref {
/* where to put the data */
uint64_t drr_object;
uint64_t drr_offset;
uint64_t drr_length;
uint64_t drr_toguid;
/* where to find the prior copy of the data */
uint64_t drr_refguid;
uint64_t drr_refobject;
uint64_t drr_refoffset;
/* properties of the data */
uint8_t drr_checksumtype;
uint8_t drr_checksumflags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
} drr_write_byref;
struct drr_spill {
uint64_t drr_object;
uint64_t drr_length;
uint64_t drr_toguid;
uint64_t drr_pad[4]; /* needed for crypto */
/* spill data follows */
} drr_spill;
} drr_u;
} dmu_replay_record_t;
@@ -117,6 +210,10 @@ typedef struct zinject_record {
uint64_t zi_type;
uint32_t zi_freq;
uint32_t zi_failfast;
char zi_func[MAXNAMELEN];
uint32_t zi_iotype;
int32_t zi_duration;
uint64_t zi_timer;
} zinject_record_t;
#define ZINJECT_NULL 0x1
@@ -146,6 +243,7 @@ typedef struct zfs_cmd {
char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
char zc_top_ds[MAXPATHLEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
@@ -166,6 +264,7 @@ typedef struct zfs_cmd {
struct drr_begin zc_begin_record;
zinject_record_t zc_inject_record;
boolean_t zc_defer_destroy;
boolean_t zc_temphold;
} zfs_cmd_t;
typedef struct zfs_useracct {
@@ -178,6 +277,8 @@ typedef struct zfs_useracct {
#define ZVOL_MAX_MINOR (1 << 16)
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
#define ZPOOL_EXPORT_AFTER_SPLIT 0x1
#ifdef _KERNEL
typedef struct zfs_creat {
@@ -192,7 +293,7 @@ extern int zfs_secpolicy_rename_perms(const char *from,
const char *to, cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
extern int zfs_unmount_snap(char *, void *);
extern int zfs_unmount_snap(const char *, void *);
#endif /* _KERNEL */
+143
View File
@@ -0,0 +1,143 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_SA_H
#define _SYS_ZFS_SA_H
#ifdef _KERNEL
#include <sys/types32.h>
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_znode.h>
#include <sys/sa.h>
#include <sys/zil.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* This is the list of attributes known to the ZPL.  The numeric values
* of the actual attributes are not defined by the order of these enums;
* they are controlled by the attribute registration mechanism.  Two
* different file systems could have different numeric values for the
* same attribute.  This list is only used for indexing into the table
* that holds the actual numeric values.
*/
typedef enum zpl_attr {
	ZPL_ATIME = 0,
	ZPL_MTIME = 1,
	ZPL_CTIME = 2,
	ZPL_CRTIME = 3,
	ZPL_GEN = 4,
	ZPL_MODE = 5,
	ZPL_SIZE = 6,
	ZPL_PARENT = 7,
	ZPL_LINKS = 8,
	ZPL_XATTR = 9,
	ZPL_RDEV = 10,
	ZPL_FLAGS = 11,
	ZPL_UID = 12,
	ZPL_GID = 13,
	ZPL_PAD = 14,
	ZPL_ZNODE_ACL = 15,
	ZPL_DACL_COUNT = 16,
	ZPL_SYMLINK = 17,
	ZPL_SCANSTAMP = 18,
	ZPL_DACL_ACES = 19,
	/* Count of the entries above; the attribute tables hold ZPL_END + 1 slots. */
	ZPL_END = 20
} zpl_attr_t;
#define ZFS_OLD_ZNODE_PHYS_SIZE 0x108
#define ZFS_SA_BASE_ATTR_SIZE (ZFS_OLD_ZNODE_PHYS_SIZE - \
sizeof (zfs_acl_phys_t))
#define SA_MODE_OFFSET 0
#define SA_SIZE_OFFSET 8
#define SA_GEN_OFFSET 16
#define SA_UID_OFFSET 24
#define SA_GID_OFFSET 32
#define SA_PARENT_OFFSET 40
extern sa_attr_reg_t zfs_attr_table[ZPL_END + 1];
extern sa_attr_reg_t zfs_legacy_attr_table[ZPL_END + 1];
/*
* This is a deprecated data structure that only exists for
* dealing with file systems created prior to ZPL version 5.
*/
/*
 * Layout of the legacy znode as it sits at the start of the dnode's
 * "bonus" buffer (see the diagram at the end of the structure).
 *
 * The number that leads each field comment is that field's byte offset
 * within the structure.  The fixed part ends with the embedded ACL at
 * bytes 176 - 263, i.e. 264 (0x108) bytes in total, which is the value
 * of ZFS_OLD_ZNODE_PHYS_SIZE.
 */
typedef struct znode_phys {
uint64_t zp_atime[2]; /* 0 - last file access time */
uint64_t zp_mtime[2]; /* 16 - last file modification time */
uint64_t zp_ctime[2]; /* 32 - last file change time */
uint64_t zp_crtime[2]; /* 48 - creation time */
uint64_t zp_gen; /* 64 - generation (txg of creation) */
uint64_t zp_mode; /* 72 - file mode bits */
uint64_t zp_size; /* 80 - size of file */
uint64_t zp_parent; /* 88 - directory parent (`..') */
uint64_t zp_links; /* 96 - number of links to file */
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
uint64_t zp_flags; /* 120 - persistent flags */
uint64_t zp_uid; /* 128 - file owner */
uint64_t zp_gid; /* 136 - owning group */
uint64_t zp_zap; /* 144 - extra attributes */
uint64_t zp_pad[3]; /* 152 - future */
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
/*
* Data may pad out any remaining bytes in the znode buffer, eg:
*
* |<---------------------- dnode_phys (512) ------------------------>|
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
* |<---- znode (264) ---->|<---- data (56) ---->|
*
* At present, we use this space for the following:
* - symbolic links
* - 32-byte anti-virus scanstamp (regular files only)
*/
} znode_phys_t;
#ifdef _KERNEL
int zfs_sa_readlink(struct znode *, uio_t *);
void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
void zfs_sa_uprade_pre(struct sa_handle *, void *, dmu_tx_t *);
void zfs_sa_upgrade_post(struct sa_handle *, void *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_SA_H */
+13 -8
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_FS_ZFS_VFSOPS_H
@@ -31,6 +30,7 @@
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
#include <sys/sa.h>
#include <sys/rrwlock.h>
#include <sys/zfs_ioctl.h>
@@ -39,6 +39,7 @@ extern "C" {
#endif
typedef struct zfsvfs zfsvfs_t;
struct znode;
struct zfsvfs {
vfs_t *z_vfs; /* generic fs struct */
@@ -56,7 +57,6 @@ struct zfsvfs {
boolean_t z_fuid_dirty; /* need to sync fuid table ? */
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
zilog_t *z_log; /* intent log pointer */
uint_t z_acl_mode; /* acl chmod/mode behavior */
uint_t z_acl_inherit; /* acl inheritance behavior */
zfs_case_t z_case; /* case-sense */
boolean_t z_utf8; /* utf8-only */
@@ -73,11 +73,13 @@ struct zfsvfs {
boolean_t z_vscan; /* virus scan on/off */
boolean_t z_use_fuids; /* version allows fuids */
boolean_t z_replay; /* set during ZIL replay */
boolean_t z_use_sa; /* version allow system attributes */
uint64_t z_version; /* ZPL version */
uint64_t z_shares_dir; /* hidden shares dir */
kmutex_t z_lock;
uint64_t z_userquota_obj;
uint64_t z_groupquota_obj;
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
#define ZFS_OBJ_MTX_SZ 64
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
};
@@ -132,19 +134,22 @@ typedef struct zfid_long {
extern uint_t zfs_fsyncer_key;
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname);
extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t *valuep);
extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t quota);
extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
boolean_t isgroup, uint64_t fuid);
extern boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, struct znode *,
boolean_t isgroup);
extern boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup,
uint64_t fuid);
extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp);
extern int zfsvfs_create(const char *name, zfsvfs_t **zfvp);
extern void zfsvfs_free(zfsvfs_t *zfsvfs);
extern int zfs_check_global_label(const char *dsname, const char *hexsl);
#ifdef __cplusplus
}
+50 -52
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -32,8 +32,10 @@
#include <sys/attr.h>
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/sa.h>
#include <sys/zfs_vfsops.h>
#include <sys/rrwlock.h>
#include <sys/zfs_sa.h>
#endif
#include <sys/zfs_acl.h>
#include <sys/zil.h>
@@ -57,13 +59,16 @@ extern "C" {
#define ZFS_OPAQUE 0x0000010000000000
#define ZFS_AV_QUARANTINED 0x0000020000000000
#define ZFS_AV_MODIFIED 0x0000040000000000
#define ZFS_REPARSE 0x0000080000000000
#define ZFS_ATTR_SET(zp, attr, value) \
#define ZFS_ATTR_SET(zp, attr, value, pflags, tx) \
{ \
if (value) \
zp->z_phys->zp_flags |= attr; \
pflags |= attr; \
else \
zp->z_phys->zp_flags &= ~attr; \
pflags &= ~attr; \
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zp->z_zfsvfs), \
&pflags, sizeof (pflags), tx)); \
}
/*
@@ -79,6 +84,27 @@ extern "C" {
#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
#define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */
/*
 * Accessors for the ZPL system-attribute ids.  Each macro indexes the
 * z_attr_table reached through `z' with the matching ZPL_* constant and
 * yields that table slot (usable as an lvalue).
 *
 * `z' must be a pointer to a structure that has a z_attr_table member.
 * The argument is parenthesized so that any pointer-valued expression
 * (for example `fs + 1' or `cond ? a : b') expands correctly; without the
 * parentheses `->' would bind to only the last operand of the argument.
 */
#define	SA_ZPL_ATIME(z)		(z)->z_attr_table[ZPL_ATIME]
#define	SA_ZPL_MTIME(z)		(z)->z_attr_table[ZPL_MTIME]
#define	SA_ZPL_CTIME(z)		(z)->z_attr_table[ZPL_CTIME]
#define	SA_ZPL_CRTIME(z)	(z)->z_attr_table[ZPL_CRTIME]
#define	SA_ZPL_GEN(z)		(z)->z_attr_table[ZPL_GEN]
#define	SA_ZPL_DACL_ACES(z)	(z)->z_attr_table[ZPL_DACL_ACES]
#define	SA_ZPL_XATTR(z)		(z)->z_attr_table[ZPL_XATTR]
#define	SA_ZPL_SYMLINK(z)	(z)->z_attr_table[ZPL_SYMLINK]
#define	SA_ZPL_RDEV(z)		(z)->z_attr_table[ZPL_RDEV]
#define	SA_ZPL_SCANSTAMP(z)	(z)->z_attr_table[ZPL_SCANSTAMP]
#define	SA_ZPL_UID(z)		(z)->z_attr_table[ZPL_UID]
#define	SA_ZPL_GID(z)		(z)->z_attr_table[ZPL_GID]
#define	SA_ZPL_PARENT(z)	(z)->z_attr_table[ZPL_PARENT]
#define	SA_ZPL_LINKS(z)		(z)->z_attr_table[ZPL_LINKS]
#define	SA_ZPL_MODE(z)		(z)->z_attr_table[ZPL_MODE]
#define	SA_ZPL_DACL_COUNT(z)	(z)->z_attr_table[ZPL_DACL_COUNT]
#define	SA_ZPL_FLAGS(z)		(z)->z_attr_table[ZPL_FLAGS]
#define	SA_ZPL_SIZE(z)		(z)->z_attr_table[ZPL_SIZE]
#define	SA_ZPL_ZNODE_ACL(z)	(z)->z_attr_table[ZPL_ZNODE_ACL]
#define	SA_ZPL_PAD(z)		(z)->z_attr_table[ZPL_PAD]
/*
* Is ID ephemeral?
*/
@@ -87,8 +113,10 @@ extern "C" {
/*
* Should we use FUIDs?
*/
#define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID &&\
#define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID && \
spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
/*
 * Should we use system attributes (SA)?
 *
 * True only when `version' is at least ZPL_VERSION_SA and the pool version
 * reported by spa_version() for the objset `os' is at least SPA_VERSION_SA.
 * `version' is parenthesized so that an expression argument (e.g. `a | b')
 * is compared as a whole rather than having `>=' bind to its last operand.
 */
#define	USE_SA(version, os)	((version) >= ZPL_VERSION_SA && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA)
#define MASTER_NODE_OBJ 1
@@ -103,6 +131,7 @@ extern "C" {
#define ZPL_VERSION_STR "VERSION"
#define ZFS_FUID_TABLES "FUID"
#define ZFS_SHARES_DIR "SHARES"
#define ZFS_SA_ATTRS "SA_ATTRS"
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
@@ -130,42 +159,6 @@ extern "C" {
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
/*
* This is the persistent portion of the znode. It is stored
* in the "bonus buffer" of the file. Short symbolic links
* are also stored in the bonus buffer.
*/
typedef struct znode_phys {
uint64_t zp_atime[2]; /* 0 - last file access time */
uint64_t zp_mtime[2]; /* 16 - last file modification time */
uint64_t zp_ctime[2]; /* 32 - last file change time */
uint64_t zp_crtime[2]; /* 48 - creation time */
uint64_t zp_gen; /* 64 - generation (txg of creation) */
uint64_t zp_mode; /* 72 - file mode bits */
uint64_t zp_size; /* 80 - size of file */
uint64_t zp_parent; /* 88 - directory parent (`..') */
uint64_t zp_links; /* 96 - number of links to file */
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
uint64_t zp_flags; /* 120 - persistent flags */
uint64_t zp_uid; /* 128 - file owner */
uint64_t zp_gid; /* 136 - owning group */
uint64_t zp_zap; /* 144 - extra attributes */
uint64_t zp_pad[3]; /* 152 - future */
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
/*
* Data may pad out any remaining bytes in the znode buffer, eg:
*
* |<---------------------- dnode_phys (512) ------------------------>|
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
* |<---- znode (264) ---->|<---- data (56) ---->|
*
* At present, we use this space for the following:
* - symbolic links
* - 32-byte anti-virus scanstamp (regular files only)
*/
} znode_phys_t;
/*
* Directory entry locks control access to directory entries.
* They are used to protect creates, deletes, and renames.
@@ -175,6 +168,7 @@ typedef struct znode_phys {
typedef struct zfs_dirlock {
char *dl_name; /* directory entry being locked */
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
uint8_t dl_namelock; /* 1 if z_name_lock is NOT held */
uint16_t dl_namesize; /* set if dl_name was allocated */
kcondvar_t dl_cv; /* wait for entry to be unlocked */
struct znode *dl_dzp; /* directory znode */
@@ -198,16 +192,20 @@ typedef struct znode {
uint_t z_seq; /* modification sequence number */
uint64_t z_mapcnt; /* number of pages mapped to file */
uint64_t z_last_itx; /* last ZIL itx on this znode */
uint64_t z_gen; /* generation (same as zp_gen) */
uint64_t z_gen; /* generation (cached) */
uint64_t z_size; /* file size (cached) */
uint64_t z_atime[2]; /* atime (cached) */
uint64_t z_links; /* file links (cached) */
uint64_t z_pflags; /* pflags (cached) */
uid_t z_uid; /* uid mapped (cached) */
uid_t z_gid; /* gid mapped (cached) */
mode_t z_mode; /* mode (cached) */
uint32_t z_sync_cnt; /* synchronous open count */
kmutex_t z_acl_lock; /* acl data lock */
zfs_acl_t *z_acl_cached; /* cached acl */
list_node_t z_link_node; /* all znodes in fs link */
/*
* These are dmu managed fields.
*/
znode_phys_t *z_phys; /* pointer to persistent znode */
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
sa_handle_t *z_sa_hdl; /* handle to sa data */
boolean_t z_is_sa; /* are we native sa? */
} znode_t;
@@ -250,7 +248,7 @@ typedef struct znode {
#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
#define ZFS_VERIFY_ZP(zp) \
if ((zp)->z_dbuf == NULL) { \
if ((zp)->z_sa_hdl == NULL) { \
ZFS_EXIT((zp)->z_zfsvfs); \
return (EIO); \
} \
@@ -292,14 +290,14 @@ typedef struct znode {
#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
zfs_time_stamper(zp, ACCESSED, NULL)
zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE);
extern int zfs_init_fs(zfsvfs_t *, znode_t **);
extern void zfs_set_dataprop(objset_t *);
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
dmu_tx_t *tx);
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2],
uint64_t [2], boolean_t);
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
@@ -338,7 +336,7 @@ extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+63 -38
View File
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_ZIL_H
#define _SYS_ZIL_H
@@ -55,34 +56,40 @@ typedef struct zil_header {
uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
uint64_t zh_replay_seq; /* highest replayed sequence number */
blkptr_t zh_log; /* log chain */
uint64_t zh_claim_seq; /* highest claimed sequence number */
uint64_t zh_claim_blk_seq; /* highest claimed block sequence number */
uint64_t zh_flags; /* header flags */
uint64_t zh_pad[4];
uint64_t zh_claim_lr_seq; /* highest claimed lr sequence number */
uint64_t zh_pad[3];
} zil_header_t;
/*
* zh_flags bit settings
*/
#define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */
#define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */
#define ZIL_CLAIM_LR_SEQ_VALID 0x2 /* zh_claim_lr_seq field is valid */
/*
* Log block trailer - structure at the end of the header and each log block
* Log block chaining.
*
* The zit_bt contains a zbt_cksum which for the intent log is
* Log blocks are chained together. Originally they were chained at the
* end of the block. For performance reasons the chain was moved to the
* beginning of the block which allows writes for only the data being used.
* The older position is supported for backward compatibility.
*
* The zio_eck_t contains a zec_cksum which for the intent log is
* the sequence number of this log block. A seq of 0 is invalid.
* The zbt_cksum is checked by the SPA against the sequence
* The zec_cksum is checked by the SPA against the sequence
* number passed in the blk_cksum field of the blkptr_t
*/
typedef struct zil_trailer {
uint64_t zit_pad;
blkptr_t zit_next_blk; /* next block in chain */
uint64_t zit_nused; /* bytes in log block used */
zio_block_tail_t zit_bt; /* block trailer */
} zil_trailer_t;
/*
 * Chain record carried by each log block: it names the next block in the
 * chain, records how many bytes of this block are in use, and holds the
 * block's embedded checksum.
 */
typedef struct zil_chain {
uint64_t zc_pad;
blkptr_t zc_next_blk; /* next block in chain */
uint64_t zc_nused; /* bytes in log block used */
zio_eck_t zc_eck; /* embedded checksum (magic + cksum) */
} zil_chain_t;
#define ZIL_MIN_BLKSZ 4096ULL
#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
#define ZIL_BLK_DATA_SZ(lwb) ((lwb)->lwb_sz - sizeof (zil_trailer_t))
/*
* The words of a log block checksum.
@@ -139,7 +146,8 @@ typedef enum zil_create {
#define TX_MKDIR_ACL 17 /* mkdir with ACL */
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_MAX_TYPE 20 /* Max transaction type */
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
#define TX_MAX_TYPE 21 /* Max transaction type */
/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
@@ -148,6 +156,20 @@ typedef enum zil_create {
*/
#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */
/*
 * Transactions for write, truncate, setattr, acl_v0, and acl can be logged
 * out of order. For convenience in the code, all such records must have
 * lr_foid at the same offset.
 *
 * TX_OOO(txtype) is non-zero when txtype is one of those transaction types
 * (TX_WRITE2 is accepted as well as TX_WRITE).  The argument is compared
 * against each type in turn, so it may be evaluated up to six times; do
 * not pass an expression with side effects.
 */
#define TX_OOO(txtype) \
((txtype) == TX_WRITE || \
(txtype) == TX_TRUNCATE || \
(txtype) == TX_SETATTR || \
(txtype) == TX_ACL_V0 || \
(txtype) == TX_ACL || \
(txtype) == TX_WRITE2)
/*
* Format of log records.
* The fields are carefully defined to allow them to be aligned
@@ -167,6 +189,14 @@ typedef struct { /* common log record header */
uint64_t lrc_seq; /* see comment above */
} lr_t;
/*
 * Common start of all out-of-order record types (TX_OOO() above).
 *
 * It consists of the common log record header immediately followed by the
 * object id.  Presumably a record of any out-of-order type can be viewed
 * through this type to read lr_foid without knowing the specific record
 * layout -- confirm against the callers.
 */
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* object id */
} lr_ooo_t;
/*
* Handle option extended vattr attributes.
*
@@ -257,7 +287,7 @@ typedef struct {
uint64_t lr_foid; /* file object to write */
uint64_t lr_offset; /* offset to write to */
uint64_t lr_length; /* user data length to write */
uint64_t lr_blkoff; /* offset represented by lr_blkptr */
uint64_t lr_blkoff; /* no longer used */
blkptr_t lr_blkptr; /* spa block pointer for replay */
/* write data will follow for small writes */
} lr_write_t;
@@ -332,6 +362,7 @@ typedef enum {
/* and put blkptr in log, rather than actual data) */
WR_COPIED, /* immediate - data is copied into lr_write_t */
WR_NEED_COPY, /* immediate - data needs to be copied if pushed */
WR_NUM_STATES /* number of states */
} itx_wr_state_t;
typedef struct itx {
@@ -344,26 +375,14 @@ typedef struct itx {
/* followed by type-specific part of lr_xx_t and its immediate data */
} itx_t;
/*
* zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
* to handle the cleanup of the dmu_sync() buffer write
*/
typedef struct {
zilog_t *zgd_zilog; /* zilog */
blkptr_t *zgd_bp; /* block pointer */
struct rl *zgd_rl; /* range lock */
} zgd_t;
typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
uint64_t txg);
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
uint64_t txg);
typedef int zil_replay_func_t();
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
extern void zil_init(void);
@@ -377,27 +396,33 @@ extern void zil_close(zilog_t *zilog);
extern void zil_replay(objset_t *os, void *arg,
zil_replay_func_t *replay_func[TX_MAX_TYPE]);
extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
extern void zil_itx_destroy(itx_t *itx);
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
extern int zil_vdev_offline(char *osname, void *txarg);
extern int zil_claim(char *osname, void *txarg);
extern int zil_check_log_chain(char *osname, void *txarg);
extern int zil_vdev_offline(const char *osname, void *txarg);
extern int zil_claim(const char *osname, void *txarg);
extern int zil_check_log_chain(const char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog);
extern int zil_is_committed(zilog_t *zilog);
extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp);
extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);
extern int zil_disable;
extern void zil_set_sync(zilog_t *zilog, uint64_t syncval);
extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval);
extern int zil_replay_disable;
#ifdef __cplusplus
}
+23 -10
View File
@@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_ZIL_IMPL_H
#define _SYS_ZIL_IMPL_H
@@ -43,8 +44,8 @@ typedef struct lwb {
int lwb_sz; /* size of block and buffer */
char *lwb_buf; /* log write buffer */
zio_t *lwb_zio; /* zio for this buffer */
dmu_tx_t *lwb_tx; /* tx for log block allocation */
uint64_t lwb_max_txg; /* highest txg in this lwb */
txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
} lwb_t;
@@ -57,6 +58,8 @@ typedef struct zil_vdev_node {
avl_node_t zv_node; /* AVL tree linkage */
} zil_vdev_node_t;
#define ZIL_PREV_BLKS 16
/*
* Stable storage intent log management structure. One per dataset.
*/
@@ -68,9 +71,10 @@ struct zilog {
objset_t *zl_os; /* object set we're logging */
zil_get_data_t *zl_get_data; /* callback to get object content */
zio_t *zl_root_zio; /* log writer root zio */
uint64_t zl_itx_seq; /* next itx sequence number */
uint64_t zl_itx_seq; /* next in-core itx sequence number */
uint64_t zl_lr_seq; /* on-disk log record sequence number */
uint64_t zl_commit_seq; /* committed upto this number */
uint64_t zl_lr_seq; /* log record sequence number */
uint64_t zl_commit_lr_seq; /* last committed on-disk lr seq */
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */
uint64_t zl_replaying_seq; /* current replay seq number */
@@ -82,7 +86,13 @@ struct zilog {
uint8_t zl_replay; /* replaying records while set */
uint8_t zl_stop_sync; /* for debugging */
uint8_t zl_writer; /* boolean: write setup in progress */
uint8_t zl_log_error; /* boolean: log write error */
uint8_t zl_logbias; /* latency or throughput */
uint8_t zl_sync; /* synchronous or asynchronous */
int zl_parse_error; /* last zil_parse() error */
uint64_t zl_parse_blk_seq; /* highest blk seq on last parse */
uint64_t zl_parse_lr_seq; /* highest lr seq on last parse */
uint64_t zl_parse_blk_count; /* number of blocks parsed */
uint64_t zl_parse_lr_count; /* number of log records parsed */
list_t zl_itx_list; /* in-memory itx list */
uint64_t zl_itx_list_sz; /* total size of records on list */
uint64_t zl_cur_used; /* current commit log size used */
@@ -91,17 +101,20 @@ struct zilog {
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
avl_tree_t zl_bp_tree; /* track bps during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
zil_header_t zl_old_header; /* debugging aid */
uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
};
typedef struct zil_dva_node {
typedef struct zil_bp_node {
dva_t zn_dva;
avl_node_t zn_node;
} zil_dva_node_t;
} zil_bp_node_t;
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
sizeof (lr_write_t))
#ifdef __cplusplus
+223 -117
View File
@@ -20,8 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZIO_H
@@ -38,12 +37,15 @@
extern "C" {
#endif
#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
/*
* Embedded checksum
*/
#define ZEC_MAGIC 0x210da7ab10c7a11ULL
typedef struct zio_block_tail {
uint64_t zbt_magic; /* for validation, endianness */
zio_cksum_t zbt_cksum; /* 256-bit checksum */
} zio_block_tail_t;
/*
 * Embedded checksum record: a 64-bit magic number followed by a 256-bit
 * checksum.  It is sized into the gang block header (see the
 * sizeof (zio_eck_t) terms in SPA_GBH_NBLKPTRS / SPA_GBH_FILLER).
 * NOTE(review): zec_magic presumably holds ZEC_MAGIC -- confirm.
 */
typedef struct zio_eck {
uint64_t zec_magic; /* for validation, endianness */
zio_cksum_t zec_cksum; /* 256-bit checksum */
} zio_eck_t;
/*
* Gang block headers are self-checksumming and contain an array
@@ -51,16 +53,16 @@ typedef struct zio_block_tail {
*/
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
sizeof (zio_eck_t)) / sizeof (blkptr_t))
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
sizeof (zio_block_tail_t) - \
sizeof (zio_eck_t) - \
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
sizeof (uint64_t))
typedef struct zio_gbh {
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
uint64_t zg_filler[SPA_GBH_FILLER];
zio_block_tail_t zg_tail;
zio_eck_t zg_tail;
} zio_gbh_phys_t;
enum zio_checksum {
@@ -73,12 +75,19 @@ enum zio_checksum {
ZIO_CHECKSUM_FLETCHER_2,
ZIO_CHECKSUM_FLETCHER_4,
ZIO_CHECKSUM_SHA256,
ZIO_CHECKSUM_ZILOG2,
ZIO_CHECKSUM_FUNCTIONS
};
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
#define ZIO_CHECKSUM_MASK 0xffULL
#define ZIO_CHECKSUM_VERIFY (1 << 8)
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
enum zio_compress {
ZIO_COMPRESS_INHERIT = 0,
ZIO_COMPRESS_ON,
@@ -94,12 +103,19 @@ enum zio_compress {
ZIO_COMPRESS_GZIP_7,
ZIO_COMPRESS_GZIP_8,
ZIO_COMPRESS_GZIP_9,
ZIO_COMPRESS_ZLE,
ZIO_COMPRESS_FUNCTIONS
};
#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
/*
 * Non-zero when `compress' is one of: ZIO_COMPRESS_LZJB, ZIO_COMPRESS_OFF,
 * or ZIO_COMPRESS_ON while ZIO_COMPRESS_ON_VALUE is defined as LZJB.
 * Judging by the name this validates the compression setting of a boot
 * filesystem -- confirm against the callers.
 * `compress' may be evaluated up to three times.
 */
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
((compress) == ZIO_COMPRESS_ON && \
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
(compress) == ZIO_COMPRESS_OFF)
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
@@ -107,84 +123,89 @@ enum zio_compress {
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
#define ZIO_PRIORITY_FREE (zio_priority_table[5])
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
#define ZIO_PRIORITY_TABLE_SIZE 10
#define ZIO_FLAG_MUSTSUCCEED 0x000000
#define ZIO_FLAG_CANFAIL 0x000001
#define ZIO_FLAG_SPECULATIVE 0x000002
#define ZIO_FLAG_CONFIG_WRITER 0x000004
#define ZIO_FLAG_DONT_RETRY 0x000008
#define ZIO_FLAG_DONT_CACHE 0x000010
#define ZIO_FLAG_DONT_QUEUE 0x000020
#define ZIO_FLAG_DONT_AGGREGATE 0x000040
#define ZIO_FLAG_DONT_PROPAGATE 0x000080
#define ZIO_FLAG_IO_BYPASS 0x000100
#define ZIO_FLAG_IO_REPAIR 0x000200
#define ZIO_FLAG_IO_RETRY 0x000400
#define ZIO_FLAG_IO_REWRITE 0x000800
#define ZIO_FLAG_SELF_HEAL 0x001000
#define ZIO_FLAG_RESILVER 0x002000
#define ZIO_FLAG_SCRUB 0x004000
#define ZIO_FLAG_SCRUB_THREAD 0x008000
#define ZIO_FLAG_PROBE 0x010000
#define ZIO_FLAG_GANG_CHILD 0x020000
#define ZIO_FLAG_RAW 0x040000
#define ZIO_FLAG_GODFATHER 0x080000
#define ZIO_FLAG_TRYHARD 0x100000
#define ZIO_FLAG_NODATA 0x200000
#define ZIO_FLAG_OPTIONAL 0x400000
#define ZIO_FLAG_GANG_INHERIT \
(ZIO_FLAG_CANFAIL | \
ZIO_FLAG_SPECULATIVE | \
ZIO_FLAG_CONFIG_WRITER | \
ZIO_FLAG_DONT_RETRY | \
ZIO_FLAG_DONT_CACHE | \
ZIO_FLAG_DONT_AGGREGATE | \
ZIO_FLAG_SELF_HEAL | \
ZIO_FLAG_RESILVER | \
ZIO_FLAG_SCRUB | \
ZIO_FLAG_SCRUB_THREAD)
#define ZIO_FLAG_VDEV_INHERIT \
(ZIO_FLAG_GANG_INHERIT | \
ZIO_FLAG_IO_REPAIR | \
ZIO_FLAG_IO_RETRY | \
ZIO_FLAG_PROBE | \
ZIO_FLAG_TRYHARD | \
ZIO_FLAG_NODATA | \
ZIO_FLAG_OPTIONAL)
#define ZIO_FLAG_AGG_INHERIT \
(ZIO_FLAG_DONT_AGGREGATE | \
ZIO_FLAG_IO_REPAIR | \
ZIO_FLAG_SELF_HEAL | \
ZIO_FLAG_RESILVER | \
ZIO_FLAG_SCRUB | \
ZIO_FLAG_SCRUB_THREAD)
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[3])
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[4])
#define ZIO_PRIORITY_AGG (zio_priority_table[5])
#define ZIO_PRIORITY_FREE (zio_priority_table[6])
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[7])
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11])
#define ZIO_PRIORITY_TABLE_SIZE 12
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
/*
 * zio flag bits.
 *
 * The bits are laid out in groups, ordered from "inherited by the most
 * kinds of children" to "inherited by none".  Each *_INHERIT mask is
 * defined as (first flag of the following group) - 1, i.e. every bit below
 * that flag:
 *
 *	ZIO_FLAG_AGG_INHERIT	bits 0-5
 *	ZIO_FLAG_DDT_INHERIT	bits 0-12
 *	ZIO_FLAG_GANG_INHERIT	bits 0-12
 *	ZIO_FLAG_VDEV_INHERIT	bits 0-16
 *
 * Because the masks are derived from bit positions, a new flag must be
 * added inside the correct group, and the flags marked "must be first for
 * INHERIT" must remain the first member of their group.
 */
enum zio_flag {
/*
 * Flags inherited by gang, ddt, and vdev children,
 * and that must be equal for two zios to aggregate
 */
ZIO_FLAG_DONT_AGGREGATE = 1 << 0,
ZIO_FLAG_IO_REPAIR = 1 << 1,
ZIO_FLAG_SELF_HEAL = 1 << 2,
ZIO_FLAG_RESILVER = 1 << 3,
ZIO_FLAG_SCRUB = 1 << 4,
ZIO_FLAG_SCRUB_THREAD = 1 << 5,
#define ZIO_FLAG_AGG_INHERIT (ZIO_FLAG_CANFAIL - 1)
/*
 * Flags inherited by ddt, gang, and vdev children.
 */
ZIO_FLAG_CANFAIL = 1 << 6, /* must be first for INHERIT */
ZIO_FLAG_SPECULATIVE = 1 << 7,
ZIO_FLAG_CONFIG_WRITER = 1 << 8,
ZIO_FLAG_DONT_RETRY = 1 << 9,
ZIO_FLAG_DONT_CACHE = 1 << 10,
ZIO_FLAG_NODATA = 1 << 11,
ZIO_FLAG_INDUCE_DAMAGE = 1 << 12,
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
/*
 * Flags inherited by vdev children.
 */
ZIO_FLAG_IO_RETRY = 1 << 13, /* must be first for INHERIT */
ZIO_FLAG_PROBE = 1 << 14,
ZIO_FLAG_TRYHARD = 1 << 15,
ZIO_FLAG_OPTIONAL = 1 << 16,
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
 * Flags not inherited by any children.
 */
ZIO_FLAG_DONT_QUEUE = 1 << 17, /* must be first for INHERIT */
ZIO_FLAG_DONT_PROPAGATE = 1 << 18,
ZIO_FLAG_IO_BYPASS = 1 << 19,
ZIO_FLAG_IO_REWRITE = 1 << 20,
ZIO_FLAG_RAW = 1 << 21,
ZIO_FLAG_GANG_CHILD = 1 << 22,
ZIO_FLAG_DDT_CHILD = 1 << 23,
ZIO_FLAG_GODFATHER = 1 << 24
};
/* No flag bits set; in particular ZIO_FLAG_CANFAIL is absent. */
#define ZIO_FLAG_MUSTSUCCEED 0
/*
 * Flags for a DDT child of `zio': the parent's DDT-inheritable bits, plus
 * ZIO_FLAG_DDT_CHILD and ZIO_FLAG_CANFAIL, which are always set.
 */
#define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
ZIO_FLAG_DDT_CHILD | ZIO_FLAG_CANFAIL)
/*
 * Flags for a gang child of `zio': the parent's gang-inheritable bits, plus
 * ZIO_FLAG_GANG_CHILD and ZIO_FLAG_CANFAIL, which are always set.
 */
#define ZIO_GANG_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \
ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL)
/*
 * Flags for a vdev child of `zio': the parent's vdev-inheritable bits, plus
 * ZIO_FLAG_CANFAIL, which is always set.
 */
#define ZIO_VDEV_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_VDEV_INHERIT) | \
ZIO_FLAG_CANFAIL)
enum zio_child {
ZIO_CHILD_VDEV = 0,
ZIO_CHILD_GANG,
ZIO_CHILD_DDT,
ZIO_CHILD_LOGICAL,
ZIO_CHILD_TYPES
};
@@ -202,7 +223,6 @@ enum zio_wait_type {
#define ECKSUM EBADE
#define EFRAGS EBADR
typedef struct zio zio_t;
typedef void zio_done_func_t(zio_t *zio);
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
@@ -211,18 +231,15 @@ extern char *zio_type_name[ZIO_TYPES];
/*
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
* identifies any block in the pool. By convention, the meta-objset (MOS)
* is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
* level -1 of the meta-dnode, and intent log blocks (which are chained
* off the root block) have blkid == sequence number. In summary:
* is objset 0, and the meta-dnode is object 0. This covers all blocks
* except root blocks and ZIL blocks, which are defined as follows:
*
* mos is objset 0
* meta-dnode is object 0
* root block is <objset, 0, -1, 0>
* intent log is <objset, 0, -1, ZIL sequence number>
* Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>.
* ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>.
* dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>.
*
* Note: this structure is called a bookmark because its first purpose was
* to remember where to resume a pool-wide traverse. The absolute ordering
* for block visitation during traversal is defined in compare_bookmark().
* Note: this structure is called a bookmark because its original purpose
* was to remember where to resume a pool-wide traverse.
*
* Note: this structure is passed between userland and the kernel.
* Therefore it must not change size or alignment between 32/64 bit
@@ -235,14 +252,66 @@ typedef struct zbookmark {
uint64_t zb_blkid;
} zbookmark_t;
/*
 * Fill in all four fields of the zbookmark_t pointed to by `zb'.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement; a bare brace block would break
 * an unbraced `if (...) SET_BOOKMARK(...); else ...'.  Callers must supply
 * the terminating semicolon.
 *
 * `zb' is evaluated four times; do not pass an expression with side
 * effects.
 */
#define	SET_BOOKMARK(zb, objset, object, level, blkid)	\
do {							\
	(zb)->zb_objset = (objset);			\
	(zb)->zb_object = (object);			\
	(zb)->zb_level = (level);			\
	(zb)->zb_blkid = (blkid);			\
} while (0)
/*
 * Well-known bookmark field values.
 * Root blocks use object 0, level -1, blkid 0; ZIL blocks use object 0,
 * level -2.
 */
/* all-ones objset; presumably marks a destroyed objset -- confirm */
#define ZB_DESTROYED_OBJSET (-1ULL)
#define ZB_ROOT_OBJECT (0ULL)
#define ZB_ROOT_LEVEL (-1LL)
#define ZB_ROOT_BLKID (0ULL)
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)
typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
dmu_object_type_t zp_type;
uint8_t zp_level;
uint8_t zp_ndvas;
uint8_t zp_copies;
uint8_t zp_dedup;
uint8_t zp_dedup_verify;
} zio_prop_t;
/*
 * Checksum error report.
 *
 * Reports are chained through zcr_next.  Each report carries two nvlists
 * (zcr_ereport, zcr_detector), an opaque callback cookie with its size,
 * and two callbacks: zcr_finish, which receives the report together with
 * a buffer of known-good data, and zcr_free, which receives a callback
 * cookie and a size.
 */
typedef struct zio_cksum_report zio_cksum_report_t;
/* Callback given the report and a pointer to the good data. */
typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
const void *good_data);
/* Callback that releases `cbdata'; `size' is the accompanying size. */
typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
struct zio_cksum_report {
struct zio_cksum_report *zcr_next; /* next report in the chain */
nvlist_t *zcr_ereport;
nvlist_t *zcr_detector;
void *zcr_cbdata; /* opaque data for the callbacks */
size_t zcr_cbinfo; /* passed to zcr_free() */
uint64_t zcr_align;
uint64_t zcr_length;
zio_cksum_finish_f *zcr_finish;
zio_cksum_free_f *zcr_free;
/* internal use only */
struct zio_bad_cksum *zcr_ckinfo; /* information from failure */
};
/*
 * Callback type for the vsd_cksum_report operation: it receives the zio,
 * the checksum report being prepared, and a caller-supplied argument.
 */
typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr,
void *arg);
/* Declaration of the default implementation of that callback. */
zio_vsd_cksum_report_f zio_vsd_default_cksum_report;
/*
 * Operations vector referenced from struct zio (io_vsd_ops), next to the
 * io_vsd pointer in the "vdev stack" section of that structure.
 * NOTE(review): "vsd" presumably stands for vdev-specific data -- confirm.
 */
typedef struct zio_vsd_ops {
zio_done_func_t *vsd_free; /* zio_done_func_t-style free hook */
zio_vsd_cksum_report_f *vsd_cksum_report; /* checksum report hook */
} zio_vsd_ops_t;
typedef struct zio_gang_node {
zio_gbh_phys_t *gn_gbh;
struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
@@ -293,6 +362,7 @@ struct zio {
uint64_t io_txg;
spa_t *io_spa;
blkptr_t *io_bp;
blkptr_t *io_bp_override;
blkptr_t io_bp_copy;
list_t io_parent_list;
list_t io_child_list;
@@ -304,16 +374,20 @@ struct zio {
zio_done_func_t *io_ready;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
blkptr_t io_bp_orig;
/* Data represented by this I/O */
void *io_data;
void *io_orig_data;
uint64_t io_size;
uint64_t io_orig_size;
/* Stuff for the vdev stack */
vdev_t *io_vd;
void *io_vsd;
zio_done_func_t *io_vsd_free;
const zio_vsd_ops_t *io_vsd_ops;
uint64_t io_offset;
uint64_t io_deadline;
avl_node_t io_offset_node;
@@ -321,15 +395,17 @@ struct zio {
avl_tree_t *io_vdev_tree;
/* Internal pipeline state */
int io_flags;
zio_stage_t io_stage;
uint32_t io_pipeline;
int io_orig_flags;
zio_stage_t io_orig_stage;
uint32_t io_orig_pipeline;
enum zio_flag io_flags;
enum zio_stage io_stage;
enum zio_stage io_pipeline;
enum zio_flag io_orig_flags;
enum zio_stage io_orig_stage;
enum zio_stage io_orig_pipeline;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
uint64_t io_child_count;
uint64_t io_parent_count;
uint64_t *io_stall;
zio_t *io_gang_leader;
zio_gang_node_t *io_gang_tree;
@@ -339,53 +415,58 @@ struct zio {
kcondvar_t io_cv;
/* FMA state */
zio_cksum_report_t *io_cksum_report;
uint64_t io_ena;
};
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
zio_done_func_t *done, void *private, int flags);
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, int flags);
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
int priority, int flags, const zbookmark_t *zb);
int priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_prop_t *zp,
void *data, uint64_t size, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *done, void *private,
int priority, int flags, const zbookmark_t *zb);
int priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
int priority, int flags, zbookmark_t *zb);
int priority, enum zio_flag flags, zbookmark_t *zb);
extern void zio_skip_write(zio_t *zio);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies);
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, int flags);
extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, int priority, int flags);
zio_done_func_t *done, void *private, int priority, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
zio_done_func_t *done, void *private, int priority, int flags,
zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
zio_done_func_t *done, void *private, int priority, int flags,
zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
boolean_t labels);
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t txg);
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t size, boolean_t use_slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
@@ -406,11 +487,11 @@ extern void zio_resubmit_stage_async(void *);
/*
 * NOTE(review): in both multi-line prototypes below the final parameter line
 * appears twice, once with `int flags` and once with `enum zio_flag flags`.
 * This listing is a diff with the +/- markers stripped; presumably only the
 * `enum zio_flag` line belongs to the current header -- confirm before use.
 */
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
uint64_t offset, void *data, uint64_t size, int type, int priority,
int flags, zio_done_func_t *done, void *private);
enum zio_flag flags, zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
void *data, uint64_t size, int type, int priority,
int flags, zio_done_func_t *done, void *private);
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
@@ -419,8 +500,12 @@ extern void zio_vdev_io_redone(zio_t *zio);
extern void zio_checksum_verified(zio_t *zio);
extern int zio_worst_error(int e1, int e2);
/*
 * NOTE(review): zio_checksum_select() and zio_compress_select() are each
 * declared twice here, first with uint8_t arguments and return type, then
 * with `enum zio_checksum` / `enum zio_compress`. This listing is a diff
 * with the +/- markers stripped; presumably the uint8_t forms are the
 * removed ones -- confirm. zio_checksum_dedup_select() has only an
 * enum-typed form and additionally takes the spa.
 */
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
enum zio_checksum parent);
extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
enum zio_checksum child, enum zio_checksum parent);
extern enum zio_compress zio_compress_select(enum zio_compress child,
enum zio_compress parent);
extern void zio_suspend(spa_t *spa, zio_t *zio);
extern int zio_resume(spa_t *spa);
@@ -442,9 +527,30 @@ extern int zio_inject_fault(char *name, int flags, int *id,
/*
 * Injection-related entry points. The zio_handle_*_injection() functions
 * that return int each take an `error` argument; what they do with it is
 * defined outside this listing.
 */
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
struct zinject_record *record);
extern int zio_clear_fault(int id);
extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
/*
 * Checksum ereport functions
 *
 * zfs_ereport_start_checksum() takes the spa/vdev/zio, an offset and length,
 * an opaque `arg` and a struct zio_bad_cksum; the finish, send_interim and
 * free functions each operate on a zio_cksum_report_t. How a report is
 * obtained from the start call (which returns void) is not visible here.
 */
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const void *good_data, const void *bad_data, boolean_t drop_if_identical);
extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report);
extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/*
 * If we have the good data in hand, this function can be used: it takes
 * both the good and bad buffers in a single call.
 */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length,
const void *good_data, const void *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);
#ifdef __cplusplus
}
+14 -12
View File
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZIO_CHECKSUM_H
@@ -43,28 +42,31 @@ typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
/*
 * Descriptor for one checksum algorithm; zio_checksum_table[] is declared
 * as an array of ZIO_CHECKSUM_FUNCTIONS of these.
 *
 * NOTE(review): this listing is a diff with the +/- markers stripped.
 * `ci_zbt` ("uses zio block tail?") and `ci_eck` ("uses zio embedded
 * checksum?") both appear; presumably ci_zbt is the removed field and
 * ci_eck/ci_dedup are the added ones -- confirm against the real header.
 */
typedef struct zio_checksum_info {
zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
int ci_correctable; /* number of correctable bits */
int ci_zbt; /* uses zio block tail? */
int ci_eck; /* uses zio embedded checksum? */
int ci_dedup; /* strong enough for dedup? */
char *ci_name; /* descriptive name */
} zio_checksum_info_t;
/*
 * Record describing a checksum mismatch. In this listing it is the type of
 * the `out` argument of zio_checksum_error() and of the `info` argument of
 * the zfs_ereport_*_checksum() functions.
 */
typedef struct zio_bad_cksum {
zio_cksum_t zbc_expected; /* presumably the checksum that was expected */
zio_cksum_t zbc_actual; /* presumably the checksum actually computed */
const char *zbc_checksum_name; /* presumably the algorithm's name */
uint8_t zbc_byteswapped; /* flag; exact semantics not visible here */
uint8_t zbc_injected; /* flag; exact semantics not visible here */
uint8_t zbc_has_cksum; /* expected/actual valid */
} zio_bad_cksum_t;
/* One zio_checksum_info_t entry per checksum function. */
extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
* Checksum routines.
*
* Each is declared with the zio_checksum_t function type. The _native and
* _byteswap variants presumably correspond to the two ci_func[] byte-order
* slots -- TODO confirm.
*/
extern zio_checksum_t fletcher_2_native;
extern zio_checksum_t fletcher_4_native;
extern zio_checksum_t fletcher_4_incremental_native;
extern zio_checksum_t fletcher_2_byteswap;
extern zio_checksum_t fletcher_4_byteswap;
extern zio_checksum_t fletcher_4_incremental_byteswap;
extern zio_checksum_t zio_checksum_SHA256;
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
void *data, uint64_t size);
/*
 * NOTE(review): zio_checksum_error() is declared twice, with and without
 * the zio_bad_cksum_t `out` argument. This listing is a diff with the +/-
 * markers stripped; presumably the one-argument form is the removed one --
 * confirm.
 */
extern int zio_checksum_error(zio_t *zio);
extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
#ifdef __cplusplus
}
+9 -7
View File
@@ -20,15 +20,13 @@
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIO_COMPRESS_H
#define _SYS_ZIO_COMPRESS_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zio.h>
#ifdef __cplusplus
@@ -66,14 +64,18 @@ extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
/*
 * The zle_* pair has the same parameter list as the gzip_decompress()
 * prototype: source and destination buffers, their lengths, and a level.
 */
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern size_t zle_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
/*
* Compress and decompress data if necessary.
*
* NOTE(review): each function is declared twice below. The first forms
* select the algorithm with `int cpfunc` and use uint64_t sizes (the
* compress form returning its destination through pointer arguments); the
* second forms take `enum zio_compress c` and size_t lengths, with
* zio_compress_data() returning size_t. This listing is a diff with the +/-
* markers stripped; presumably the `int cpfunc` forms are the removed ones
* -- confirm.
*/
extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
void *dest, uint64_t destsize);
extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
size_t s_len);
extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
size_t s_len, size_t d_len);
#ifdef __cplusplus
}
+102 -70
View File
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -34,104 +34,136 @@ extern "C" {
#endif
/*
* I/O Groups: pipeline stage definitions.
* zio pipeline stage definitions
*/
/*
 * NOTE(review): two generations of this enum are interleaved, because this
 * listing is a diff with the +/- markers stripped:
 *   - entries without an initialiser (other than `ZIO_STAGE_OPEN = 0`),
 *     the ZIO_STAGES terminator and the `typedef ... zio_stage_t` framing
 *     belong to a sequentially numbered form;
 *   - entries written `1 << n` belong to a plain `enum zio_stage` in which
 *     every stage is a distinct single-bit mask (bits 0 through 20).
 * As written the block is not valid C; do not compile it as-is.
 *
 * The five-character tag in each entry comment appears to mark which I/O
 * types run the stage -- presumably Read, Write, Free, Claim, Ioctl in that
 * column order; TODO confirm.
 */
typedef enum zio_stage {
ZIO_STAGE_OPEN = 0, /* RWFCI */
enum zio_stage {
ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */
ZIO_STAGE_ISSUE_ASYNC, /* -W--- */
ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */
ZIO_STAGE_FREE_BP_INIT = 1 << 2, /* --F-- */
ZIO_STAGE_ISSUE_ASYNC = 1 << 3, /* RWF-- */
ZIO_STAGE_WRITE_BP_INIT = 1 << 4, /* -W--- */
ZIO_STAGE_READ_BP_INIT, /* R---- */
ZIO_STAGE_WRITE_BP_INIT, /* -W--- */
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */
ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
ZIO_STAGE_DDT_READ_START = 1 << 6, /* R---- */
ZIO_STAGE_DDT_READ_DONE = 1 << 7, /* R---- */
ZIO_STAGE_DDT_WRITE = 1 << 8, /* -W--- */
ZIO_STAGE_DDT_FREE = 1 << 9, /* --F-- */
ZIO_STAGE_GANG_ASSEMBLE, /* RWFC- */
ZIO_STAGE_GANG_ISSUE, /* RWFC- */
ZIO_STAGE_GANG_ASSEMBLE = 1 << 10, /* RWFC- */
ZIO_STAGE_GANG_ISSUE = 1 << 11, /* RWFC- */
ZIO_STAGE_DVA_ALLOCATE, /* -W--- */
ZIO_STAGE_DVA_FREE, /* --F-- */
ZIO_STAGE_DVA_CLAIM, /* ---C- */
ZIO_STAGE_DVA_ALLOCATE = 1 << 12, /* -W--- */
ZIO_STAGE_DVA_FREE = 1 << 13, /* --F-- */
ZIO_STAGE_DVA_CLAIM = 1 << 14, /* ---C- */
ZIO_STAGE_READY, /* RWFCI */
ZIO_STAGE_READY = 1 << 15, /* RWFCI */
ZIO_STAGE_VDEV_IO_START, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS, /* RW--I */
ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RW--I */
ZIO_STAGE_CHECKSUM_VERIFY, /* R---- */
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 19, /* R---- */
ZIO_STAGE_DONE, /* RWFCI */
ZIO_STAGES
} zio_stage_t;
ZIO_STAGE_DONE = 1 << 20 /* RWFCI */
};
/*
 * Pipeline masks: each macro ORs together the stages that make up one kind
 * of zio pipeline.
 *
 * NOTE(review): this listing is a diff with the +/- markers stripped, so
 * most macros appear twice and some `#define` lines are doubled or
 * separated from their continuation lines. Two spellings are mixed:
 *   - `(1U << ZIO_STAGE_x)`, which treats the stage constant as a bit index;
 *   - bare `ZIO_STAGE_x`, which treats the stage constant as already being
 *     a single-bit mask (matching the `1 << n` enum initialisers).
 * Presumably the shifted spelling is the removed text -- confirm against
 * the real zio_impl.h. As written the block is not valid preprocessor input.
 */
#define ZIO_INTERLOCK_STAGES \
((1U << ZIO_STAGE_READY) | \
(1U << ZIO_STAGE_DONE))
#define ZIO_INTERLOCK_STAGES \
(ZIO_STAGE_READY | \
ZIO_STAGE_DONE)
#define ZIO_INTERLOCK_PIPELINE \
#define ZIO_INTERLOCK_PIPELINE \
ZIO_INTERLOCK_STAGES
#define ZIO_VDEV_IO_STAGES \
((1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_VDEV_IO_DONE) | \
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
#define ZIO_VDEV_IO_STAGES \
(ZIO_STAGE_VDEV_IO_START | \
ZIO_STAGE_VDEV_IO_DONE | \
ZIO_STAGE_VDEV_IO_ASSESS)
#define ZIO_VDEV_CHILD_PIPELINE \
(ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_DONE))
#define ZIO_VDEV_CHILD_PIPELINE \
(ZIO_VDEV_IO_STAGES | \
ZIO_STAGE_DONE)
#define ZIO_READ_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_CHECKSUM_VERIFY))
#define ZIO_READ_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
ZIO_STAGE_CHECKSUM_VERIFY)
#define ZIO_READ_PHYS_PIPELINE \
#define ZIO_READ_PHYS_PIPELINE \
ZIO_READ_COMMON_STAGES
#define ZIO_READ_PIPELINE \
(ZIO_READ_COMMON_STAGES | \
(1U << ZIO_STAGE_READ_BP_INIT))
#define ZIO_READ_PIPELINE \
(ZIO_READ_COMMON_STAGES | \
ZIO_STAGE_READ_BP_INIT)
#define ZIO_WRITE_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_ISSUE_ASYNC) | \
(1U << ZIO_STAGE_CHECKSUM_GENERATE))
#define ZIO_DDT_CHILD_READ_PIPELINE \
ZIO_READ_COMMON_STAGES
#define ZIO_WRITE_PHYS_PIPELINE \
#define ZIO_DDT_READ_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_READ_BP_INIT | \
ZIO_STAGE_DDT_READ_START | \
ZIO_STAGE_DDT_READ_DONE)
#define ZIO_WRITE_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_CHECKSUM_GENERATE)
#define ZIO_WRITE_PHYS_PIPELINE \
ZIO_WRITE_COMMON_STAGES
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
(1U << ZIO_STAGE_WRITE_BP_INIT))
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
(1U << ZIO_STAGE_WRITE_BP_INIT) | \
(1U << ZIO_STAGE_DVA_ALLOCATE))
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_DVA_ALLOCATE)
#define ZIO_GANG_STAGES \
((1U << ZIO_STAGE_GANG_ASSEMBLE) | \
(1U << ZIO_STAGE_GANG_ISSUE))
#define ZIO_DDT_CHILD_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
ZIO_STAGE_DVA_ALLOCATE)
#define ZIO_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
(1U << ZIO_STAGE_DVA_FREE))
#define ZIO_DDT_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)
#define ZIO_CLAIM_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
(1U << ZIO_STAGE_DVA_CLAIM))
#define ZIO_GANG_STAGES \
(ZIO_STAGE_GANG_ASSEMBLE | \
ZIO_STAGE_GANG_ISSUE)
#define ZIO_IOCTL_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
(1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
#define ZIO_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_FREE_BP_INIT | \
ZIO_STAGE_DVA_FREE)
/*
 * NOTE(review): ZIO_CONFIG_LOCK_BLOCKING_STAGES here and ZIO_BLOCKING_STAGES
 * further down name the same three stages (VDEV_IO_START, DVA_ALLOCATE,
 * DVA_CLAIM); presumably the shorter name replaces this one -- confirm.
 */
#define ZIO_CONFIG_LOCK_BLOCKING_STAGES \
((1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_DVA_ALLOCATE) | \
(1U << ZIO_STAGE_DVA_CLAIM))
#define ZIO_DDT_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_FREE_BP_INIT | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_DDT_FREE)
#define ZIO_CLAIM_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_DVA_CLAIM)
#define ZIO_IOCTL_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_STAGE_VDEV_IO_START | \
ZIO_STAGE_VDEV_IO_ASSESS)
#define ZIO_BLOCKING_STAGES \
(ZIO_STAGE_DVA_ALLOCATE | \
ZIO_STAGE_DVA_CLAIM | \
ZIO_STAGE_VDEV_IO_START)
/*
 * Presumably the set-up and tear-down entry points for the zio injection
 * code; their definitions are not visible in this listing -- TODO confirm.
 */
extern void zio_inject_init(void);
extern void zio_inject_fini(void);
+12 -6
View File
@@ -20,15 +20,12 @@
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ZVOL_H
#define _SYS_ZVOL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
@@ -43,10 +40,10 @@ extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
/*
 * NOTE(review): zvol_create_minor() is declared twice, with and without a
 * major_t argument. This listing is a diff with the +/- markers stripped;
 * presumably the two-argument form is the removed one -- confirm. Which of
 * the neighbouring single declarations (zvol_remove_minors, zvol_set_volsize,
 * zvol_set_volblocksize) were added or removed cannot be told from here.
 */
extern int zvol_create_minor(const char *, major_t);
extern int zvol_create_minor(const char *);
extern int zvol_remove_minor(const char *);
extern void zvol_remove_minors(const char *);
extern int zvol_set_volsize(const char *, major_t, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
@@ -61,6 +58,15 @@ extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
extern int zvol_busy(void);
extern void zvol_init(void);
extern void zvol_fini(void);
/*
 * Volume accessors keyed by minor number or by an opaque `minor_hdl`.
 * zvol_get_volume_params() takes a minor number plus pointers for the block
 * size, maximum transfer length and five opaque handles (minor, objset,
 * zil, rl, bonus); presumably these are output parameters, and the
 * minor_hdl it yields is what the other three functions expect -- TODO
 * confirm against the implementation.
 */
extern int zvol_get_volume_params(minor_t minor, uint64_t *blksize,
uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
void **rl_hdl, void **bonus_hdl);
extern uint64_t zvol_get_volume_size(void *minor_hdl);
extern int zvol_get_volume_wce(void *minor_hdl);
extern void zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off,
ssize_t resid, boolean_t sync);
#endif
#ifdef __cplusplus