Move the world out of /zfs/ and seperate out module build tree

This commit is contained in:
Brian Behlendorf
2008-12-11 11:08:09 -08:00
parent 9e8b1e836c
commit 172bb4bd5e
193 changed files with 51 additions and 47 deletions
+138
View File
@@ -0,0 +1,138 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ARC_H
#define _SYS_ARC_H
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/spa.h>
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef int arc_evict_func_t(void *private);
/* generic arc_done_func_t's which you can use */
arc_done_func_t arc_bcopy_func;
arc_done_func_t arc_getbuf_func;
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next;
krwlock_t b_lock;
void *b_data;
arc_evict_func_t *b_efunc;
void *b_private;
};
typedef enum arc_buf_contents {
ARC_BUFC_DATA, /* buffer contains data */
ARC_BUFC_METADATA, /* buffer contains metadata */
ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
/*
* These are the flags we pass into calls to the arc
*/
#define ARC_WAIT (1 << 1) /* perform I/O synchronously */
#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */
#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */
#define ARC_CACHED (1 << 4) /* I/O was already in cache */
#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */
void arc_space_consume(uint64_t space);
void arc_space_return(uint64_t space);
void *arc_data_buf_alloc(uint64_t space);
void arc_data_buf_free(void *buf, uint64_t space);
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
arc_buf_contents_t type);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
typedef struct writeprops {
dmu_object_type_t wp_type;
uint8_t wp_level;
uint8_t wp_copies;
uint8_t wp_dncompress, wp_oscompress;
uint8_t wp_dnchecksum, wp_oschecksum;
} writeprops_t;
void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp);
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
arc_done_func_t *done, void *private, int priority, int flags,
uint32_t *arc_flags, const zbookmark_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
int zio_flags, const zbookmark_t *zb);
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);
void arc_flush(spa_t *spa);
void arc_tempreserve_clear(uint64_t reserve);
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
void arc_init(void);
void arc_fini(void);
/*
* Level 2 ARC
*/
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
void l2arc_remove_vdev(vdev_t *vd);
boolean_t l2arc_vdev_present(vdev_t *vd);
void l2arc_init(void);
void l2arc_fini(void);
void l2arc_start(void);
void l2arc_stop(void);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ARC_H */
+89
View File
@@ -0,0 +1,89 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_BPLIST_H
#define _SYS_BPLIST_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct bplist_phys {
/*
* This is the bonus buffer for the dead lists. The object's
* contents is an array of bpl_entries blkptr_t's, representing
* a total of bpl_bytes physical space.
*/
uint64_t bpl_entries;
uint64_t bpl_bytes;
uint64_t bpl_comp;
uint64_t bpl_uncomp;
} bplist_phys_t;
#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t))
typedef struct bplist_q {
blkptr_t bpq_blk;
void *bpq_next;
} bplist_q_t;
typedef struct bplist {
kmutex_t bpl_lock;
objset_t *bpl_mos;
uint64_t bpl_object;
uint8_t bpl_blockshift;
uint8_t bpl_bpshift;
uint8_t bpl_havecomp;
bplist_q_t *bpl_queue;
bplist_phys_t *bpl_phys;
dmu_buf_t *bpl_dbuf;
dmu_buf_t *bpl_cached_dbuf;
} bplist_t;
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
extern void bplist_close(bplist_t *bpl);
extern boolean_t bplist_empty(bplist_t *bpl);
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
extern int bplist_space(bplist_t *bpl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
extern int bplist_space_birthrange(bplist_t *bpl,
uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_BPLIST_H */
+347
View File
@@ -0,0 +1,347 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DBUF_H
#define _SYS_DBUF_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
#endif
#define DB_BONUS_BLKID (-1ULL)
#define IN_DMU_SYNC 2
/*
* define flags for dbuf_read
*/
#define DB_RF_MUST_SUCCEED (1 << 0)
#define DB_RF_CANFAIL (1 << 1)
#define DB_RF_HAVESTRUCT (1 << 2)
#define DB_RF_NOPREFETCH (1 << 3)
#define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5)
/*
* The simplified state transition diagram for dbufs looks like:
*
* +----> READ ----+
* | |
* | V
* (alloc)-->UNCACHED CACHED-->EVICTING-->(free)
* | ^ ^
* | | |
* +----> FILL ----+ |
* | |
* | |
* +--------> NOFILL -------+
*/
typedef enum dbuf_states {
DB_UNCACHED,
DB_FILL,
DB_NOFILL,
DB_READ,
DB_CACHED,
DB_EVICTING
} dbuf_states_t;
struct objset_impl;
struct dnode;
struct dmu_tx;
/*
* level = 0 means the user data
* level = 1 means the single indirect block
* etc.
*/
#define LIST_LINK_INACTIVE(link) \
((link)->list_next == NULL && (link)->list_prev == NULL)
struct dmu_buf_impl;
typedef enum override_states {
DR_NOT_OVERRIDDEN,
DR_IN_DMU_SYNC,
DR_OVERRIDDEN
} override_states_t;
typedef struct dbuf_dirty_record {
/* link on our parents dirty list */
list_node_t dr_dirty_node;
/* transaction group this data will sync in */
uint64_t dr_txg;
/* zio of outstanding write IO */
zio_t *dr_zio;
/* pointer back to our dbuf */
struct dmu_buf_impl *dr_dbuf;
/* pointer to next dirty record */
struct dbuf_dirty_record *dr_next;
/* pointer to parent dirty record */
struct dbuf_dirty_record *dr_parent;
union dirty_types {
struct dirty_indirect {
/* protect access to list */
kmutex_t dr_mtx;
/* Our list of dirty children */
list_t dr_children;
} di;
struct dirty_leaf {
/*
* dr_data is set when we dirty the buffer
* so that we can retain the pointer even if it
* gets COW'd in a subsequent transaction group.
*/
arc_buf_t *dr_data;
blkptr_t dr_overridden_by;
override_states_t dr_override_state;
} dl;
} dt;
} dbuf_dirty_record_t;
typedef struct dmu_buf_impl {
/*
* The following members are immutable, with the exception of
* db.db_data, which is protected by db_mtx.
*/
/* the publicly visible structure */
dmu_buf_t db;
/* the objset we belong to */
struct objset_impl *db_objset;
/*
* the dnode we belong to (NULL when evicted)
*/
struct dnode *db_dnode;
/*
* our parent buffer; if the dnode points to us directly,
* db_parent == db_dnode->dn_dbuf
* only accessed by sync thread ???
* (NULL when evicted)
*/
struct dmu_buf_impl *db_parent;
/*
* link for hash table of all dmu_buf_impl_t's
*/
struct dmu_buf_impl *db_hash_next;
/* our block number */
uint64_t db_blkid;
/*
* Pointer to the blkptr_t which points to us. May be NULL if we
* don't have one yet. (NULL when evicted)
*/
blkptr_t *db_blkptr;
/*
* Our indirection level. Data buffers have db_level==0.
* Indirect buffers which point to data buffers have
* db_level==1. etc. Buffers which contain dnodes have
* db_level==0, since the dnodes are stored in a file.
*/
uint8_t db_level;
/* db_mtx protects the members below */
kmutex_t db_mtx;
/*
* Current state of the buffer
*/
dbuf_states_t db_state;
/*
* Refcount accessed by dmu_buf_{hold,rele}.
* If nonzero, the buffer can't be destroyed.
* Protected by db_mtx.
*/
refcount_t db_holds;
/* buffer holding our data */
arc_buf_t *db_buf;
kcondvar_t db_changed;
dbuf_dirty_record_t *db_data_pending;
/* pointer to most recent dirty record for this buffer */
dbuf_dirty_record_t *db_last_dirty;
/*
* Our link on the owner dnodes's dn_dbufs list.
* Protected by its dn_dbufs_mtx.
*/
list_node_t db_link;
/* Data which is unique to data (leaf) blocks: */
/* stuff we store for the user (see dmu_buf_set_user) */
void *db_user_ptr;
void **db_user_data_ptr_ptr;
dmu_buf_evict_func_t *db_evict_func;
uint8_t db_immediate_evict;
uint8_t db_freed_in_flight;
uint8_t db_dirtycnt;
} dmu_buf_impl_t;
/* Note: the dbuf hash table is exposed only for the mdb module */
#define DBUF_MUTEXES 256
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table;
kmutex_t hash_mutexes[DBUF_MUTEXES];
} dbuf_hash_table_t;
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
void dbuf_create_bonus(struct dnode *dn);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
void *tag);
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
void *tag, dmu_buf_impl_t **dbp);
void dbuf_prefetch(struct dnode *dn, uint64_t blkid);
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_clear(dmu_buf_impl_t *db);
void dbuf_evict(dmu_buf_impl_t *db);
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
struct dmu_tx *);
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
void dbuf_init(void);
void dbuf_fini(void);
#define DBUF_IS_METADATA(db) \
((db)->db_level > 0 || dmu_ot[(db)->db_dnode->dn_type].ot_metadata)
#define DBUF_GET_BUFC_TYPE(db) \
(DBUF_IS_METADATA(db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
#define DBUF_IS_CACHEABLE(db) \
((db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
(DBUF_IS_METADATA(db) && \
((db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
#define DBUF_IS_L2CACHEABLE(db) \
((db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(DBUF_IS_METADATA(db) && \
((db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#ifdef ZFS_DEBUG
/*
* There should be a ## between the string literal and fmt, to make it
* clear that we're joining two strings together, but gcc does not
* support that preprocessor token.
*/
#define dprintf_dbuf(dbuf, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char __db_buf[32]; \
uint64_t __db_obj = (dbuf)->db.db_object; \
if (__db_obj == DMU_META_DNODE_OBJECT) \
(void) strcpy(__db_buf, "mdn"); \
else \
(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
(u_longlong_t)__db_obj); \
dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
"obj=%s lvl=%u blkid=%lld " fmt, \
__db_buf, (dbuf)->db_level, \
(u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
} \
_NOTE(CONSTCOND) } while (0)
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#define DBUF_VERIFY(db) dbuf_verify(db)
#else
#define dprintf_dbuf(db, fmt, ...)
#define dprintf_dbuf_bp(db, bp, fmt, ...)
#define DBUF_VERIFY(db)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DBUF_H */
+638
View File
@@ -0,0 +1,638 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_H
#define _SYS_DMU_H
/*
* This file describes the interface that the DMU provides for its
* consumers.
*
* The DMU also interacts with the SPA. That interface is described in
* dmu_spa.h.
*/
#include <sys/inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h>
#ifdef __cplusplus
extern "C" {
#endif
struct uio;
struct page;
struct vnode;
struct spa;
struct zilog;
struct zio;
struct blkptr;
struct zap_cursor;
struct dsl_dataset;
struct dsl_pool;
struct dnode;
struct drr_begin;
struct drr_end;
struct zbookmark;
struct spa;
struct nvlist;
struct objset_impl;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
typedef enum dmu_object_type {
DMU_OT_NONE,
/* general: */
DMU_OT_OBJECT_DIRECTORY, /* ZAP */
DMU_OT_OBJECT_ARRAY, /* UINT64 */
DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
DMU_OT_BPLIST, /* UINT64 */
DMU_OT_BPLIST_HDR, /* UINT64 */
/* spa: */
DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
DMU_OT_SPACE_MAP, /* UINT64 */
/* zil: */
DMU_OT_INTENT_LOG, /* UINT64 */
/* dmu: */
DMU_OT_DNODE, /* DNODE */
DMU_OT_OBJSET, /* OBJSET */
/* dsl: */
DMU_OT_DSL_DIR, /* UINT64 */
DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */
DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */
DMU_OT_DSL_PROPS, /* ZAP */
DMU_OT_DSL_DATASET, /* UINT64 */
/* zpl: */
DMU_OT_ZNODE, /* ZNODE */
DMU_OT_OLDACL, /* Old ACL */
DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */
DMU_OT_DIRECTORY_CONTENTS, /* ZAP */
DMU_OT_MASTER_NODE, /* ZAP */
DMU_OT_UNLINKED_SET, /* ZAP */
/* zvol: */
DMU_OT_ZVOL, /* UINT8 */
DMU_OT_ZVOL_PROP, /* ZAP */
/* other; for testing only! */
DMU_OT_PLAIN_OTHER, /* UINT8 */
DMU_OT_UINT64_OTHER, /* UINT64 */
DMU_OT_ZAP_OTHER, /* ZAP */
/* new object types: */
DMU_OT_ERROR_LOG, /* ZAP */
DMU_OT_SPA_HISTORY, /* UINT8 */
DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */
DMU_OT_POOL_PROPS, /* ZAP */
DMU_OT_DSL_PERMS, /* ZAP */
DMU_OT_ACL, /* ACL */
DMU_OT_SYSACL, /* SYSACL */
DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
DMU_OT_NEXT_CLONES, /* ZAP */
DMU_OT_SCRUB_QUEUE, /* ZAP */
DMU_OT_NUMTYPES
} dmu_object_type_t;
typedef enum dmu_objset_type {
DMU_OST_NONE,
DMU_OST_META,
DMU_OST_ZFS,
DMU_OST_ZVOL,
DMU_OST_OTHER, /* For testing only! */
DMU_OST_ANY, /* Be careful! */
DMU_OST_NUMTYPES
} dmu_objset_type_t;
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
void byteswap_uint8_array(void *buf, size_t size);
void zap_byteswap(void *buf, size_t size);
void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
#define DS_MODE_NOHOLD 0 /* internal use only */
#define DS_MODE_USER 1 /* simple access, no special needs */
#define DS_MODE_OWNER 2 /* the "main" access, e.g. a mount */
#define DS_MODE_TYPE_MASK 0x3
#define DS_MODE_TYPE(x) ((x) & DS_MODE_TYPE_MASK)
#define DS_MODE_READONLY 0x8
#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY)
#define DS_MODE_INCONSISTENT 0x10
#define DS_MODE_IS_INCONSISTENT(x) ((x) & DS_MODE_INCONSISTENT)
#define DS_FIND_SNAPSHOTS (1<<0)
#define DS_FIND_CHILDREN (1<<1)
/*
* The maximum number of bytes that can be accessed as part of one
* operation, including metadata.
*/
#define DMU_MAX_ACCESS (10<<20) /* 10MB */
#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
/*
* Public routines to create, destroy, open, and close objsets.
*/
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
int dmu_snapshots_destroy(char *fsname, char *snapname);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
typedef struct dmu_buf {
uint64_t db_object; /* object that this buffer is part of */
uint64_t db_offset; /* byte offset in this object */
uint64_t db_size; /* size of buffer in bytes */
void *db_data; /* data in buffer */
} dmu_buf_t;
typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
#define DMU_POOL_ERRLOG_LAST "errlog_last"
#define DMU_POOL_SPARES "spares"
#define DMU_POOL_DEFLATE "deflate"
#define DMU_POOL_HISTORY "history"
#define DMU_POOL_PROPS "pool_props"
#define DMU_POOL_L2CACHE "l2cache"
/* 4x8 zbookmark_t */
#define DMU_POOL_SCRUB_BOOKMARK "scrub_bookmark"
/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
#define DMU_POOL_SCRUB_QUEUE "scrub_queue"
/* 1x8 txg */
#define DMU_POOL_SCRUB_MIN_TXG "scrub_min_txg"
/* 1x8 txg */
#define DMU_POOL_SCRUB_MAX_TXG "scrub_max_txg"
/* 1x4 enum scrub_func */
#define DMU_POOL_SCRUB_FUNC "scrub_func"
/* 1x8 count */
#define DMU_POOL_SCRUB_ERRORS "scrub_errors"
/*
* Allocate an object from this objset. The range of object numbers
* available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode.
*
* The transaction must be assigned to a txg. The newly allocated
* object will be "held" in the transaction (ie. you can modify the
* newly allocated object in this transaction).
*
* dmu_object_alloc() chooses an object and returns it in *objectp.
*
* dmu_object_claim() allocates a specific object number. If that
* number is already allocated, it fails and returns EEXIST.
*
* Return 0 on success, or ENOSPC or EEXIST as specified above.
*/
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* Free an object from this objset.
*
* The object's data will be freed as well (ie. you don't need to call
* dmu_free(object, 0, -1, tx)).
*
* The object need not be held in the transaction.
*
* If there are any holds on this object's buffers (via dmu_buf_hold()),
* or tx holds on the object (via dmu_tx_hold_object()), you can not
* free it; it fails and returns EBUSY.
*
* If the object is not allocated, it fails and returns ENOENT.
*
* Return 0 on success, or EBUSY or ENOENT as specified above.
*/
int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
/*
* Find the next allocated or free object.
*
* The objectp parameter is in-out. It will be updated to be the next
* object which is allocated. Ignore objects which have not been
* modified since txg.
*
* XXX Can only be called on a objset with no dirty data.
*
* Returns 0 on success, or ENOENT if there are no more objects.
*/
int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg);
/*
* Set the data blocksize for an object.
*
* The object cannot have any blocks allcated beyond the first. If
* the first block is allocated already, the new size must be greater
* than the current block size. If these conditions are not met,
* ENOTSUP will be returned.
*
* Returns 0 on success, or EBUSY if there are any holds on the object
* contents, or ENOTSUP as described above.
*/
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
int ibs, dmu_tx_t *tx);
/*
* Set the checksum property on a dnode. The new checksum algorithm will
* apply to all newly written blocks; existing blocks will not be affected.
*/
void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
dmu_tx_t *tx);
/*
* Set the compress property on a dnode. The new compression algorithm will
* apply to all newly written blocks; existing blocks will not be affected.
*/
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
dmu_tx_t *tx);
/*
* Decide how many copies of a given block we should make. Can be from
* 1 to SPA_DVAS_PER_BP.
*/
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
dmu_object_type_t ot);
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
* dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
* data. As with any normal buffer, you must call dmu_buf_read() to
* read db_data, dmu_buf_will_dirty() before modifying it, and the
* object must be held in an assigned transaction before calling
* dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
* buffer as well. You must release your hold with dmu_buf_rele().
*/
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
/*
* Obtain the DMU buffer from the specified object which contains the
* specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
* that it will remain in memory. You must release the hold with
* dmu_buf_rele(). You musn't access the dmu_buf_t after releasing your
* hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
*
* You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
* on the returned buffer before reading or writing the buffer's
* db_data. The comments for those routines describe what particular
* operations are valid after calling them.
*
* The object number must be a valid, allocated object number.
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **);
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
void dmu_buf_rele(dmu_buf_t *db, void *tag);
uint64_t dmu_buf_refcount(dmu_buf_t *db);
/*
* dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
* range of an object. A pointer to an array of dmu_buf_t*'s is
* returned (in *dbpp).
*
* dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
* frees the array. The hold on the array of buffers MUST be released
* with dmu_buf_rele_array. You can NOT release the hold on each buffer
* individually with dmu_buf_rele.
*/
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
/*
* Returns NULL on success, or the existing user ptr if it's already
* been set.
*
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
*
* user_data_ptr_ptr should be NULL, or a pointer to a pointer which
* will be set to db->db_data when you are allowed to access it. Note
* that db->db_data (the pointer) can change when you do dmu_buf_read(),
* dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
* *user_data_ptr_ptr will be set to the new value when it changes.
*
* If non-NULL, pageout func will be called when this buffer is being
* excised from the cache, so that you can clean up the data structure
* pointed to by user_ptr.
*
* dmu_evict_user() will call the pageout func for all buffers in a
* objset with a given pageout func.
*/
void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
dmu_buf_evict_func_t *pageout_func);
/*
* set_user_ie is the same as set_user, but request immediate eviction
* when hold count goes to zero.
*/
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
void *user_ptr, void *user_data_ptr_ptr,
dmu_buf_evict_func_t *pageout_func);
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
/*
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
*/
void *dmu_buf_get_user(dmu_buf_t *db);
/*
* Indicate that you are going to modify the buffer's data (db_data).
*
* The transaction (tx) must be assigned to a txg (ie. you've called
* dmu_tx_assign()). The buffer's object must be held in the tx
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
/*
* You must create a transaction, then hold the objects which you will
* (or might) modify as part of this transaction. Then you must assign
* the transaction to a transaction group. Once the transaction has
* been assigned, you can modify buffers which belong to held objects as
* part of this transaction. You can't modify buffers before the
* transaction has been assigned; you can't modify buffers which don't
* belong to objects which this transaction holds; you can't hold
* objects once the transaction has been assigned. You may hold an
* object which you are going to free (with dmu_object_free()), but you
* don't have to.
*
* You can abort the transaction before it has been assigned.
*
* Note that you may hold buffers (with dmu_buf_hold) at any time,
* regardless of transaction state.
*/
#define DMU_NEW_OBJECT (-1ULL)
#define DMU_OBJECT_END (-1ULL)
dmu_tx_t *dmu_tx_create(objset_t *os);
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
/*
* Free up the data blocks for a defined range of a file. If size is
* zero, the range from offset to end-of-file is freed.
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx);
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size);
int dmu_free_object(objset_t *os, uint64_t object);
/*
* Convenience functions.
*
* Canfail routines will return 0 on success, or an errno if there is a
* nonrecoverable I/O error.
*/
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
extern int zfs_prefetch_disable;
/*
* Asynchronously try to read in the data.
*/
void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
uint64_t len);
typedef struct dmu_object_info {
/* All sizes are in bytes. */
uint32_t doi_data_block_size;
uint32_t doi_metadata_block_size;
uint64_t doi_bonus_size;
dmu_object_type_t doi_type;
dmu_object_type_t doi_bonus_type;
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
uint8_t doi_checksum;
uint8_t doi_compress;
uint8_t doi_pad[5];
/* Values below are number of 512-byte blocks. */
uint64_t doi_physical_blks; /* data + metadata */
uint64_t doi_max_block_offset;
} dmu_object_info_t;
typedef void arc_byteswap_func_t(void *buf, size_t size);
typedef struct dmu_object_type_info {
arc_byteswap_func_t *ot_byteswap;
boolean_t ot_metadata;
char *ot_name;
} dmu_object_type_info_t;
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
/*
* Get information on a DMU object.
*
* Return 0 on success or ENOENT if object is not allocated.
*
* If doi is NULL, just indicates whether the object exists.
*/
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
u_longlong_t *nblk512);
typedef struct dmu_objset_stats {
uint64_t dds_num_clones; /* number of clones of this */
uint64_t dds_creation_txg;
uint64_t dds_guid;
dmu_objset_type_t dds_type;
uint8_t dds_is_snapshot;
uint8_t dds_inconsistent;
char dds_origin[MAXNAMELEN];
} dmu_objset_stats_t;
/*
* Get stats on a dataset.
*/
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
/*
* Add entries to the nvlist for all the objset's properties. See
* zfs_prop_table[] and zfs(1m) for details on the properties.
*/
void dmu_objset_stats(objset_t *os, struct nvlist *nv);
/*
* Get the space usage statistics for statvfs().
*
* refdbytes is the amount of space "referenced" by this objset.
* availbytes is the amount of space available to this objset, taking
* into account quotas & reservations, assuming that no other objsets
* use the space first. These values correspond to the 'referenced' and
* 'available' properties, described in the zfs(1m) manpage.
*
* usedobjs and availobjs are the number of objects currently allocated,
* and available.
*/
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
/*
* The fsid_guid is a 56-bit ID that can change to avoid collisions.
* (Contrast with the ds_guid which is a 64-bit ID that will never
* change, so there is a small probability that it will collide.)
*/
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_is_snapshot(objset_t *os);
extern struct spa *dmu_objset_spa(objset_t *os);
extern struct zilog *dmu_objset_zil(objset_t *os);
extern struct dsl_pool *dmu_objset_pool(objset_t *os);
extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
int maxlen, boolean_t *conflict);
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
extern void *dmu_objset_get_user(objset_t *os);
/*
* Return the txg number for the given assigned transaction.
*/
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
/*
* Synchronous write.
* If a parent zio is provided this function initiates a write on the
* provided buffer as a child of the parent zio.
* In the absence of a parent zio, the write is completed synchronously.
* At write completion, blk is filled with the bp of the written block.
* Note that while the data covered by this function will be on stable
* storage when the write completes this new data does not become a
* permanent part of the file until the associated transaction commits.
*/
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
int dmu_sync(struct zio *zio, dmu_buf_t *db,
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
/*
* Find the next hole or data block in file starting at *off
* Return found offset in *off. Return ESRCH for end of file.
*/
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
uint64_t *off);
/*
* Initial setup and final teardown.
*/
extern void dmu_init(void);
extern void dmu_fini(void);
typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
uint64_t object, uint64_t offset, int len);
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {
/*
* This structure is opaque!
*
* If logical and real are different, we are recving the stream
* into the "real" temporary clone, and then switching it with
* the "logical" target.
*/
struct dsl_dataset *drc_logical_ds;
struct dsl_dataset *drc_real_ds;
struct drr_begin *drc_drrb;
char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_force;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
int dmu_recv_end(dmu_recv_cookie_t *drc);
void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_H */
+239
View File
@@ -0,0 +1,239 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_IMPL_H
#define _SYS_DMU_IMPL_H
#include <sys/txg_impl.h>
#include <sys/zio.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* This is the locking strategy for the DMU. Numbers in parenthesis are
* cases that use that lock order, referenced below:
*
* ARC is self-contained
* bplist is self-contained
* refcount is self-contained
* txg is self-contained (hopefully!)
* zst_lock
* zf_rwlock
*
* XXX try to improve evicting path?
*
* dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
* dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leafs
*
* dp_config_rwlock
* must be held before: everything
* protects dd namespace changes
* protects property changes globally
* held from:
* dsl_dir_open/r:
* dsl_dir_create_sync/w:
* dsl_dir_sync_destroy/w:
* dsl_dir_rename_sync/w:
* dsl_prop_changed_notify/r:
*
* os_obj_lock
* must be held before:
* everything except dp_config_rwlock
* protects os_obj_next
* held from:
* dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
*
* dn_struct_rwlock
* must be held before:
* everything except dp_config_rwlock and os_obj_lock
* protects structure of dnode (eg. nlevels)
* db_blkptr can change when syncing out change to nlevels
* dn_maxblkid
* dn_nlevels
* dn_*blksz*
* phys nlevels, maxblkid, physical blkptr_t's (?)
* held from:
* callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
* dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
* dmu_tx_count_free:
* dbuf_read_impl: db_mtx, dmu_zfetch()
* dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
* dbuf_new_size: db_mtx
* dbuf_dirty: db_mtx
* dbuf_findbp: (callers, phys? - the real need)
* dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
* dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
* dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
* dnode_sync/w (increase_indirection): db_mtx (phys)
* dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
* dnode_new_blkid/w: (dn_maxblkid)
* dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
* dnode_next_offset: (phys)
*
* dn_dbufs_mtx
* must be held before:
* db_mtx, hash_mutexes
* protects:
* dn_dbufs
* dn_evicted
* held from:
* dmu_evict_user: db_mtx (dn_dbufs)
* dbuf_free_range: db_mtx (dn_dbufs)
* dbuf_remove_ref: db_mtx, callees:
* dbuf_hash_remove: hash_mutexes, db_mtx
* dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
* dnode_set_blksz: (dn_dbufs)
*
* hash_mutexes (global)
* must be held before:
* db_mtx
* protects dbuf_hash_table (global) and db_hash_next
* held from:
* dbuf_find: db_mtx
* dbuf_hash_insert: db_mtx
* dbuf_hash_remove: db_mtx
*
* db_mtx (meta-leaf)
* must be held before:
* dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
* protects:
* db_state
* db_holds
* db_buf
* db_changed
* db_data_pending
* db_dirtied
* db_link
* db_dirty_node (??)
* db_dirtycnt
* db_d.*
* db.*
* held from:
* dbuf_dirty: dn_mtx, dn_dirty_mtx
* dbuf_dirty->dsl_dir_willuse_space: dd_lock
* dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
* dbuf_undirty: dn_dirty_mtx (db_d)
* dbuf_write_done: dn_dirty_mtx (db_state)
* dbuf_*
* dmu_buf_update_user: none (db_d)
* dmu_evict_user: none (db_d) (maybe can eliminate)
* dbuf_find: none (db_holds)
* dbuf_hash_insert: none (db_holds)
* dmu_buf_read_array_impl: none (db_state, db_changed)
* dmu_sync: none (db_dirty_node, db_d)
* dnode_reallocate: none (db)
*
* dn_mtx (leaf)
* protects:
* dn_dirty_dbufs
* dn_ranges
* phys accounting
* dn_allocated_txg
* dn_free_txg
* dn_assigned_txg
* dd_assigned_tx
* dn_notxholds
* dn_dirtyctx
* dn_dirtyctx_firstset
* (dn_phys copy fields?)
* (dn_phys contents?)
* held from:
* dnode_*
* dbuf_dirty: none
* dbuf_sync: none (phys accounting)
* dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
* dbuf_write_done: none (phys accounting)
* dmu_object_info_from_dnode: none (accounting)
* dmu_tx_commit: none
* dmu_tx_hold_object_impl: none
* dmu_tx_try_assign: dn_notxholds(cv)
* dmu_tx_unassign: none
*
* dd_lock
* must be held before:
* ds_lock
* ancestors' dd_lock
* protects:
* dd_prop_cbs
* dd_sync_*
* dd_used_bytes
* dd_tempreserved
* dd_space_towrite
* dd_myname
* dd_phys accounting?
* held from:
* dsl_dir_*
* dsl_prop_changed_notify: none (dd_prop_cbs)
* dsl_prop_register: none (dd_prop_cbs)
* dsl_prop_unregister: none (dd_prop_cbs)
* dsl_dataset_block_freeable: none (dd_sync_*)
*
* os_lock (leaf)
* protects:
* os_dirty_dnodes
* os_free_dnodes
* os_dnodes
* os_downgraded_dbufs
* dn_dirtyblksz
* dn_dirty_link
* held from:
* dnode_create: none (os_dnodes)
* dnode_destroy: none (os_dnodes)
* dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
* dnode_free: none (dn_dirtyblksz, os_*_dnodes)
*
* ds_lock
* protects:
* ds_user_ptr
* ds_user_evice_func
* ds_open_refcount
* ds_snapname
* ds_phys accounting
* ds_reserved
* held from:
* dsl_dataset_*
*
* dr_mtx (leaf)
* protects:
* dr_children
* held from:
* dbuf_dirty
* dbuf_undirty
* dbuf_sync_indirect
* dnode_new_blkid
*/
struct objset;
struct dmu_pool;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_IMPL_H */
+136
View File
@@ -0,0 +1,136 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_OBJSET_H
#define _SYS_DMU_OBJSET_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/spa.h>
#include <sys/arc.h>
#include <sys/txg.h>
#include <sys/zfs_context.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/zil.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
struct dmu_tx;
struct objset_impl;
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
sizeof (uint64_t)];
} objset_phys_t;
struct objset {
struct objset_impl *os;
int os_mode;
};
typedef struct objset_impl {
/* Immutable: */
struct dsl_dataset *os_dsl_dataset;
spa_t *os_spa;
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
dnode_t *os_meta_dnode;
zilog_t *os_zil;
objset_t os;
uint8_t os_checksum; /* can change, under dsl_dir's locks */
uint8_t os_compress; /* can change, under dsl_dir's locks */
uint8_t os_copies; /* can change, under dsl_dir's locks */
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
blkptr_t *os_rootbp;
zil_header_t os_zil_header;
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
uint64_t os_obj_next;
/* Protected by os_lock */
kmutex_t os_lock;
list_t os_dirty_dnodes[TXG_SIZE];
list_t os_free_dnodes[TXG_SIZE];
list_t os_dnodes;
list_t os_downgraded_dbufs;
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
} objset_impl_t;
#define DMU_META_DNODE_OBJECT 0
#define DMU_OS_IS_L2CACHEABLE(os) \
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
/* called from zpl */
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int flags);
int dmu_objset_find_spa(spa_t *spa, const char *name,
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
void dmu_objset_byteswap(void *buf, size_t size);
int dmu_objset_evict_dbufs(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
objset_impl_t **osip);
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_OBJSET_H */
+57
View File
@@ -0,0 +1,57 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_TRAVERSE_H
#define _SYS_DMU_TRAVERSE_H
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dnode_phys;
struct dsl_dataset;
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
#define TRAVERSE_PRE (1<<0)
#define TRAVERSE_POST (1<<1)
#define TRAVERSE_PREFETCH_METADATA (1<<2)
#define TRAVERSE_PREFETCH_DATA (1<<3)
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int flags, blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_TRAVERSE_H */
+139
View File
@@ -0,0 +1,139 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dmu_buf_impl;
struct dmu_tx_hold;
struct dnode_link;
struct dsl_pool;
struct dnode;
struct dsl_dir;
struct dmu_tx {
/*
* No synchronization is needed because a tx can only be handled
* by one thread.
*/
list_t tx_holds; /* list of dmu_tx_hold_t */
objset_t *tx_objset;
struct dsl_dir *tx_dir;
struct dsl_pool *tx_pool;
uint64_t tx_txg;
uint64_t tx_lastsnap_txg;
uint64_t tx_lasttried_txg;
txg_handle_t tx_txgh;
void *tx_tempreserve_cookie;
struct dmu_tx_hold *tx_needassign_txh;
uint8_t tx_anyobj;
int tx_err;
#ifdef ZFS_DEBUG
uint64_t tx_space_towrite;
uint64_t tx_space_tofree;
uint64_t tx_space_tooverwrite;
uint64_t tx_space_tounref;
refcount_t tx_space_written;
refcount_t tx_space_freed;
#endif
};
enum dmu_tx_hold_type {
THT_NEWOBJECT,
THT_WRITE,
THT_BONUS,
THT_FREE,
THT_ZAP,
THT_SPACE,
THT_NUMTYPES
};
typedef struct dmu_tx_hold {
dmu_tx_t *txh_tx;
list_node_t txh_node;
struct dnode *txh_dnode;
uint64_t txh_space_towrite;
uint64_t txh_space_tofree;
uint64_t txh_space_tooverwrite;
uint64_t txh_space_tounref;
uint64_t txh_memory_tohold;
uint64_t txh_fudge;
#ifdef ZFS_DEBUG
enum dmu_tx_hold_type txh_type;
uint64_t txh_arg1;
uint64_t txh_arg2;
#endif
} dmu_tx_hold_t;
/*
* These routines are defined in dmu.h, and are called by the user.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
*/
extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
/*
* These routines are only called by the DMU.
*/
dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
int dmu_tx_is_syncing(dmu_tx_t *tx);
int dmu_tx_private_ok(dmu_tx_t *tx);
void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
#ifdef ZFS_DEBUG
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
#else
#define DMU_TX_DIRTY_BUF(tx, db)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_TX_H */
+75
View File
@@ -0,0 +1,75 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _DFETCH_H
#define _DFETCH_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
extern uint64_t zfetch_array_rd_sz;
struct dnode; /* so we can reference dnode */
typedef enum zfetch_dirn {
ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
} zfetch_dirn_t;
typedef struct zstream {
uint64_t zst_offset; /* offset of starting block in range */
uint64_t zst_len; /* length of range, in blocks */
zfetch_dirn_t zst_direction; /* direction of prefetch */
uint64_t zst_stride; /* length of stride, in blocks */
uint64_t zst_ph_offset; /* prefetch offset, in blocks */
uint64_t zst_cap; /* prefetch limit (cap), in blocks */
kmutex_t zst_lock; /* protects stream */
clock_t zst_last; /* lbolt of last prefetch */
avl_node_t zst_node; /* embed avl node here */
} zstream_t;
typedef struct zfetch {
krwlock_t zf_rwlock; /* protects zfetch structure */
list_t zf_stream; /* AVL tree of zstream_t's */
struct dnode *zf_dnode; /* dnode that owns this zfetch */
uint32_t zf_stream_cnt; /* # of active streams */
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
} zfetch_t;
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
#ifdef __cplusplus
}
#endif
#endif /* _DFETCH_H */
+275
View File
@@ -0,0 +1,275 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DNODE_H
#define _SYS_DNODE_H
#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/refcount.h>
#include <sys/dmu_zfetch.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* dnode_hold() flags.
*/
#define DNODE_MUST_BE_ALLOCATED 1
#define DNODE_MUST_BE_FREE 2
/*
* dnode_next_offset() flags.
*/
#define DNODE_FIND_HOLE 1
#define DNODE_FIND_BACKWARDS 2
#define DNODE_FIND_HAVELOCK 4
/*
* Fixed constants.
*/
#define DNODE_SHIFT 9 /* 512 bytes */
#define DN_MIN_INDBLKSHIFT 10 /* 1k */
#define DN_MAX_INDBLKSHIFT 14 /* 16k */
#define DNODE_BLOCK_SHIFT 14 /* 16k */
#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
/*
* Derived constants.
*/
#define DNODE_SIZE (1 << DNODE_SHIFT)
#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
/* The +2 here is a cheesy way to round up */
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
(DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift))
struct dmu_buf_impl;
struct objset_impl;
struct zio;
enum dnode_dirtycontext {
DN_UNDIRTIED,
DN_DIRTY_OPEN,
DN_DIRTY_SYNC
};
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
#define DNODE_FLAG_USED_BYTES (1<<0)
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */
uint8_t dn_nblkptr; /* length of dn_blkptr */
uint8_t dn_bonustype; /* type of data in bonus buffer */
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
uint8_t dn_compress; /* ZIO_COMPRESS type */
uint8_t dn_flags; /* DNODE_FLAG_* */
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
uint16_t dn_bonuslen; /* length of dn_bonus */
uint8_t dn_pad2[4];
/* accounting is protected by dn_dirty_mtx */
uint64_t dn_maxblkid; /* largest allocated block ID */
uint64_t dn_used; /* bytes (or sectors) of disk space */
uint64_t dn_pad3[4];
blkptr_t dn_blkptr[1];
uint8_t dn_bonus[DN_MAX_BONUSLEN];
} dnode_phys_t;
typedef struct dnode {
/*
* dn_struct_rwlock protects the structure of the dnode,
* including the number of levels of indirection (dn_nlevels),
* dn_maxblkid, and dn_next_*
*/
krwlock_t dn_struct_rwlock;
/*
* Our link on dataset's dd_dnodes list.
* Protected by dd_accounting_mtx.
*/
list_node_t dn_link;
/* immutable: */
struct objset_impl *dn_objset;
uint64_t dn_object;
struct dmu_buf_impl *dn_dbuf;
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
/*
* Copies of stuff in dn_phys. They're valid in the open
* context (eg. even before the dnode is first synced).
* Where necessary, these are protected by dn_struct_rwlock.
*/
dmu_object_type_t dn_type; /* object type */
uint16_t dn_bonuslen; /* bonus length */
uint8_t dn_bonustype; /* bonus type */
uint8_t dn_nblkptr; /* number of blkptrs (immutable) */
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
uint8_t dn_compress; /* ZIO_COMPRESS type */
uint8_t dn_nlevels;
uint8_t dn_indblkshift;
uint8_t dn_datablkshift; /* zero if blksz not power of 2! */
uint16_t dn_datablkszsec; /* in 512b sectors */
uint32_t dn_datablksz; /* in bytes */
uint64_t dn_maxblkid;
uint8_t dn_next_nlevels[TXG_SIZE];
uint8_t dn_next_indblkshift[TXG_SIZE];
uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
/* protected by os_lock: */
list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
/* protected by dn_mtx: */
kmutex_t dn_mtx;
list_t dn_dirty_records[TXG_SIZE];
avl_tree_t dn_ranges[TXG_SIZE];
uint64_t dn_allocated_txg;
uint64_t dn_free_txg;
uint64_t dn_assigned_txg;
kcondvar_t dn_notxholds;
enum dnode_dirtycontext dn_dirtyctx;
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
/* protected by own devices */
refcount_t dn_tx_holds;
refcount_t dn_holds;
kmutex_t dn_dbufs_mtx;
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
/* parent IO for current sync write */
zio_t *dn_zio;
/* holds prefetch structure */
struct zfetch dn_zfetch;
} dnode_t;
typedef struct free_range {
avl_node_t fr_node;
uint64_t fr_blkid;
uint64_t fr_nblks;
} free_range_t;
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
uint64_t object);
void dnode_special_close(dnode_t *dn);
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
int dnode_hold(struct objset_impl *dd, uint64_t object,
void *ref, dnode_t **dnp);
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
uint64_t dnode_current_max_length(dnode_t *dn);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
uint64_t nblks, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
void dnode_init(void);
void dnode_fini(void);
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
int minlvl, uint64_t blkfill, uint64_t txg);
void dnode_evict_dbufs(dnode_t *dn);
#ifdef ZFS_DEBUG
/*
* There should be a ## between the string literal and fmt, to make it
* clear that we're joining two strings together, but that piece of shit
* gcc doesn't support that preprocessor token.
*/
#define dprintf_dnode(dn, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char __db_buf[32]; \
uint64_t __db_obj = (dn)->dn_object; \
if (__db_obj == DMU_META_DNODE_OBJECT) \
(void) strcpy(__db_buf, "mdn"); \
else \
(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
(u_longlong_t)__db_obj);\
dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
__db_buf, __VA_ARGS__); \
} \
_NOTE(CONSTCOND) } while (0)
#define DNODE_VERIFY(dn) dnode_verify(dn)
#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
#else
#define dprintf_dnode(db, fmt, ...)
#define DNODE_VERIFY(dn)
#define FREE_VERIFY(db, start, end, tx)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DNODE_H */
+239
View File
@@ -0,0 +1,239 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DATASET_H
#define _SYS_DSL_DATASET_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/bplist.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
/*
* NB: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later. It
* will be needed when we implement 'zfs split' (where the split off
* clone should not be promoted).
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
/*
* DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
* calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
* refquota/refreservations).
*/
#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
* name lookups should be performed case-insensitively.
*/
#define DS_FLAG_CI_DATASET (1ULL<<16)
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
uint64_t ds_prev_snap_txg;
uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */
uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
uint64_t ds_used_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
uint64_t ds_unique_bytes; /* only relevant to snapshots */
/*
* The ds_fsid_guid is a 56-bit ID that can change to avoid
* collisions. The ds_guid is a 64-bit ID that will never
* change, so there is a small probability that it will collide.
*/
uint64_t ds_fsid_guid;
uint64_t ds_guid;
uint64_t ds_flags; /* DS_FLAG_* */
blkptr_t ds_bp;
uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */
uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */
uint64_t ds_pad[6]; /* pad out to 320 bytes for good measure */
} dsl_dataset_phys_t;
typedef struct dsl_dataset {
/* Immutable: */
struct dsl_dir *ds_dir;
dsl_dataset_phys_t *ds_phys;
dmu_buf_t *ds_dbuf;
uint64_t ds_object;
uint64_t ds_fsid_guid;
/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
uint64_t ds_origin_txg;
/* has internal locking: */
bplist_t ds_deadlist;
/* protected by lock on pool's dp_dirty_datasets list */
txg_node_t ds_dirty_link;
list_node_t ds_synced_link;
/*
* ds_phys->ds_<accounting> is also protected by ds_lock.
* Protected by ds_lock:
*/
kmutex_t ds_lock;
void *ds_user_ptr;
dsl_dataset_evict_func_t *ds_user_evict_func;
/*
* ds_owner is protected by the ds_rwlock and the ds_lock
*/
krwlock_t ds_rwlock;
kcondvar_t ds_exclusive_cv;
void *ds_owner;
/* no locking; only for making guesses */
uint64_t ds_trysnap_txg;
/* for objset_open() */
kmutex_t ds_opening_lock;
uint64_t ds_reserved; /* cached refreservation */
uint64_t ds_quota; /* cached refquota */
/* Protected by ds_lock; keep at end of struct for better locality */
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **);
int dsl_dataset_own(const char *name, int flags, void *owner,
dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
int flags, void *owner, dsl_dataset_t **);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
void *owner);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
int dsl_snapshots_destroy(char *fsname, char *snapname);
dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func);
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
dmu_tx_t *tx);
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
void dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
dmu_tx_t *tx);
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
dmu_tx_t *tx);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
dsl_dataset_name(ds, __ds_name); \
dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
kmem_free(__ds_name, MAXNAMELEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
#define dprintf_ds(dd, fmt, ...)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DATASET_H */
+73
View File
@@ -0,0 +1,73 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DELEG_H
#define _SYS_DSL_DELEG_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ZFS_DELEG_PERM_NONE ""
#define ZFS_DELEG_PERM_CREATE "create"
#define ZFS_DELEG_PERM_DESTROY "destroy"
#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
#define ZFS_DELEG_PERM_ROLLBACK "rollback"
#define ZFS_DELEG_PERM_CLONE "clone"
#define ZFS_DELEG_PERM_PROMOTE "promote"
#define ZFS_DELEG_PERM_RENAME "rename"
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_ALLOW "allow"
#define ZFS_DELEG_PERM_USERPROP "userprop"
#define ZFS_DELEG_PERM_VSCAN "vscan"
/*
* Note: the names of properties that are marked delegatable are also
* valid delegated permissions
*/
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
boolean_t dsl_delegation_on(objset_t *os);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DELEG_H */
+160
View File
@@ -0,0 +1,160 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DIR_H
#define _SYS_DSL_DIR_H
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/refcount.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
typedef enum dd_used {
DD_USED_HEAD,
DD_USED_SNAP,
DD_USED_CHILD,
DD_USED_CHILD_RSRV,
DD_USED_REFRSRV,
DD_USED_NUM
} dd_used_t;
#define DD_FLAG_USED_BREAKDOWN (1<<0)
typedef struct dsl_dir_phys {
uint64_t dd_creation_time; /* not actually used */
uint64_t dd_head_dataset_obj;
uint64_t dd_parent_obj;
uint64_t dd_origin_obj;
uint64_t dd_child_dir_zapobj;
/*
* how much space our children are accounting for; for leaf
* datasets, == physical space used by fs + snaps
*/
uint64_t dd_used_bytes;
uint64_t dd_compressed_bytes;
uint64_t dd_uncompressed_bytes;
/* Administrative quota setting */
uint64_t dd_quota;
/* Administrative reservation setting */
uint64_t dd_reserved;
uint64_t dd_props_zapobj;
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
uint64_t dd_flags;
uint64_t dd_used_breakdown[DD_USED_NUM];
uint64_t dd_pad[14]; /* pad out to 256 bytes for good measure */
} dsl_dir_phys_t;
struct dsl_dir {
/* These are immutable; no lock needed: */
uint64_t dd_object;
dsl_dir_phys_t *dd_phys;
dmu_buf_t *dd_dbuf;
dsl_pool_t *dd_pool;
/* protected by lock on pool's dp_dirty_dirs list */
txg_node_t dd_dirty_link;
/* protected by dp_config_rwlock */
dsl_dir_t *dd_parent;
/* Protected by dd_lock */
kmutex_t dd_lock;
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
/* gross estimate of space used by in-flight tx's */
uint64_t dd_tempreserved[TXG_SIZE];
/* amount of space we expect to write; == amount of dirty data */
int64_t dd_space_towrite[TXG_SIZE];
/* protected by dd_lock; keep at end of struct for better locality */
char dd_myname[MAXNAMELEN];
};
void dsl_dir_close(dsl_dir_t *dd, void *tag);
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
const char **tailp);
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **);
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
int dsl_dir_is_private(dsl_dir_t *dd);
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
const char *name, dmu_tx_t *tx);
dsl_checkfunc_t dsl_dir_destroy_check;
dsl_syncfunc_t dsl_dir_destroy_sync;
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
dmu_tx_t *tx);
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
#define ORIGIN_DIR_NAME "$ORIGIN"
#ifdef ZFS_DEBUG
#define dprintf_dd(dd, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \
KM_SLEEP); \
dsl_dir_name(dd, __ds_name); \
dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \
} \
_NOTE(CONSTCOND) } while (0)
#else
#define dprintf_dd(dd, fmt, ...)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DIR_H */
+150
View File
@@ -0,0 +1,150 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_POOL_H
#define _SYS_DSL_POOL_H
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/txg_impl.h>
#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/dnode.h>
#ifdef __cplusplus
extern "C" {
#endif
struct objset;
struct dsl_dir;
struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
enum scrub_func {
SCRUB_FUNC_NONE,
SCRUB_FUNC_CLEAN,
SCRUB_FUNC_NUMFUNCS
};
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
#define DMU_OT_TOTAL DMU_OT_NUMTYPES
typedef struct zfs_blkstat {
uint64_t zb_count;
uint64_t zb_asize;
uint64_t zb_lsize;
uint64_t zb_psize;
uint64_t zb_gangs;
uint64_t zb_ditto_2_of_2_samevdev;
uint64_t zb_ditto_2_of_3_samevdev;
uint64_t zb_ditto_3_of_3_samevdev;
} zfs_blkstat_t;
typedef struct zfs_all_blkstats {
zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
} zfs_all_blkstats_t;
typedef struct dsl_pool {
/* Immutable */
spa_t *dp_spa;
struct objset *dp_meta_objset;
struct dsl_dir *dp_root_dir;
struct dsl_dir *dp_mos_dir;
struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj;
/* No lock needed - sync context only */
blkptr_t dp_meta_rootbp;
list_t dp_synced_datasets;
hrtime_t dp_read_overhead;
uint64_t dp_throughput;
uint64_t dp_write_limit;
/* Uses dp_lock */
kmutex_t dp_lock;
uint64_t dp_space_towrite[TXG_SIZE];
uint64_t dp_tempreserved[TXG_SIZE];
enum scrub_func dp_scrub_func;
uint64_t dp_scrub_queue_obj;
uint64_t dp_scrub_min_txg;
uint64_t dp_scrub_max_txg;
zbookmark_t dp_scrub_bookmark;
boolean_t dp_scrub_pausing;
boolean_t dp_scrub_isresilver;
uint64_t dp_scrub_start_time;
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
boolean_t dp_scrub_restart;
/* Has its own locking */
tx_state_t dp_tx;
txg_list_t dp_dirty_datasets;
txg_list_t dp_dirty_dirs;
txg_list_t dp_sync_tasks;
/*
* Protects administrative changes (properties, namespace)
* It is only held for write in syncing context. Therefore
* syncing context does not need to ever have it for read, since
* nobody else could possibly have it for write.
*/
krwlock_t dp_config_rwlock;
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_zil_clean(dsl_pool_t *dp);
int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_memory_pressure(dsl_pool_t *dp);
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
int dsl_pool_scrub_cancel(dsl_pool_t *dp);
int dsl_pool_scrub_clean(dsl_pool_t *dp);
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_scrub_restart(dsl_pool_t *dp);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_POOL_H */
+82
View File
@@ -0,0 +1,82 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_PROP_H
#define _SYS_DSL_PROP_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
struct dsl_dir;
/* The callback func may not call into the DMU or DSL! */
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
typedef struct dsl_prop_cb_record {
list_node_t cbr_node; /* link on dd_prop_cbs */
struct dsl_dataset *cbr_ds;
const char *cbr_propname;
dsl_prop_changed_cb_t *cbr_func;
void *cbr_arg;
} dsl_prop_cb_record_t;
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_numcb(struct dsl_dataset *ds);
int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_set(const char *ddname, const char *propname,
int intsz, int numints, const void *buf);
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
cred_t *cr, dmu_tx_t *tx);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,
zfs_prop_t prop, const char *value);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_PROP_H */
+83
View File
@@ -0,0 +1,83 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_SYNCTASK_H
#define _SYS_DSL_SYNCTASK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/txg.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
typedef struct dsl_sync_task {
list_node_t dst_node;
dsl_checkfunc_t *dst_checkfunc;
dsl_syncfunc_t *dst_syncfunc;
void *dst_arg1;
void *dst_arg2;
int dst_err;
} dsl_sync_task_t;
typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
cred_t *dstg_cr;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;
boolean_t dstg_nowaiter;
} dsl_sync_task_group_t;
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *, dsl_syncfunc_t *,
void *arg1, void *arg2, int blocks_modified);
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
int dsl_sync_task_do(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified);
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_SYNCTASK_H */
+71
View File
@@ -0,0 +1,71 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_METASLAB_H
#define _SYS_METASLAB_H
#include <sys/spa.h>
#include <sys/space_map.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct metaslab_class metaslab_class_t;
typedef struct metaslab_group metaslab_group_t;
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
uint64_t start, uint64_t size, uint64_t txg);
extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
#define METASLAB_GANG_HEADER 0x2
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
extern metaslab_class_t *metaslab_class_create(void);
extern void metaslab_class_destroy(metaslab_class_t *mc);
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
vdev_t *vd);
extern void metaslab_group_destroy(metaslab_group_t *mg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_METASLAB_H */
+81
View File
@@ -0,0 +1,81 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_METASLAB_IMPL_H
#define _SYS_METASLAB_IMPL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/metaslab.h>
#include <sys/space_map.h>
#include <sys/vdev.h>
#include <sys/txg.h>
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
struct metaslab_class {
metaslab_group_t *mc_rotor;
uint64_t mc_allocated;
};
struct metaslab_group {
kmutex_t mg_lock;
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
int64_t mg_bias;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;
};
/*
* Each metaslab's free space is tracked in space map object in the MOS,
* which is only updated in syncing context. Each time we sync a txg,
* we append the allocs and frees from that txg to the space map object.
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
* to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
*/
struct metaslab {
kmutex_t ms_lock; /* metaslab lock */
space_map_obj_t ms_smo; /* synced space map object */
space_map_obj_t ms_smo_syncing; /* syncing space map object */
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
space_map_t ms_map; /* in-core free space map */
uint64_t ms_weight; /* weight vs. others in group */
metaslab_group_t *ms_group; /* metaslab group */
avl_node_t ms_group_node; /* node in metaslab group tree */
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_METASLAB_IMPL_H */
+104
View File
@@ -0,0 +1,104 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_REFCOUNT_H
#define _SYS_REFCOUNT_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* If the reference is held only by the calling function and not any
* particular object, use FTAG (which is a string) for the holder_tag.
* Otherwise, use the object that holds the reference.
*/
#define FTAG ((char *)__func__)
#if defined(DEBUG) || !defined(_KERNEL)
typedef struct reference {
list_node_t ref_link;
void *ref_holder;
uint64_t ref_number;
uint8_t *ref_removed;
} reference_t;
typedef struct refcount {
kmutex_t rc_mtx;
list_t rc_list;
list_t rc_removed;
int64_t rc_count;
int64_t rc_removed_count;
} refcount_t;
/* Note: refcount_t must be initialized with refcount_create() */
void refcount_create(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
int refcount_is_zero(refcount_t *rc);
int64_t refcount_count(refcount_t *rc);
int64_t refcount_add(refcount_t *rc, void *holder_tag);
int64_t refcount_remove(refcount_t *rc, void *holder_tag);
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
void refcount_init(void);
void refcount_fini(void);
#else /* DEBUG */
typedef struct refcount {
uint64_t rc_count;
} refcount_t;
#define refcount_create(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
#define refcount_count(rc) ((rc)->rc_count)
#define refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
#define refcount_add_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, number)
#define refcount_remove_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, -number)
#define refcount_init()
#define refcount_fini()
#endif /* DEBUG */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_REFCOUNT_H */
+80
View File
@@ -0,0 +1,80 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_RR_RW_LOCK_H
#define _SYS_RR_RW_LOCK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/inttypes.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
/*
* A reader-writer lock implementation that allows re-entrant reads, but
* still gives writers priority on "new" reads.
*
* See rrwlock.c for more details about the implementation.
*
* Fields of the rrwlock_t structure:
* - rr_lock: protects modification and reading of rrwlock_t fields
* - rr_cv: cv for waking up readers or waiting writers
* - rr_writer: thread id of the current writer
* - rr_anon_rount: number of active anonymous readers
* - rr_linked_rcount: total number of non-anonymous active readers
* - rr_writer_wanted: a writer wants the lock
*/
typedef struct rrwlock {
kmutex_t rr_lock;
kcondvar_t rr_cv;
kthread_t *rr_writer;
refcount_t rr_anon_rcount;
refcount_t rr_linked_rcount;
boolean_t rr_writer_wanted;
} rrwlock_t;
/*
* 'tag' is used in reference counting tracking. The
* 'tag' must be the same in a rrw_enter() as in its
* corresponding rrw_exit().
*/
void rrw_init(rrwlock_t *rrl);
void rrw_destroy(rrwlock_t *rrl);
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
void rrw_exit(rrwlock_t *rrl, void *tag);
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
#ifdef __cplusplus
}
#endif
#endif /* _SYS_RR_RW_LOCK_H */
+554
View File
@@ -0,0 +1,554 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_H
#define _SYS_SPA_H
#include <sys/avl.h>
#include <sys/zfs_context.h>
#include <sys/nvpair.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Forward references that lots of things need.
*/
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
*/
#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len))
#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len))
#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low))
#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low))
#define BF32_GET(x, low, len) BF32_DECODE(x, low, len)
#define BF64_GET(x, low, len) BF64_DECODE(x, low, len)
#define BF32_SET(x, low, len, val) \
((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len))
#define BF64_SET(x, low, len, val) \
((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len))
#define BF32_GET_SB(x, low, len, shift, bias) \
((BF32_GET(x, low, len) + (bias)) << (shift))
#define BF64_GET_SB(x, low, len, shift, bias) \
((BF64_GET(x, low, len) + (bias)) << (shift))
#define BF32_SET_SB(x, low, len, shift, bias, val) \
BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define BF64_SET_SB(x, low, len, shift, bias, val) \
BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
/*
* We currently support nine block sizes, from 512 bytes to 128K.
* We could go higher, but the benefits are near-zero and the cost
* of COWing a giant block to modify one byte would become excessive.
*/
#define SPA_MINBLOCKSHIFT 9
#define SPA_MAXBLOCKSHIFT 17
#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
/*
* Size of block to hold the configuration data (a packed nvlist)
*/
#define SPA_CONFIG_BLOCKSIZE (1 << 14)
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
* The ASIZE encoding should be at least 64 times larger (6 more bits)
* to support up to 4-way RAID-Z mirror mode with worst-case gang block
* overhead, three DVAs per bp, plus one more bit in case we do anything
* else that expands the ASIZE.
*/
#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */
#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */
#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
* The members of the dva_t should be considered opaque outside the SPA.
*/
typedef struct dva {
uint64_t dva_word[2];
} dva_t;
/*
* Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
*/
typedef struct zio_cksum {
uint64_t zc_word[4];
} zio_cksum_t;
/*
* Each block is described by its DVAs, time of birth, checksum, etc.
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | vdev3 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | checksum[2] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | checksum[3] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* vdev virtual device ID
* offset offset into virtual device
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
* GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
* E endianness
* type DMU object type
* lvl level of indirection
* birth txg transaction group in which the block was born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
typedef struct blkptr {
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[3]; /* Extra space for the future */
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
/*
* Macros to get and set fields in a bp or DVA.
*/
#define DVA_GET_ASIZE(dva) \
BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
#define DVA_SET_ASIZE(dva, x) \
BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32)
#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x)
#define DVA_GET_OFFSET(dva) \
BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
#define DVA_SET_OFFSET(dva, x) \
BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1)
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
(BP_IS_HOLE(bp) ? 0 : \
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
#define BP_SET_LSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_PSIZE(bp) \
BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_PSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_GET_ASIZE(bp) \
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_GET_UCSIZE(bp) \
((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
#define BP_GET_NDVAS(bp) \
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_COUNT_GANG(bp) \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
DVA_GET_GANG(&(bp)->blk_dva[2]))
#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
(dva1)->dva_word[0] == (dva2)->dva_word[0])
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
((zc1).zc_word[1] - (zc2).zc_word[1]) | \
((zc1).zc_word[2] - (zc2).zc_word[2]) | \
((zc1).zc_word[3] - (zc2).zc_word[3])))
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
{ \
(zcp)->zc_word[0] = w0; \
(zcp)->zc_word[1] = w1; \
(zcp)->zc_word[2] = w2; \
(zcp)->zc_word[3] = w3; \
}
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
#define BP_ZERO(bp) \
{ \
(bp)->blk_dva[0].dva_word[0] = 0; \
(bp)->blk_dva[0].dva_word[1] = 0; \
(bp)->blk_dva[1].dva_word[0] = 0; \
(bp)->blk_dva[1].dva_word[1] = 0; \
(bp)->blk_dva[2].dva_word[0] = 0; \
(bp)->blk_dva[2].dva_word[1] = 0; \
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
(bp)->blk_pad[2] = 0; \
(bp)->blk_birth = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
#define BLK_FILL_ALREADY_FREED (-1ULL)
/*
* Note: the byteorder is either 0 or -1, both of which are palindromes.
* This simplifies the endianness handling a bit.
*/
#ifdef _BIG_ENDIAN
#define ZFS_HOST_BYTEORDER (0ULL)
#else
#define ZFS_HOST_BYTEORDER (-1ULL)
#endif
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
#define BP_SPRINTF_LEN 320
#include <sys/dmu.h>
#define BP_GET_BUFC_TYPE(bp) \
(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
ARC_BUFC_METADATA : ARC_BUFC_DATA);
/*
* Routines found in spa.c
*/
/* state manipulation functions */
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_get_stats(const char *pool, nvlist_t **config,
char *altroot, size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
const char *history_str, nvlist_t *zplprops);
extern int spa_check_rootconf(char *devpath, char *devid,
nvlist_t **bestconf, uint64_t *besttxg);
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
extern int spa_import_rootpool(char *devpath, char *devid);
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
extern int spa_destroy(char *pool);
extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force);
extern int spa_reset(char *pool);
extern void spa_async_request(spa_t *spa, int flag);
extern void spa_async_unrequest(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
#define SPA_ASYNC_CONFIG_UPDATE 0x01
#define SPA_ASYNC_REMOVE 0x02
#define SPA_ASYNC_PROBE 0x04
#define SPA_ASYNC_RESILVER_DONE 0x08
#define SPA_ASYNC_RESILVER 0x10
/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
int replacing);
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
extern void spa_spare_remove(vdev_t *vd);
extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
extern void spa_spare_activate(vdev_t *vd);
/* L2ARC state (which is global across all pools) */
extern void spa_l2cache_add(vdev_t *vd);
extern void spa_l2cache_remove(vdev_t *vd);
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
extern void spa_l2cache_activate(vdev_t *vd);
extern void spa_l2cache_drop(spa_t *spa);
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
/* scrubbing */
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
/*
* SPA configuration functions in spa_config.c
*/
#define SPA_CONFIG_UPDATE_POOL 0
#define SPA_CONFIG_UPDATE_VDEVS 1
extern void spa_config_sync(spa_t *, boolean_t, boolean_t);
extern void spa_config_load(void);
extern nvlist_t *spa_all_configs(uint64_t *);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern void spa_config_update(spa_t *spa, int what);
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/*
* Miscellaneous SPA routines in spa_misc.c
*/
/* Namespace manipulation */
extern spa_t *spa_lookup(const char *name);
extern spa_t *spa_add(const char *name, const char *altroot);
extern void spa_remove(spa_t *spa);
extern spa_t *spa_next(spa_t *prev);
/* Refcount functions */
extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_CONFIG 0x01
#define SCL_STATE 0x02
#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
#define SCL_ALLOC 0x08
#define SCL_ZIO 0x10
#define SCL_FREE 0x20
#define SCL_VDEV 0x40
#define SCL_LOCKS 7
#define SCL_ALL ((1 << SCL_LOCKS) - 1)
#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)
/* Pool configuration locks */
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
extern void spa_config_exit(spa_t *spa, int locks, void *tag);
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
/* Pool vdev add/remove lock */
extern uint64_t spa_vdev_enter(spa_t *spa);
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
/* Pool vdev state change lock */
extern void spa_vdev_state_enter(spa_t *spa);
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
extern void spa_altroot(spa_t *, char *, size_t);
extern int spa_sync_pass(spa_t *spa);
extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);
extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_version(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_busy(void);
extern uint8_t spa_get_failmode(spa_t *spa);
extern boolean_t spa_suspended(spa_t *spa);
/* Miscellaneous support routines */
extern int spa_rename(const char *oldname, const char *newname);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
extern void spa_evict_all(void);
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
boolean_t l2cache);
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
extern boolean_t spa_has_slogs(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
/* history logging */
typedef enum history_log_type {
LOG_CMD_POOL_CREATE,
LOG_CMD_NORMAL,
LOG_INTERNAL
} history_log_type_t;
typedef struct history_arg {
const char *ha_history_str;
history_log_type_t ha_log_type;
history_internal_events_t ha_event;
char ha_zone[MAXPATHLEN];
} history_arg_t;
extern char *spa_his_ievent_table[];
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
extern int spa_history_log(spa_t *spa, const char *his_buf,
history_log_type_t what);
void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
/* error handling */
struct zbookmark;
struct zio;
extern void spa_log_error(spa_t *spa, struct zio *zio);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t stateoroffset, uint64_t length);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);
/* Initialization and termination */
extern void spa_init(int flags);
extern void spa_fini(void);
extern void spa_boot_init();
/* properties */
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
/* asynchronous event notification */
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
#define dprintf_bp(bp, fmt, ...)
#endif
extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_H */
+45
View File
@@ -0,0 +1,45 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_BOOT_H
#define _SYS_SPA_BOOT_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/nvpair.h>
#ifdef __cplusplus
extern "C" {
#endif
extern char *spa_get_bootprop(char *prop);
extern void spa_free_bootprop(char *prop);
extern int spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf_p);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_BOOT_H */
+196
View File
@@ -0,0 +1,196 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_IMPL_H
#define _SYS_SPA_IMPL_H
#include <sys/spa.h>
#include <sys/vdev.h>
#include <sys/metaslab.h>
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/uberblock_impl.h>
#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/refcount.h>
#include <sys/bplist.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct spa_error_entry {
zbookmark_t se_bookmark;
char *se_name;
avl_node_t se_avl;
} spa_error_entry_t;
typedef struct spa_history_phys {
uint64_t sh_pool_create_len; /* ending offset of zpool create */
uint64_t sh_phys_max_off; /* physical EOF */
uint64_t sh_bof; /* logical BOF */
uint64_t sh_eof; /* logical EOF */
uint64_t sh_records_lost; /* num of records overwritten */
} spa_history_phys_t;
struct spa_aux_vdev {
uint64_t sav_object; /* MOS object for device list */
nvlist_t *sav_config; /* cached device config */
vdev_t **sav_vdevs; /* devices */
int sav_count; /* number devices */
boolean_t sav_sync; /* sync the device list */
nvlist_t **sav_pending; /* pending device additions */
uint_t sav_npending; /* # pending devices */
};
typedef struct spa_config_lock {
kmutex_t scl_lock;
kthread_t *scl_writer;
int scl_write_wanted;
kcondvar_t scl_cv;
refcount_t scl_count;
} spa_config_lock_t;
typedef struct spa_config_dirent {
list_node_t scd_link;
char *scd_path;
} spa_config_dirent_t;
typedef enum spa_log_state {
SPA_LOG_UNKNOWN = 0, /* unknown log state */
SPA_LOG_MISSING, /* missing log(s) */
SPA_LOG_CLEAR, /* clear the log(s) */
SPA_LOG_GOOD, /* log(s) are good */
} spa_log_state_t;
enum zio_taskq_type {
ZIO_TASKQ_ISSUE = 0,
ZIO_TASKQ_INTERRUPT,
ZIO_TASKQ_TYPES
};
struct spa {
/*
* Fields protected by spa_namespace_lock.
*/
char spa_name[MAXNAMELEN]; /* pool name */
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
uint64_t spa_config_txg; /* txg of last config change */
int spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
int spa_inject_ref; /* injection references */
uint8_t spa_sync_on; /* sync threads are running */
spa_load_state_t spa_load_state; /* current load operation */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
uint64_t spa_freeze_txg; /* freeze pool at this txg */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
uint64_t spa_load_guid; /* initial guid for spa_load */
list_t spa_config_dirty_list; /* vdevs with dirty config */
list_t spa_state_dirty_list; /* vdevs with dirty state */
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
bplist_t spa_sync_bplist; /* deferred-free bplist */
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
uint64_t spa_scrub_errors; /* scrub I/O error count */
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
uint8_t spa_scrub_active; /* active or suspended? */
uint8_t spa_scrub_type; /* type of scrub we're doing */
uint8_t spa_scrub_finished; /* indicator to rotate logs */
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */
kcondvar_t spa_async_cv; /* wait for thread_exit() */
uint16_t spa_async_tasks; /* async task mask */
kmutex_t spa_async_root_lock; /* protects async root count */
uint64_t spa_async_root_count; /* number of async root zios */
kcondvar_t spa_async_root_cv; /* notify when count == 0 */
char *spa_root; /* alternate root directory */
uint64_t spa_ena; /* spa-wide ereport ENA */
boolean_t spa_last_open_failed; /* true if last open faled */
kmutex_t spa_errlog_lock; /* error log lock */
uint64_t spa_errlog_last; /* last error log object */
uint64_t spa_errlog_scrub; /* scrub error log object */
kmutex_t spa_errlist_lock; /* error list/ereport lock */
avl_tree_t spa_errlist_last; /* last error list */
avl_tree_t spa_errlist_scrub; /* scrub error list */
uint64_t spa_deflate; /* should we deflate? */
uint64_t spa_history; /* history object */
kmutex_t spa_history_lock; /* history lock */
vdev_t *spa_pending_vdev; /* pending vdev additions */
kmutex_t spa_props_lock; /* property lock */
uint64_t spa_pool_props_object; /* object for properties */
uint64_t spa_bootfs; /* default boot filesystem */
uint64_t spa_failmode; /* failure mode for the pool */
uint64_t spa_delegation; /* delegation on/off */
list_t spa_config_list; /* previous cache file(s) */
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
kcondvar_t spa_suspend_cv; /* notification of resume */
uint8_t spa_suspended; /* pool is suspended */
boolean_t spa_import_faulted; /* allow faulted vdevs */
boolean_t spa_is_root; /* pool is root */
int spa_minref; /* num refs when first opened */
spa_log_state_t spa_log_state; /* log state */
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
* In order for the MDB module to function correctly, the other
* fields must remain in the same location.
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
refcount_t spa_refcount; /* number of opens */
};
extern const char *spa_config_path;
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
((compress) == ZIO_COMPRESS_ON && \
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
(compress) == ZIO_COMPRESS_OFF)
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_IMPL_H */
+162
View File
@@ -0,0 +1,162 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPACE_MAP_H
#define _SYS_SPACE_MAP_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/avl.h>
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct space_map_ops space_map_ops_t;
typedef struct space_map {
avl_tree_t sm_root; /* AVL tree of map segments */
uint64_t sm_space; /* sum of all segments in the map */
uint64_t sm_start; /* start of map */
uint64_t sm_size; /* size of map */
uint8_t sm_shift; /* unit shift */
uint8_t sm_pad[3]; /* unused */
uint8_t sm_loaded; /* map loaded? */
uint8_t sm_loading; /* map loading? */
kcondvar_t sm_load_cv; /* map load completion */
space_map_ops_t *sm_ops; /* space map block picker ops vector */
void *sm_ppd; /* picker-private data */
kmutex_t *sm_lock; /* pointer to lock that protects map */
} space_map_t;
typedef struct space_seg {
avl_node_t ss_node; /* AVL node */
uint64_t ss_start; /* starting offset of this segment */
uint64_t ss_end; /* ending offset (non-inclusive) */
} space_seg_t;
typedef struct space_map_obj {
uint64_t smo_object; /* on-disk space map object */
uint64_t smo_objsize; /* size of the object */
uint64_t smo_alloc; /* space allocated from the map */
} space_map_obj_t;
struct space_map_ops {
void (*smop_load)(space_map_t *sm);
void (*smop_unload)(space_map_t *sm);
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
};
/*
* debug entry
*
* 1 3 10 50
* ,---+--------+------------+---------------------------------.
* | 1 | action | syncpass | txg (lower bits) |
* `---+--------+------------+---------------------------------'
* 63 62 60 59 50 49 0
*
*
*
* non-debug entry
*
* 1 47 1 15
* ,-----------------------------------------------------------.
* | 0 | offset (sm_shift units) | type | run |
* `-----------------------------------------------------------'
* 63 62 17 16 15 0
*/
/* All this stuff takes and returns bytes */
#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
#define SM_ALLOC 0x0
#define SM_FREE 0x1
/*
* The data for a given space map can be kept on blocks of any size.
* Larger blocks entail fewer i/o operations, but they also cause the
* DMU to keep more data in-core, and also to waste more i/o bandwidth
* when only a few blocks have changed since the last transaction group.
* This could use a lot more research, but for now, set the freelist
* block size to 4k (2^12).
*/
#define SPACE_MAP_BLOCKSHIFT 12
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
uint8_t shift, kmutex_t *lp);
extern void space_map_destroy(space_map_t *sm);
extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_walk(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_union(space_map_t *smd, space_map_t *sms);
extern void space_map_load_wait(space_map_t *sm);
extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
uint8_t maptype, space_map_obj_t *smo, objset_t *os);
extern void space_map_unload(space_map_t *sm);
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
extern void space_map_truncate(space_map_obj_t *smo,
objset_t *os, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPACE_MAP_H */
+130
View File
@@ -0,0 +1,130 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_TXG_H
#define _SYS_TXG_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/spa.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */
#define TXG_SIZE 4 /* next power of 2 */
#define TXG_MASK (TXG_SIZE - 1) /* mask for size */
#define TXG_INITIAL TXG_SIZE /* initial txg */
#define TXG_IDX (txg & TXG_MASK)
#define TXG_WAIT 1ULL
#define TXG_NOWAIT 2ULL
typedef struct tx_cpu tx_cpu_t;
typedef struct txg_handle {
tx_cpu_t *th_cpu;
uint64_t th_txg;
} txg_handle_t;
typedef struct txg_node {
struct txg_node *tn_next[TXG_SIZE];
uint8_t tn_member[TXG_SIZE];
} txg_node_t;
typedef struct txg_list {
kmutex_t tl_lock;
size_t tl_offset;
txg_node_t *tl_head[TXG_SIZE];
} txg_list_t;
struct dsl_pool;
extern void txg_init(struct dsl_pool *dp, uint64_t txg);
extern void txg_fini(struct dsl_pool *dp);
extern void txg_sync_start(struct dsl_pool *dp);
extern void txg_sync_stop(struct dsl_pool *dp);
extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
extern void txg_rele_to_quiesce(txg_handle_t *txghp);
extern void txg_rele_to_sync(txg_handle_t *txghp);
extern void txg_suspend(struct dsl_pool *dp);
extern void txg_resume(struct dsl_pool *dp);
/*
* Delay the caller by the specified number of ticks or until
* the txg closes (whichever comes first). This is intended
* to be used to throttle writers when the system nears its
* capacity.
*/
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
/*
* Wait until the given transaction group has finished syncing.
* Try to make this happen as soon as possible (eg. kick off any
* necessary syncs immediately). If txg==0, wait for the currently open
* txg to finish syncing.
*/
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
/*
* Wait until the given transaction group, or one after it, is
* the open transaction group. Try to make this happen as soon
* as possible (eg. kick off any necessary syncs immediately).
* If txg == 0, wait for the next open txg.
*/
extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
/*
* Returns TRUE if we are "backed up" waiting for the syncing
* transaction to complete; otherwise returns FALSE.
*/
extern boolean_t txg_stalled(struct dsl_pool *dp);
/* returns TRUE if someone is waiting for the next txg to sync */
extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
/*
* Per-txg object lists.
*/
#define TXG_CLEAN(txg) ((txg) - 1)
extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_TXG_H */
+73
View File
@@ -0,0 +1,73 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_TXG_IMPL_H
#define _SYS_TXG_IMPL_H
#include <sys/spa.h>
#include <sys/txg.h>
#ifdef __cplusplus
extern "C" {
#endif
struct tx_cpu {
kmutex_t tc_lock;
kcondvar_t tc_cv[TXG_SIZE];
uint64_t tc_count[TXG_SIZE];
char tc_pad[16];
};
typedef struct tx_state {
tx_cpu_t *tx_cpu; /* protects right to enter txg */
kmutex_t tx_sync_lock; /* protects tx_state_t */
krwlock_t tx_suspend;
uint64_t tx_open_txg; /* currently open txg id */
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
uint64_t tx_syncing_txg; /* currently syncing txg id */
uint64_t tx_synced_txg; /* last synced txg id */
uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
kcondvar_t tx_sync_more_cv;
kcondvar_t tx_sync_done_cv;
kcondvar_t tx_quiesce_more_cv;
kcondvar_t tx_quiesce_done_cv;
kcondvar_t tx_timeout_cv;
kcondvar_t tx_exit_cv; /* wait for all threads to exit */
uint8_t tx_threads; /* number of threads */
uint8_t tx_exiting; /* set when we're exiting */
kthread_t *tx_sync_thread;
kthread_t *tx_quiesce_thread;
} tx_state_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_TXG_IMPL_H */
+50
View File
@@ -0,0 +1,50 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UBERBLOCK_H
#define _SYS_UBERBLOCK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/spa.h>
#include <sys/vdev.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct uberblock uberblock_t;
extern int uberblock_verify(uberblock_t *ub);
extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_UBERBLOCK_H */
+63
View File
@@ -0,0 +1,63 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UBERBLOCK_IMPL_H
#define _SYS_UBERBLOCK_IMPL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/uberblock.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* The uberblock version is incremented whenever an incompatible on-disk
* format change is made to the SPA, DMU, or ZAP.
*
* Note: the first two fields should never be moved. When a storage pool
* is opened, the uberblock must be read off the disk before the version
* can be checked. If the ub_version field is moved, we may not detect
* version mismatch. If the ub_magic field is moved, applications that
* expect the magic number in the first word won't work.
*/
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
#define UBERBLOCK_SHIFT 10 /* up to 1K */
struct uberblock {
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
uint64_t ub_version; /* SPA_VERSION */
uint64_t ub_txg; /* txg of last sync */
uint64_t ub_guid_sum; /* sum of all vdev guids */
uint64_t ub_timestamp; /* UTC time of last sync */
blkptr_t ub_rootbp; /* MOS objset_phys_t */
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_UBERBLOCK_IMPL_H */
+59
View File
@@ -0,0 +1,59 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UNIQUE_H
#define _SYS_UNIQUE_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/* The number of significant bits in each unique value. */
#define UNIQUE_BITS 56
void unique_init(void);
void unique_fini(void);
/*
* Return a new unique value (which will not be uniquified against until
* it is unique_insert()-ed.
*/
uint64_t unique_create(void);
/* Return a unique value, which equals the one passed in if possible. */
uint64_t unique_insert(uint64_t value);
/* Indicate that this value no longer needs to be uniquified against. */
void unique_remove(uint64_t value);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_UNIQUE_H */
+135
View File
@@ -0,0 +1,135 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_H
#define _SYS_VDEV_H
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/space_map.h>
#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
#endif
extern boolean_t zfs_nocacheflush;
extern int vdev_open(vdev_t *);
extern int vdev_validate(vdev_t *);
extern void vdev_close(vdev_t *);
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
extern void vdev_init(vdev_t *, uint64_t txg);
extern void vdev_reopen(vdev_t *);
extern int vdev_validate_aux(vdev_t *vd);
extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
extern boolean_t vdev_is_bootable(vdev_t *vd);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
int scrub_done);
extern boolean_t vdev_resilver_needed(vdev_t *vd,
uint64_t *minp, uint64_t *maxp);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
extern void vdev_clear_stats(vdev_t *vd);
extern void vdev_stat_update(zio_t *zio, uint64_t psize);
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
boolean_t complete);
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
extern void vdev_propagate_state(vdev_t *vd);
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
vdev_aux_t aux);
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
int64_t alloc_delta, boolean_t update_root);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
extern int vdev_fault(spa_t *spa, uint64_t guid);
extern int vdev_degrade(spa_t *spa, uint64_t guid);
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
vdev_state_t *);
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
extern void vdev_clear(spa_t *spa, vdev_t *vd);
extern boolean_t vdev_is_dead(vdev_t *vd);
extern boolean_t vdev_readable(vdev_t *vd);
extern boolean_t vdev_writeable(vdev_t *vd);
extern boolean_t vdev_allocatable(vdev_t *vd);
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern void vdev_cache_init(vdev_t *vd);
extern void vdev_cache_fini(vdev_t *vd);
extern int vdev_cache_read(zio_t *zio);
extern void vdev_cache_write(zio_t *zio);
extern void vdev_cache_purge(vdev_t *vd);
extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
/*
* Label routines
*/
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern int vdev_label_number(uint64_t psise, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
VDEV_LABEL_REPLACE, /* replace an existing device */
VDEV_LABEL_SPARE, /* add a new hot spare */
VDEV_LABEL_REMOVE, /* remove an existing device */
VDEV_LABEL_L2CACHE /* add an L2ARC cache device */
} vdev_labeltype_t;
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_VDEV_H */
+46
View File
@@ -0,0 +1,46 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_FILE_H
#define _SYS_VDEV_FILE_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/vdev.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct vdev_file {
vnode_t *vf_vnode;
} vdev_file_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_VDEV_FILE_H */
+305
View File
@@ -0,0 +1,305 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_IMPL_H
#define _SYS_VDEV_IMPL_H
#include <sys/avl.h>
#include <sys/dmu.h>
#include <sys/metaslab.h>
#include <sys/nvpair.h>
#include <sys/space_map.h>
#include <sys/vdev.h>
#include <sys/dkio.h>
#include <sys/uberblock_impl.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Virtual device descriptors.
*
* All storage pool operations go through the virtual device framework,
* which provides data replication and I/O scheduling.
*/
/*
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
/*
* Virtual device operations
*/
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
typedef void vdev_close_func_t(vdev_t *vd);
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef int vdev_io_start_func_t(zio_t *zio);
typedef void vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
typedef struct vdev_ops {
vdev_open_func_t *vdev_op_open;
vdev_close_func_t *vdev_op_close;
vdev_asize_func_t *vdev_op_asize;
vdev_io_start_func_t *vdev_op_io_start;
vdev_io_done_func_t *vdev_op_io_done;
vdev_state_change_func_t *vdev_op_state_change;
char vdev_op_type[16];
boolean_t vdev_op_leaf;
} vdev_ops_t;
/*
* Virtual device properties
*/
struct vdev_cache_entry {
char *ve_data;
uint64_t ve_offset;
uint64_t ve_lastused;
avl_node_t ve_offset_node;
avl_node_t ve_lastused_node;
uint32_t ve_hits;
uint16_t ve_missed_update;
zio_t *ve_fill_io;
};
struct vdev_cache {
avl_tree_t vc_offset_tree;
avl_tree_t vc_lastused_tree;
kmutex_t vc_lock;
};
struct vdev_queue {
avl_tree_t vq_deadline_tree;
avl_tree_t vq_read_tree;
avl_tree_t vq_write_tree;
avl_tree_t vq_pending_tree;
kmutex_t vq_lock;
};
/*
* Virtual device descriptor
*/
struct vdev {
/*
* Common to all vdev types.
*/
uint64_t vdev_id; /* child number in vdev parent */
uint64_t vdev_guid; /* unique ID for this vdev */
uint64_t vdev_guid_sum; /* self guid + all child guids */
uint64_t vdev_asize; /* allocatable device capacity */
uint64_t vdev_ashift; /* block alignment shift */
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
uint64_t vdev_prevstate; /* used when reopening a vdev */
vdev_ops_t *vdev_ops; /* vdev operations */
spa_t *vdev_spa; /* spa for this vdev */
void *vdev_tsd; /* type-specific data */
vdev_t *vdev_top; /* top-level vdev */
vdev_t *vdev_parent; /* parent vdev */
vdev_t **vdev_child; /* array of children */
uint64_t vdev_children; /* number of children */
space_map_t vdev_dtl_map; /* dirty time log in-core state */
space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
vdev_stat_t vdev_stat; /* virtual device statistics */
/*
* Top-level vdev state.
*/
uint64_t vdev_ms_array; /* metaslab array object */
uint64_t vdev_ms_shift; /* metaslab size shift */
uint64_t vdev_ms_count; /* number of metaslabs */
metaslab_group_t *vdev_mg; /* metaslab group */
metaslab_t **vdev_ms; /* metaslab array */
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
boolean_t vdev_remove_wanted; /* async remove wanted? */
boolean_t vdev_probe_wanted; /* async probe wanted? */
list_node_t vdev_config_dirty_node; /* config dirty list */
list_node_t vdev_state_dirty_node; /* state dirty list */
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
uint64_t vdev_islog; /* is an intent log device */
/*
* Leaf vdev state.
*/
uint64_t vdev_psize; /* physical device capacity */
space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
uint64_t vdev_wholedisk; /* true if this is a whole disk */
uint64_t vdev_offline; /* persistent offline state */
uint64_t vdev_faulted; /* persistent faulted state */
uint64_t vdev_degraded; /* persistent degraded state */
uint64_t vdev_removed; /* persistent removed state */
uint64_t vdev_nparity; /* number of parity devices for raidz */
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
char *vdev_physpath; /* vdev device path (if any) */
uint64_t vdev_not_present; /* not present during import */
uint64_t vdev_unspare; /* unspare when resilvering done */
hrtime_t vdev_last_try; /* last reopen time */
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
uint8_t vdev_detached; /* device detached? */
uint8_t vdev_cant_read; /* vdev is failing all reads */
uint8_t vdev_cant_write; /* vdev is failing all writes */
uint64_t vdev_isspare; /* was a hot spare */
uint64_t vdev_isl2cache; /* was a l2cache device */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
/*
* For DTrace to work in userland (libzpool) context, these fields must
* remain at the end of the structure. DTrace will use the kernel's
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
* larger in userland, the offsets for the rest fields would be
* incorrect.
*/
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
kmutex_t vdev_stat_lock; /* vdev_stat */
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
};
#define VDEV_SKIP_SIZE (8 << 10)
#define VDEV_BOOT_HEADER_SIZE (8 << 10)
#define VDEV_PHYS_SIZE (112 << 10)
#define VDEV_UBERBLOCK_RING (128 << 10)
#define VDEV_UBERBLOCK_SHIFT(vd) \
MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT)
#define VDEV_UBERBLOCK_COUNT(vd) \
(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
/* ZFS boot block */
#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
#define VDEV_BOOT_VERSION 1 /* version number */
typedef struct vdev_boot_header {
uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
uint64_t vb_version; /* VDEV_BOOT_VERSION */
uint64_t vb_offset; /* start offset (bytes) */
uint64_t vb_size; /* size (bytes) */
char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
} vdev_boot_header_t;
typedef struct vdev_phys {
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
zio_block_tail_t vp_zbt;
} vdev_phys_t;
typedef struct vdev_label {
char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
vdev_boot_header_t vl_boot_header; /* 8K */
vdev_phys_t vl_vdev_phys; /* 112K */
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
} vdev_label_t; /* 256K total */
/*
* vdev_dirty() flags
*/
#define VDD_METASLAB 0x01
#define VDD_DTL 0x02
/*
* Size and offset of embedded boot loader region on each label.
* The total size of the first two labels plus the boot area is 4MB.
*/
#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t))
#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */
/*
* Size of label regions at the start and end of each leaf device.
*/
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
#define VDEV_LABELS 4
#define VDEV_ALLOC_LOAD 0
#define VDEV_ALLOC_ADD 1
#define VDEV_ALLOC_SPARE 2
#define VDEV_ALLOC_L2CACHE 3
/*
* Allocate or free a vdev
*/
extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
vdev_t *parent, uint_t id, int alloctype);
extern void vdev_free(vdev_t *vd);
/*
* Add or remove children and parents
*/
extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd);
extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd);
extern void vdev_compact_children(vdev_t *pvd);
extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops);
extern void vdev_remove_parent(vdev_t *cvd);
/*
* vdev sync load and sync
*/
extern void vdev_load(vdev_t *vd);
extern void vdev_sync(vdev_t *vd, uint64_t txg);
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
/*
* Available vdev types.
*/
extern vdev_ops_t vdev_root_ops;
extern vdev_ops_t vdev_mirror_ops;
extern vdev_ops_t vdev_replacing_ops;
extern vdev_ops_t vdev_raidz_ops;
extern vdev_ops_t vdev_disk_ops;
extern vdev_ops_t vdev_file_ops;
extern vdev_ops_t vdev_missing_ops;
extern vdev_ops_t vdev_spare_ops;
/*
* Common size functions
*/
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
extern uint64_t vdev_get_rsize(vdev_t *vd);
/*
* zdb uses this tunable, so it must be declared here to make lint happy.
*/
extern int zfs_vdev_cache_size;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_VDEV_IMPL_H */
+425
View File
@@ -0,0 +1,425 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_H
#define _SYS_ZAP_H
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* ZAP - ZFS Attribute Processor
*
* The ZAP is a module which sits on top of the DMU (Data Management
* Unit) and implements a higher-level storage primitive using DMU
* objects. Its primary consumer is the ZPL (ZFS Posix Layer).
*
* A "zapobj" is a DMU object which the ZAP uses to stores attributes.
* Users should use only zap routines to access a zapobj - they should
* not access the DMU object directly using DMU routines.
*
* The attributes stored in a zapobj are name-value pairs. The name is
* a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
* terminating NULL). The value is an array of integers, which may be
* 1, 2, 4, or 8 bytes long. The total space used by the array (number
* of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
* Note that an 8-byte integer value can be used to store the location
* (object number) of another dmu object (which may be itself a zapobj).
* Note that you can use a zero-length attribute to store a single bit
* of information - the attribute is present or not.
*
* The ZAP routines are thread-safe. However, you must observe the
* DMU's restriction that a transaction may not be operated on
* concurrently.
*
* Any of the routines that return an int may return an I/O error (EIO
* or ECHECKSUM).
*
*
* Implementation / Performance Notes:
*
* The ZAP is intended to operate most efficiently on attributes with
* short (49 bytes or less) names and single 8-byte values, for which
* the microzap will be used. The ZAP should be efficient enough so
* that the user does not need to cache these attributes.
*
* The ZAP's locking scheme makes its routines thread-safe. Operations
* on different zapobjs will be processed concurrently. Operations on
* the same zapobj which only read data will be processed concurrently.
* Operations on the same zapobj which modify data will be processed
* concurrently when there are many attributes in the zapobj (because
* the ZAP uses per-block locking - more than 128 * (number of cpus)
* small attributes will suffice).
*/
/*
* We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
* strings) for the names of attributes, rather than a byte string
* bounded by an explicit length. If some day we want to support names
* in character sets which have embedded zeros (eg. UTF-16, UTF-32),
* we'll have to add routines for using length-bounded strings.
*/
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ZAP_MAXNAMELEN 256
#define ZAP_MAXVALUELEN 1024
/*
* The matchtype specifies which entry will be accessed.
* MT_EXACT: only find an exact match (non-normalized)
* MT_FIRST: find the "first" normalized (case and Unicode
* form) match; the designated "first" match will not change as long
* as the set of entries with this normalization doesn't change
* MT_BEST: if there is an exact match, find that, otherwise find the
* first normalized match
*/
typedef enum matchtype
{
MT_EXACT,
MT_BEST,
MT_FIRST
} matchtype_t;
/*
* Create a new zapobj with no attributes and return its object number.
* MT_EXACT will cause the zap object to only support MT_EXACT lookups,
* otherwise any matchtype can be used for lookups.
*
* normflags specifies what normalization will be done. values are:
* 0: no normalization (legacy on-disk format, supports MT_EXACT matching
* only)
* U8_TEXTPREP_TOLOWER: case normalization will be performed.
* MT_FIRST/MT_BEST matching will find entries that match without
* regard to case (eg. looking for "foo" can find an entry "Foo").
* Eventually, other flags will permit unicode normalization as well.
*/
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
* object number.
*/
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* The zapobj passed in must be a valid ZAP object for all of the
* following routines.
*/
/*
* Destroy this zapobj and all its attributes.
*
* Frees the object number using dmu_object_free.
*/
int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
/*
* Manipulate attributes.
*
* 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
*/
/*
* Retrieve the contents of the attribute with the given name.
*
* If the requested attribute does not exist, the call will fail and
* return ENOENT.
*
* If 'integer_size' is smaller than the attribute's integer size, the
* call will fail and return EINVAL.
*
* If 'integer_size' is equal to or larger than the attribute's integer
* size, the call will succeed and return 0. * When converting to a
* larger integer size, the integers will be treated as unsigned (ie. no
* sign-extension will be performed).
*
* 'num_integers' is the length (in integers) of 'buf'.
*
* If the attribute is longer than the buffer, as many integers as will
* fit will be transferred to 'buf'. If the entire attribute was not
* transferred, the call will return EOVERFLOW.
*
* If rn_len is nonzero, realname will be set to the name of the found
* entry (which may be different from the requested name if matchtype is
* not MT_EXACT).
*
* If normalization_conflictp is not NULL, it will be set if there is
* another name with the same case/unicode normalized form.
*/
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf,
matchtype_t mt, char *realname, int rn_len,
boolean_t *normalization_conflictp);
/*
* Create an attribute with the given name and value.
*
* If an attribute with the given name already exists, the call will
* fail and return EEXIST.
*/
int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
/*
* Set the attribute with the given name to the given value. If an
* attribute with the given name does not exist, it will be created. If
* an attribute with the given name already exists, the previous value
* will be overwritten. The integer_size may be different from the
* existing attribute's integer size, in which case the attribute's
* integer size will be updated to the new value.
*/
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
/*
* Get the length (in integers) and the integer size of the specified
* attribute.
*
* If the requested attribute does not exist, the call will fail and
* return ENOENT.
*/
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t *integer_size, uint64_t *num_integers);
/*
* Remove the specified attribute.
*
* If the specified attribute does not exist, the call will fail and
* return ENOENT.
*/
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
matchtype_t mt, dmu_tx_t *tx);
/*
* Returns (in *count) the number of attributes in the specified zap
* object.
*/
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
/*
* Returns (in name) the name of the entry whose (value & mask)
* (za_first_integer) is value, or ENOENT if not found. The string
* pointed to by name must be at least 256 bytes long. If mask==0, the
* match must be exact (ie, same as mask=-1ULL).
*/
int zap_value_search(objset_t *os, uint64_t zapobj,
uint64_t value, uint64_t mask, char *name);
/*
* Transfer all the entries from fromobj into intoobj. Only works on
* int_size=8 num_integers=1 values. Fails if there are any duplicated
* entries.
*/
int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
/*
* Manipulate entries where the name + value are the "same" (the name is
* a stringified version of the value).
*/
int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
struct zap;
struct zap_leaf;
typedef struct zap_cursor {
/* This structure is opaque! */
objset_t *zc_objset;
struct zap *zc_zap;
struct zap_leaf *zc_leaf;
uint64_t zc_zapobj;
uint64_t zc_hash;
uint32_t zc_cd;
} zap_cursor_t;
typedef struct {
int za_integer_length;
/*
* za_normalization_conflict will be set if there are additional
* entries with this normalized form (eg, "foo" and "Foo").
*/
boolean_t za_normalization_conflict;
uint64_t za_num_integers;
uint64_t za_first_integer; /* no sign extension for <8byte ints */
char za_name[MAXNAMELEN];
} zap_attribute_t;
/*
* The interface for listing all the attributes of a zapobj can be
* thought of as cursor moving down a list of the attributes one by
* one. The cookie returned by the zap_cursor_serialize routine is
* persistent across system calls (and across reboot, even).
*/
/*
* Initialize a zap cursor, pointing to the "first" attribute of the
* zapobj. You must _fini the cursor when you are done with it.
*/
void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj);
void zap_cursor_fini(zap_cursor_t *zc);
/*
* Get the attribute currently pointed to by the cursor. Returns
* ENOENT if at the end of the attributes.
*/
int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
/*
* Advance the cursor to the next attribute.
*/
void zap_cursor_advance(zap_cursor_t *zc);
/*
* Get a persistent cookie pointing to the current position of the zap
* cursor. The low 4 bits in the cookie are always zero, and thus can
* be used as to differentiate a serialized cookie from a different type
* of value. The cookie will be less than 2^32 as long as there are
* fewer than 2^22 (4.2 million) entries in the zap object.
*/
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
/*
* Initialize a zap cursor pointing to the position recorded by
* zap_cursor_serialize (in the "serialized" argument). You can also
* use a "serialized" argument of 0 to start at the beginning of the
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
* zap_cursor_init(...).)
*/
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
uint64_t zapobj, uint64_t serialized);
#define ZAP_HISTOGRAM_SIZE 10
typedef struct zap_stats {
/*
* Size of the pointer table (in number of entries).
* This is always a power of 2, or zero if it's a microzap.
* In general, it should be considerably greater than zs_num_leafs.
*/
uint64_t zs_ptrtbl_len;
uint64_t zs_blocksize; /* size of zap blocks */
/*
* The number of blocks used. Note that some blocks may be
* wasted because old ptrtbl's and large name/value blocks are
* not reused. (Although their space is reclaimed, we don't
* reuse those offsets in the object.)
*/
uint64_t zs_num_blocks;
/*
* Pointer table values from zap_ptrtbl in the zap_phys_t
*/
uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */
uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */
uint64_t zs_ptrtbl_zt_blk; /* starting block number */
uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */
uint64_t zs_ptrtbl_zt_shift; /* bits to index it */
/*
* Values of the other members of the zap_phys_t
*/
uint64_t zs_block_type; /* ZBT_HEADER */
uint64_t zs_magic; /* ZAP_MAGIC */
uint64_t zs_num_leafs; /* The number of leaf blocks */
uint64_t zs_num_entries; /* The number of zap entries */
uint64_t zs_salt; /* salt to stir into hash function */
/*
* Histograms. For all histograms, the last index
* (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
* than what can be represented. For example
* zs_leafs_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
* of leafs with more than 45 entries.
*/
/*
* zs_leafs_with_n_pointers[n] is the number of leafs with
* 2^n pointers to it.
*/
uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
/*
* zs_leafs_with_n_entries[n] is the number of leafs with
* [n*5, (n+1)*5) entries. In the current implementation, there
* can be at most 55 entries in any block, but there may be
* fewer if the name or value is large, or the block is not
* completely full.
*/
uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
/*
* zs_leafs_n_tenths_full[n] is the number of leafs whose
* fullness is in the range [n/10, (n+1)/10).
*/
uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
/*
* zs_entries_using_n_chunks[n] is the number of entries which
* consume n 24-byte chunks. (Note, large names/values only use
* one chunk, but contribute to zs_num_blocks_large.)
*/
uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
/*
* zs_buckets_with_n_entries[n] is the number of buckets (each
* leaf has 64 buckets) with n entries.
* zs_buckets_with_n_entries[1] should be very close to
* zs_num_entries.
*/
uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
} zap_stats_t;
/*
* Get statistics about a ZAP object. Note: you need to be aware of the
* internal implementation of the ZAP to correctly interpret some of the
* statistics. This interface shouldn't be relied on unless you really
* know what you're doing.
*/
int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZAP_H */
+218
View File
@@ -0,0 +1,218 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_IMPL_H
#define _SYS_ZAP_IMPL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zap.h>
#include <sys/zfs_context.h>
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
extern int fzap_default_block_shift;
#define ZAP_MAGIC 0x2F52AB2ABULL
#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)
#define ZAP_MAXCD (uint32_t)(-1)
#define ZAP_HASHBITS 28
#define MZAP_ENT_LEN 64
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
typedef struct mzap_ent_phys {
uint64_t mze_value;
uint32_t mze_cd;
uint16_t mze_pad; /* in case we want to chain them someday */
char mze_name[MZAP_NAME_LEN];
} mzap_ent_phys_t;
typedef struct mzap_phys {
uint64_t mz_block_type; /* ZBT_MICRO */
uint64_t mz_salt;
uint64_t mz_normflags;
uint64_t mz_pad[5];
mzap_ent_phys_t mz_chunk[1];
/* actually variable size depending on block size */
} mzap_phys_t;
typedef struct mzap_ent {
avl_node_t mze_node;
int mze_chunkid;
uint64_t mze_hash;
mzap_ent_phys_t mze_phys;
} mzap_ent_t;
/*
* The (fat) zap is stored in one object. It is an array of
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
*
* ptrtbl fits in first block:
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
*
* ptrtbl too big for first block:
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
*
*/
struct dmu_buf;
struct zap_leaf;
#define ZBT_LEAF ((1ULL << 63) + 0)
#define ZBT_HEADER ((1ULL << 63) + 1)
#define ZBT_MICRO ((1ULL << 63) + 3)
/* any other values are ptrtbl blocks */
/*
* the embedded pointer table takes up half a block:
* block size / entry size (2^3) / 2
*/
#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
/*
* The embedded pointer table starts half-way through the block. Since
* the pointer table itself is half the block, it starts at (64-bit)
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
*/
#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
((uint64_t *)(zap)->zap_f.zap_phys) \
[(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
/*
* TAKE NOTE:
* If zap_phys_t is modified, zap_byteswap() must be modified.
*/
typedef struct zap_phys {
uint64_t zap_block_type; /* ZBT_HEADER */
uint64_t zap_magic; /* ZAP_MAGIC */
struct zap_table_phys {
uint64_t zt_blk; /* starting block number */
uint64_t zt_numblks; /* number of blocks */
uint64_t zt_shift; /* bits to index it */
uint64_t zt_nextblk; /* next (larger) copy start block */
uint64_t zt_blks_copied; /* number source blocks copied */
} zap_ptrtbl;
uint64_t zap_freeblk; /* the next free block */
uint64_t zap_num_leafs; /* number of leafs */
uint64_t zap_num_entries; /* number of entries */
uint64_t zap_salt; /* salt to stir into hash function */
uint64_t zap_normflags; /* flags for u8_textprep_str() */
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
* half of the block. It is accessed using the
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
*/
} zap_phys_t;
typedef struct zap_table_phys zap_table_phys_t;
typedef struct zap {
objset_t *zap_objset;
uint64_t zap_object;
struct dmu_buf *zap_dbuf;
krwlock_t zap_rwlock;
boolean_t zap_ismicro;
int zap_normflags;
uint64_t zap_salt;
union {
struct {
zap_phys_t *zap_phys;
/*
* zap_num_entries_mtx protects
* zap_num_entries
*/
kmutex_t zap_num_entries_mtx;
int zap_block_shift;
} zap_fat;
struct {
mzap_phys_t *zap_phys;
int16_t zap_num_entries;
int16_t zap_num_chunks;
int16_t zap_alloc_next;
avl_tree_t zap_avl;
} zap_micro;
} zap_u;
} zap_t;
typedef struct zap_name {
zap_t *zn_zap;
const char *zn_name_orij;
uint64_t zn_hash;
matchtype_t zn_matchtype;
const char *zn_name_norm;
char zn_normbuf[ZAP_MAXNAMELEN];
} zap_name_t;
#define zap_f zap_u.zap_fat
#define zap_m zap_u.zap_micro
boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap);
void zap_evict(dmu_buf_t *db, void *vmzap);
zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
void fzap_byteswap(void *buf, size_t size);
int fzap_count(zap_t *zap, uint64_t *count);
int fzap_lookup(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers, void *buf,
char *realname, int rn_len, boolean_t *normalization_conflictp);
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
int fzap_update(zap_name_t *zn,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
int fzap_length(zap_name_t *zn,
uint64_t *integer_size, uint64_t *num_integers);
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
void zap_put_leaf(struct zap_leaf *l);
int fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
const void *val, uint32_t cd, dmu_tx_t *tx);
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZAP_IMPL_H */
+244
View File
@@ -0,0 +1,244 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_LEAF_H
#define _SYS_ZAP_LEAF_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
struct zap;
#define ZAP_LEAF_MAGIC 0x2AB1EAF
/* chunk size = 24 bytes */
#define ZAP_LEAF_CHUNKSIZE 24
/*
* The amount of space available for chunks is:
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
* entries - header space (2*chunksize)
*/
#define ZAP_LEAF_NUMCHUNKS(l) \
(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
ZAP_LEAF_CHUNKSIZE - 2)
/*
* The amount of space within the chunk available for the array is:
* chunk size - space for type (1) - space for next pointer (2)
*/
#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
(((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)
/*
* Low water mark: when there are only this many chunks free, start
* growing the ptrtbl. Ideally, this should be larger than a
* "reasonably-sized" entry. 20 chunks is more than enough for the
* largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
* while still being only around 3% for 16k blocks.
*/
#define ZAP_LEAF_LOW_WATER (20)
/*
* The leaf hash table has block size / 2^5 (32) number of entries,
* which should be more than enough for the maximum number of entries,
* which is less than block size / CHUNKSIZE (24) / minimum number of
* chunks per entry (3).
*/
#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
/*
* The chunks start immediately after the hash table. The end of the
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
* chunk_t.
*/
#define ZAP_LEAF_CHUNK(l, idx) \
((zap_leaf_chunk_t *) \
((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
typedef enum zap_chunk_type {
ZAP_CHUNK_FREE = 253,
ZAP_CHUNK_ENTRY = 252,
ZAP_CHUNK_ARRAY = 251,
ZAP_CHUNK_TYPE_MAX = 250
} zap_chunk_type_t;
#define ZLF_ENTRIES_CDSORTED (1<<0)
/*
* TAKE NOTE:
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
*/
typedef struct zap_leaf_phys {
struct zap_leaf_header {
uint64_t lh_block_type; /* ZBT_LEAF */
uint64_t lh_pad1;
uint64_t lh_prefix; /* hash prefix of this leaf */
uint32_t lh_magic; /* ZAP_LEAF_MAGIC */
uint16_t lh_nfree; /* number free chunks */
uint16_t lh_nentries; /* number of entries */
uint16_t lh_prefix_len; /* num bits used to id this */
/* above is accessable to zap, below is zap_leaf private */
uint16_t lh_freelist; /* chunk head of free list */
uint8_t lh_flags; /* ZLF_* flags */
uint8_t lh_pad2[11];
} l_hdr; /* 2 24-byte chunks */
/*
* The header is followed by a hash table with
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
* zap_leaf_chunk structures. These structures are accessed
* with the ZAP_LEAF_CHUNK() macro.
*/
uint16_t l_hash[1];
} zap_leaf_phys_t;
typedef union zap_leaf_chunk {
struct zap_leaf_entry {
uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
uint8_t le_int_size; /* size of ints */
uint16_t le_next; /* next entry in hash chain */
uint16_t le_name_chunk; /* first chunk of the name */
uint16_t le_name_length; /* bytes in name, incl null */
uint16_t le_value_chunk; /* first chunk of the value */
uint16_t le_value_length; /* value length in ints */
uint32_t le_cd; /* collision differentiator */
uint64_t le_hash; /* hash value of the name */
} l_entry;
struct zap_leaf_array {
uint8_t la_type; /* always ZAP_CHUNK_ARRAY */
uint8_t la_array[ZAP_LEAF_ARRAY_BYTES];
uint16_t la_next; /* next blk or CHAIN_END */
} l_array;
struct zap_leaf_free {
uint8_t lf_type; /* always ZAP_CHUNK_FREE */
uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES];
uint16_t lf_next; /* next in free list, or CHAIN_END */
} l_free;
} zap_leaf_chunk_t;
typedef struct zap_leaf {
krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */
dmu_buf_t *l_dbuf;
zap_leaf_phys_t *l_phys;
} zap_leaf_t;
typedef struct zap_entry_handle {
/* below is set by zap_leaf.c and is public to zap.c */
uint64_t zeh_num_integers;
uint64_t zeh_hash;
uint32_t zeh_cd;
uint8_t zeh_integer_size;
/* below is private to zap_leaf.c */
uint16_t zeh_fakechunk;
uint16_t *zeh_chunkp;
zap_leaf_t *zeh_leaf;
} zap_entry_handle_t;
/*
* Return a handle to the named entry, or ENOENT if not found. The hash
* value must equal zap_hash(name).
*/
extern int zap_leaf_lookup(zap_leaf_t *l,
zap_name_t *zn, zap_entry_handle_t *zeh);
/*
* Return a handle to the entry with this hash+cd, or the entry with the
* next closest hash+cd.
*/
extern int zap_leaf_lookup_closest(zap_leaf_t *l,
uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
/*
* Read the first num_integers in the attribute. Integer size
* conversion will be done without sign extension. Return EINVAL if
* integer_size is too small. Return EOVERFLOW if there are more than
* num_integers in the attribute.
*/
extern int zap_entry_read(const zap_entry_handle_t *zeh,
uint8_t integer_size, uint64_t num_integers, void *buf);
extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
uint16_t buflen, char *buf);
/*
* Replace the value of an existing entry.
*
* zap_entry_update may fail if it runs out of space (ENOSPC).
*/
extern int zap_entry_update(zap_entry_handle_t *zeh,
uint8_t integer_size, uint64_t num_integers, const void *buf);
/*
* Remove an entry.
*/
extern void zap_entry_remove(zap_entry_handle_t *zeh);
/*
* Create an entry. An equal entry must not exist, and this entry must
* belong in this leaf (according to its hash value). Fills in the
* entry handle on success. Returns 0 on success or ENOSPC on failure.
*/
extern int zap_entry_create(zap_leaf_t *l,
const char *name, uint64_t h, uint32_t cd,
uint8_t integer_size, uint64_t num_integers, const void *buf,
zap_entry_handle_t *zeh);
/*
* Return true if there are additional entries with the same normalized
* form.
*/
extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
zap_name_t *zn, const char *name, zap_t *zap);
/*
* Other stuff.
*/
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZAP_LEAF_H */
+214
View File
@@ -0,0 +1,214 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_ACL_H
#define _SYS_FS_ZFS_ACL_H
#ifdef _KERNEL
#include <sys/isa_defs.h>
#include <sys/types32.h>
#endif
#include <sys/acl.h>
#include <sys/dmu.h>
#include <sys/zfs_fuid.h>
#ifdef __cplusplus
extern "C" {
#endif
struct znode_phys;
#define ACE_SLOT_CNT 6
#define ZFS_ACL_VERSION_INITIAL 0ULL
#define ZFS_ACL_VERSION_FUID 1ULL
#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID
/*
* ZFS ACLs are store in various forms.
* Files created with ACL version ZFS_ACL_VERSION_INITIAL
* will all be created with fixed length ACEs of type
* zfs_oldace_t.
*
* Files with ACL version ZFS_ACL_VERSION_FUID will be created
* with various sized ACEs. The abstraction entries will utilize
* zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t
* and some specialized CIFS ACEs will use zfs_object_ace_t.
*/
/*
* All ACEs have a common hdr. For
* owner@, group@, and everyone@ this is all
* thats needed.
*/
typedef struct zfs_ace_hdr {
uint16_t z_type;
uint16_t z_flags;
uint32_t z_access_mask;
} zfs_ace_hdr_t;
typedef zfs_ace_hdr_t zfs_ace_abstract_t;
/*
* Standard ACE
*/
typedef struct zfs_ace {
zfs_ace_hdr_t z_hdr;
uint64_t z_fuid;
} zfs_ace_t;
/*
* The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE
* and will only be set/retrieved in a CIFS context.
*/
typedef struct zfs_object_ace {
zfs_ace_t z_ace;
uint8_t z_object_type[16]; /* object type */
uint8_t z_inherit_type[16]; /* inherited object type */
} zfs_object_ace_t;
typedef struct zfs_oldace {
uint32_t z_fuid; /* "who" */
uint32_t z_access_mask; /* access mask */
uint16_t z_flags; /* flags, i.e inheritance */
uint16_t z_type; /* type of entry allow/deny */
} zfs_oldace_t;
typedef struct zfs_acl_phys_v0 {
uint64_t z_acl_extern_obj; /* ext acl pieces */
uint32_t z_acl_count; /* Number of ACEs */
uint16_t z_acl_version; /* acl version */
uint16_t z_acl_pad; /* pad */
zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
} zfs_acl_phys_v0_t;
#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
typedef struct zfs_acl_phys {
uint64_t z_acl_extern_obj; /* ext acl pieces */
uint32_t z_acl_size; /* Number of bytes in ACL */
uint16_t z_acl_version; /* acl version */
uint16_t z_acl_count; /* ace count */
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
} zfs_acl_phys_t;
typedef struct acl_ops {
uint32_t (*ace_mask_get) (void *acep); /* get access mask */
void (*ace_mask_set) (void *acep,
uint32_t mask); /* set access mask */
uint16_t (*ace_flags_get) (void *acep); /* get flags */
void (*ace_flags_set) (void *acep,
uint16_t flags); /* set flags */
uint16_t (*ace_type_get)(void *acep); /* get type */
void (*ace_type_set)(void *acep,
uint16_t type); /* set type */
uint64_t (*ace_who_get)(void *acep); /* get who/fuid */
void (*ace_who_set)(void *acep,
uint64_t who); /* set who/fuid */
size_t (*ace_size)(void *acep); /* how big is this ace */
size_t (*ace_abstract_size)(void); /* sizeof abstract entry */
int (*ace_mask_off)(void); /* off of access mask in ace */
int (*ace_data)(void *acep, void **datap);
/* ptr to data if any */
} acl_ops_t;
/*
* A zfs_acl_t structure is composed of a list of zfs_acl_node_t's.
* Each node will have one or more ACEs associated with it. You will
* only have multiple nodes during a chmod operation. Normally only
* one node is required.
*/
typedef struct zfs_acl_node {
list_node_t z_next; /* Next chunk of ACEs */
void *z_acldata; /* pointer into actual ACE(s) */
void *z_allocdata; /* pointer to kmem allocated memory */
size_t z_allocsize; /* Size of blob in bytes */
size_t z_size; /* length of ACL data */
int z_ace_count; /* number of ACEs in this acl node */
int z_ace_idx; /* ace iterator positioned on */
} zfs_acl_node_t;
typedef struct zfs_acl {
int z_acl_count; /* Number of ACEs */
size_t z_acl_bytes; /* Number of bytes in ACL */
uint_t z_version; /* version of ACL */
void *z_next_ace; /* pointer to next ACE */
int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
list_t z_acl; /* chunks of ACE data */
acl_ops_t z_ops; /* ACL operations */
boolean_t z_has_fuids; /* FUIDs present in ACL? */
} zfs_acl_t;
#define ACL_DATA_ALLOCED 0x1
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
/*
* Property values for acl_mode and acl_inherit.
*
* acl_mode can take discard, noallow, groupmask and passthrough.
* whereas acl_inherit has secure instead of groupmask.
*/
#define ZFS_ACL_DISCARD 0
#define ZFS_ACL_NOALLOW 1
#define ZFS_ACL_GROUPMASK 2
#define ZFS_ACL_PASSTHROUGH 3
#define ZFS_ACL_RESTRICTED 4
#define ZFS_ACL_PASSTHROUGH_X 5
struct znode;
struct zfsvfs;
struct zfs_fuid_info;
#ifdef _KERNEL
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
void zfs_acl_rele(void *);
void zfs_oldace_byteswap(ace_t *, int);
void zfs_ace_byteswap(void *, size_t, boolean_t);
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
extern int zfs_acl_access(struct znode *, int, cred_t *);
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
struct znode *, struct znode *, cred_t *cr);
void zfs_acl_free(zfs_acl_t *);
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
struct zfs_fuid_info **, dmu_tx_t *);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_ACL_H */
+73
View File
@@ -0,0 +1,73 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_CONTEXT_H
#define _SYS_ZFS_CONTEXT_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/note.h>
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/buf.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/kobj.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/random.h>
#include <sys/byteorder.h>
#include <sys/systm.h>
#include <sys/list.h>
#include <sys/uio.h>
#include <sys/dirent.h>
#include <sys/time.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/uio.h>
#include <sys/zfs_debug.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/fm/util.h>
#define CPU_SEQID (CPU->cpu_seqid)
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_CONTEXT_H */
+74
View File
@@ -0,0 +1,74 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_CTLDIR_H
#define _ZFS_CTLDIR_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/pathname.h>
#include <sys/vnode.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ZFS_CTLDIR_NAME ".zfs"
#define zfs_has_ctldir(zdp) \
((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \
((zdp)->z_zfsvfs->z_ctldir != NULL))
#define zfs_show_ctldir(zdp) \
(zfs_has_ctldir(zdp) && \
((zdp)->z_zfsvfs->z_show_ctldir))
void zfsctl_create(zfsvfs_t *);
void zfsctl_destroy(zfsvfs_t *);
vnode_t *zfsctl_root(znode_t *);
void zfsctl_init(void);
void zfsctl_fini(void);
int zfsctl_rename_snapshot(const char *from, const char *to);
int zfsctl_destroy_snapshot(const char *snapname, int force);
int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
int *direntflags, pathname_t *realpnp);
int zfsctl_make_fid(zfsvfs_t *zfsvfsp, uint64_t object, uint32_t gen,
fid_t *fidp);
int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
#define ZFSCTL_INO_ROOT 0x1
#define ZFSCTL_INO_SNAPDIR 0x2
#ifdef __cplusplus
}
#endif
#endif /* _ZFS_CTLDIR_H */
+75
View File
@@ -0,0 +1,75 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_DEBUG_H
#define _SYS_ZFS_DEBUG_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/*
* ZFS debugging
*/
#if defined(DEBUG) || !defined(_KERNEL)
#define ZFS_DEBUG
#endif
extern int zfs_flags;
#define ZFS_DEBUG_DPRINTF 0x0001
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
#define ZFS_DEBUG_SNAPNAMES 0x0008
#define ZFS_DEBUG_MODIFY 0x0010
#ifdef ZFS_DEBUG
extern void __dprintf(const char *file, const char *func,
int line, const char *fmt, ...);
#define dprintf(...) \
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__)
#else
#define dprintf(...) ((void)0)
#endif /* ZFS_DEBUG */
extern void zfs_panic_recover(const char *fmt, ...);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_DEBUG_H */
+76
View File
@@ -0,0 +1,76 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_DIR_H
#define _SYS_FS_ZFS_DIR_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/pathname.h>
#include <sys/dmu.h>
#include <sys/zfs_znode.h>
#ifdef __cplusplus
extern "C" {
#endif
/* zfs_dirent_lock() flags */
#define ZNEW 0x0001 /* entry should not exist */
#define ZEXISTS 0x0002 /* entry should exist */
#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
#define ZXATTR 0x0008 /* we want the xattr dir */
#define ZRENAMING 0x0010 /* znode is being renamed */
#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
#define IS_XATTR 0x02 /* create an extended attribute node */
#define IS_REPLAY 0x04 /* we are replaying intent log */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);
extern void zfs_dirent_unlock(zfs_dirlock_t *);
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
boolean_t *);
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
pathname_t *);
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
extern void zfs_rmnode(znode_t *);
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int);
extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_DIR_H */
+125
View File
@@ -0,0 +1,125 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_FUID_H
#define _SYS_FS_ZFS_FUID_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef _KERNEL
#include <sys/kidmap.h>
#include <sys/sid.h>
#include <sys/dmu.h>
#include <sys/zfs_vfsops.h>
#endif
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
ZFS_OWNER,
ZFS_GROUP,
ZFS_ACE_USER,
ZFS_ACE_GROUP
} zfs_fuid_type_t;
/*
* Estimate space needed for one more fuid table entry.
* for now assume its current size + 1K
*/
#define FUID_SIZE_ESTIMATE(z) (z->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
#define FUID_INDEX(x) (x >> 32)
#define FUID_RID(x) (x & 0xffffffff)
#define FUID_ENCODE(idx, rid) ((idx << 32) | rid)
/*
* FUIDs cause problems for the intent log
* we need to replay the creation of the FUID,
* but we can't count on the idmapper to be around
* and during replay the FUID index may be different than
* before. Also, if an ACL has 100 ACEs and 12 different
* domains we don't want to log 100 domain strings, but rather
* just the unique 12.
*/
/*
* The FUIDs in the log will index into
* domain string table and the bottom half will be the rid.
* Used for mapping ephemeral uid/gid during ACL setting to FUIDs
*/
typedef struct zfs_fuid {
list_node_t z_next;
uint64_t z_id; /* uid/gid being converted to fuid */
uint64_t z_domidx; /* index in AVL domain table */
uint64_t z_logfuid; /* index for domain in log */
} zfs_fuid_t;
/* list of unique domains */
typedef struct zfs_fuid_domain {
list_node_t z_next;
uint64_t z_domidx; /* AVL tree idx */
const char *z_domain; /* domain string */
} zfs_fuid_domain_t;
/*
* FUID information necessary for logging create, setattr, and setacl.
*/
typedef struct zfs_fuid_info {
list_t z_fuids;
list_t z_domains;
uint64_t z_fuid_owner;
uint64_t z_fuid_group;
char **z_domain_table; /* Used during replay */
uint32_t z_fuid_cnt; /* How many fuids in z_fuids */
uint32_t z_domain_cnt; /* How many domains */
size_t z_domain_str_sz; /* len of domain strings z_domain list */
} zfs_fuid_info_t;
#ifdef _KERNEL
struct znode;
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
extern void zfs_fuid_destroy(zfsvfs_t *);
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
dmu_tx_t *, zfs_fuid_info_t **);
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
uid_t *gid);
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
extern void zfs_fuid_info_free();
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
#endif
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_FUID_H */
+196
View File
@@ -0,0 +1,196 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_IOCTL_H
#define _SYS_ZFS_IOCTL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/cred.h>
#include <sys/dmu.h>
#include <sys/zio.h>
#include <sys/dsl_deleg.h>
#ifdef _KERNEL
#include <sys/nvpair.h>
#endif /* _KERNEL */
#ifdef __cplusplus
extern "C" {
#endif
/*
* Property values for snapdir
*/
#define ZFS_SNAPDIR_HIDDEN 0
#define ZFS_SNAPDIR_VISIBLE 1
#define DMU_BACKUP_STREAM_VERSION (1ULL)
#define DMU_BACKUP_HEADER_VERSION (2ULL)
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
#define DRR_FLAG_CLONE (1<<0)
#define DRR_FLAG_CI_DATA (1<<1)
/*
* zfs ioctl command structure
*/
typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END,
} drr_type;
uint32_t drr_payloadlen;
union {
struct drr_begin {
uint64_t drr_magic;
uint64_t drr_version;
uint64_t drr_creation_time;
dmu_objset_type_t drr_type;
uint32_t drr_flags;
uint64_t drr_toguid;
uint64_t drr_fromguid;
char drr_toname[MAXNAMELEN];
} drr_begin;
struct drr_end {
zio_cksum_t drr_checksum;
} drr_end;
struct drr_object {
uint64_t drr_object;
dmu_object_type_t drr_type;
dmu_object_type_t drr_bonustype;
uint32_t drr_blksz;
uint32_t drr_bonuslen;
uint8_t drr_checksum;
uint8_t drr_compress;
uint8_t drr_pad[6];
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
uint64_t drr_firstobj;
uint64_t drr_numobjs;
} drr_freeobjects;
struct drr_write {
uint64_t drr_object;
dmu_object_type_t drr_type;
uint32_t drr_pad;
uint64_t drr_offset;
uint64_t drr_length;
/* content follows */
} drr_write;
struct drr_free {
uint64_t drr_object;
uint64_t drr_offset;
uint64_t drr_length;
} drr_free;
} drr_u;
} dmu_replay_record_t;
typedef struct zinject_record {
uint64_t zi_objset;
uint64_t zi_object;
uint64_t zi_start;
uint64_t zi_end;
uint64_t zi_guid;
uint32_t zi_level;
uint32_t zi_error;
uint64_t zi_type;
uint32_t zi_freq;
uint32_t zi_pad; /* pad out to 64 bit alignment */
} zinject_record_t;
#define ZINJECT_NULL 0x1
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
typedef struct zfs_share {
uint64_t z_exportdata;
uint64_t z_sharedata;
uint64_t z_sharetype; /* 0 = share, 1 = unshare */
uint64_t z_sharemax; /* max length of share string */
} zfs_share_t;
/*
* ZFS file systems may behave the usual, POSIX-compliant way, where
* name lookups are case-sensitive. They may also be set up so that
* all the name lookups are case-insensitive, or so that only some
* lookups, the ones that set an FIGNORECASE flag, are case-insensitive.
*/
typedef enum zfs_case {
ZFS_CASE_SENSITIVE,
ZFS_CASE_INSENSITIVE,
ZFS_CASE_MIXED
} zfs_case_t;
typedef struct zfs_cmd {
char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_nvlist_src_size;
uint64_t zc_nvlist_dst; /* really (char *) */
uint64_t zc_nvlist_dst_size;
uint64_t zc_cookie;
uint64_t zc_objset_type;
uint64_t zc_perm_action;
uint64_t zc_history; /* really (char *) */
uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
zfs_share_t zc_share;
dmu_objset_stats_t zc_objset_stats;
struct drr_begin zc_begin_record;
zinject_record_t zc_inject_record;
} zfs_cmd_t;
#define ZVOL_MAX_MINOR (1 << 16)
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
#ifdef _KERNEL
typedef struct zfs_creat {
nvlist_t *zct_zplprops;
nvlist_t *zct_props;
} zfs_creat_t;
extern dev_info_t *zfs_dip;
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
extern int zfs_secpolicy_rename_perms(const char *from,
const char *to, cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
extern int zfs_unmount_snap(char *, void *);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_IOCTL_H */
+89
View File
@@ -0,0 +1,89 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_RLOCK_H
#define _SYS_FS_ZFS_RLOCK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _KERNEL
#include <sys/zfs_znode.h>
typedef enum {
RL_READER,
RL_WRITER,
RL_APPEND
} rl_type_t;
typedef struct rl {
znode_t *r_zp; /* znode this lock applies to */
avl_node_t r_node; /* avl node link */
uint64_t r_off; /* file range offset */
uint64_t r_len; /* file range length */
uint_t r_cnt; /* range reference count in tree */
rl_type_t r_type; /* range type */
kcondvar_t r_wr_cv; /* cv for waiting writers */
kcondvar_t r_rd_cv; /* cv for waiting readers */
uint8_t r_proxy; /* acting for original range */
uint8_t r_write_wanted; /* writer wants to lock this range */
uint8_t r_read_wanted; /* reader wants to lock this range */
} rl_t;
/*
* Lock a range (offset, length) as either shared (READER)
* or exclusive (WRITER or APPEND). APPEND is a special type that
* is converted to WRITER that specified to lock from the start of the
* end of file. zfs_range_lock() returns the range lock structure.
*/
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
/*
* Unlock range and destroy range lock structure.
*/
void zfs_range_unlock(rl_t *rl);
/*
* Reduce range locked as RW_WRITER from whole file to specified range.
* Asserts the whole file was previously locked.
*/
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
/*
* AVL comparison function used to compare range locks
*/
int zfs_range_compare(const void *arg1, const void *arg2);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_RLOCK_H */
+140
View File
@@ -0,0 +1,140 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_VFSOPS_H
#define _SYS_FS_ZFS_VFSOPS_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/isa_defs.h>
#include <sys/types32.h>
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
#include <sys/rrwlock.h>
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct zfsvfs zfsvfs_t;
struct zfsvfs {
vfs_t *z_vfs; /* generic fs struct */
zfsvfs_t *z_parent; /* parent fs */
objset_t *z_os; /* objset reference */
uint64_t z_root; /* id of root znode */
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
uint64_t z_max_blksz; /* maximum block size for files */
uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
uint64_t z_fuid_obj; /* fuid table object number */
uint64_t z_fuid_size; /* fuid table size */
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
krwlock_t z_fuid_lock; /* fuid lock */
boolean_t z_fuid_loaded; /* fuid tables are loaded */
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
zilog_t *z_log; /* intent log pointer */
uint_t z_acl_mode; /* acl chmod/mode behavior */
uint_t z_acl_inherit; /* acl inheritance behavior */
zfs_case_t z_case; /* case-sense */
boolean_t z_utf8; /* utf8-only */
int z_norm; /* normalization flags */
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_unmounted; /* unmounted */
rrwlock_t z_teardown_lock;
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
vnode_t *z_ctldir; /* .zfs directory pointer */
boolean_t z_show_ctldir; /* expose .zfs in the root dir */
boolean_t z_issnap; /* true if this is a snapshot */
boolean_t z_vscan; /* virus scan on/off */
boolean_t z_use_fuids; /* version allows fuids */
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
uint64_t z_version; /* ZPL version */
#define ZFS_OBJ_MTX_SZ 64
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
};
/*
* Normal filesystems (those not under .zfs/snapshot) have a total
* file ID size limited to 12 bytes (including the length field) due to
* NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical
* reasons, this same limit is being imposed by the Solaris NFSv3 implementation
* (although the NFSv3 protocol actually permits a maximum of 64 bytes). It
* is not possible to expand beyond 12 bytes without abandoning support
* of NFSv2.
*
* For normal filesystems, we partition up the available space as follows:
* 2 bytes fid length (required)
* 6 bytes object number (48 bits)
* 4 bytes generation number (32 bits)
*
* We reserve only 48 bits for the object number, as this is the limit
* currently defined and imposed by the DMU.
*/
typedef struct zfid_short {
uint16_t zf_len;
uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */
uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */
} zfid_short_t;
/*
* Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
* (including the length field). This makes files under .zfs/snapshot
* accessible by NFSv3 and NFSv4, but not NFSv2.
*
* For files under .zfs/snapshot, we partition up the available space
* as follows:
* 2 bytes fid length (required)
* 6 bytes object number (48 bits)
* 4 bytes generation number (32 bits)
* 6 bytes objset id (48 bits)
* 4 bytes currently just zero (32 bits)
*
* We reserve only 48 bits for the object number and objset id, as these are
* the limits currently defined and imposed by the DMU.
*/
typedef struct zfid_long {
zfid_short_t z_fid;
uint8_t zf_setid[6]; /* obj[i] = obj >> (8 * i) */
uint8_t zf_setgen[4]; /* gen[i] = gen >> (8 * i) */
} zfid_long_t;
#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
extern uint_t zfs_fsyncer_key;
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_VFSOPS_H */
+356
View File
@@ -0,0 +1,356 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
#define _SYS_FS_ZFS_ZNODE_H
#ifdef _KERNEL
#include <sys/isa_defs.h>
#include <sys/types32.h>
#include <sys/attr.h>
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/zfs_vfsops.h>
#include <sys/rrwlock.h>
#endif
#include <sys/zfs_acl.h>
#include <sys/zil.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Additional file level attributes, that are stored
* in the upper half of zp_flags
*/
#define ZFS_READONLY 0x0000000100000000
#define ZFS_HIDDEN 0x0000000200000000
#define ZFS_SYSTEM 0x0000000400000000
#define ZFS_ARCHIVE 0x0000000800000000
#define ZFS_IMMUTABLE 0x0000001000000000
#define ZFS_NOUNLINK 0x0000002000000000
#define ZFS_APPENDONLY 0x0000004000000000
#define ZFS_NODUMP 0x0000008000000000
#define ZFS_OPAQUE 0x0000010000000000
#define ZFS_AV_QUARANTINED 0x0000020000000000
#define ZFS_AV_MODIFIED 0x0000040000000000
#define ZFS_ATTR_SET(zp, attr, value) \
{ \
if (value) \
zp->z_phys->zp_flags |= attr; \
else \
zp->z_phys->zp_flags &= ~attr; \
}
/*
* Define special zfs pflags
*/
#define ZFS_XATTR 0x1 /* is an extended attribute */
#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */
#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */
#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
/*
* Is ID ephemeral?
*/
#define IS_EPHEMERAL(x) (x > MAXUID)
/*
* Should we use FUIDs?
*/
#define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID &&\
spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
#define MASTER_NODE_OBJ 1
/*
* Special attributes for master node.
*/
#define ZFS_FSID "FSID"
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
#define ZFS_ROOT_OBJ "ROOT"
#define ZPL_VERSION_STR "VERSION"
#define ZFS_FUID_TABLES "FUID"
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
/* Path component length */
/*
* The generic fs code uses MAXNAMELEN to represent
* what the largest component length is. Unfortunately,
* this length includes the terminating NULL. ZFS needs
* to tell the users via pathconf() and statvfs() what the
* true maximum length of a component is, excluding the NULL.
*/
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
/*
* Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
* the directory entries.
*/
#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
/*
* The directory entry has the type (currently unused on Solaris) in the
* top 4 bits, and the object number in the low 48 bits. The "middle"
* 12 bits are unused.
*/
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
/*
* This is the persistent portion of the znode. It is stored
* in the "bonus buffer" of the file. Short symbolic links
* are also stored in the bonus buffer.
*/
typedef struct znode_phys {
uint64_t zp_atime[2]; /* 0 - last file access time */
uint64_t zp_mtime[2]; /* 16 - last file modification time */
uint64_t zp_ctime[2]; /* 32 - last file change time */
uint64_t zp_crtime[2]; /* 48 - creation time */
uint64_t zp_gen; /* 64 - generation (txg of creation) */
uint64_t zp_mode; /* 72 - file mode bits */
uint64_t zp_size; /* 80 - size of file */
uint64_t zp_parent; /* 88 - directory parent (`..') */
uint64_t zp_links; /* 96 - number of links to file */
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
uint64_t zp_flags; /* 120 - persistent flags */
uint64_t zp_uid; /* 128 - file owner */
uint64_t zp_gid; /* 136 - owning group */
uint64_t zp_zap; /* 144 - extra attributes */
uint64_t zp_pad[3]; /* 152 - future */
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
/*
* Data may pad out any remaining bytes in the znode buffer, eg:
*
* |<---------------------- dnode_phys (512) ------------------------>|
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
* |<---- znode (264) ---->|<---- data (56) ---->|
*
* At present, we use this space for the following:
* - symbolic links
* - 32-byte anti-virus scanstamp (regular files only)
*/
} znode_phys_t;
/*
* Directory entry locks control access to directory entries.
* They are used to protect creates, deletes, and renames.
* Each directory znode has a mutex and a list of locked names.
*/
#ifdef _KERNEL
typedef struct zfs_dirlock {
char *dl_name; /* directory entry being locked */
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
uint16_t dl_namesize; /* set if dl_name was allocated */
kcondvar_t dl_cv; /* wait for entry to be unlocked */
struct znode *dl_dzp; /* directory znode */
struct zfs_dirlock *dl_next; /* next in z_dirlocks list */
} zfs_dirlock_t;
typedef struct znode {
struct zfsvfs *z_zfsvfs;
vnode_t *z_vnode;
uint64_t z_id; /* object ID for this znode */
kmutex_t z_lock; /* znode modification lock */
krwlock_t z_map_lock; /* page map lock */
krwlock_t z_parent_lock; /* parent lock for directories */
krwlock_t z_name_lock; /* "master" lock for dirent locks */
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
kmutex_t z_range_lock; /* protects changes to z_range_avl */
avl_tree_t z_range_avl; /* avl tree of file range locks */
uint8_t z_unlinked; /* file has been unlinked */
uint8_t z_atime_dirty; /* atime needs to be synced */
uint8_t z_zn_prefetch; /* Prefetch znodes? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
uint64_t z_mapcnt; /* number of pages mapped to file */
uint64_t z_last_itx; /* last ZIL itx on this znode */
uint64_t z_gen; /* generation (same as zp_gen) */
uint32_t z_sync_cnt; /* synchronous open count */
kmutex_t z_acl_lock; /* acl data lock */
list_node_t z_link_node; /* all znodes in fs link */
/*
* These are dmu managed fields.
*/
znode_phys_t *z_phys; /* pointer to persistent znode */
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
} znode_t;
/*
* Range locking rules
* --------------------
* 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
* file range needs to be locked as RL_WRITER. Only then can the pages be
* freed etc and zp_size reset. zp_size must be set within range lock.
* 2. For writes and punching holes (zfs_write & zfs_space) just the range
* being written or freed needs to be locked as RL_WRITER.
* Multiple writes at the end of the file must coordinate zp_size updates
* to ensure data isn't lost. A compare and swap loop is currently used
* to ensure the file size is at least the offset last written.
* 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
* read needs to be locked as RL_READER. A check against zp_size can then
* be made for reading beyond end of file.
*/
/*
* Convert between znode pointers and vnode pointers
*/
#define ZTOV(ZP) ((ZP)->z_vnode)
#define VTOZ(VP) ((znode_t *)(VP)->v_data)
/*
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
* ZFS_EXIT() must be called before exitting the vop.
* ZFS_VERIFY_ZP() verifies the znode is valid.
*/
#define ZFS_ENTER(zfsvfs) \
{ \
rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \
} \
}
#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
#define ZFS_VERIFY_ZP(zp) \
if ((zp)->z_dbuf == NULL) { \
ZFS_EXIT((zp)->z_zfsvfs); \
return (EIO); \
} \
/*
* Macros for dealing with dmu_buf_hold
*/
#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define ZFS_OBJ_MUTEX(zfsvfs, obj_num) \
(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
/*
* Macros to encode/decode ZFS stored time values from/to struct timespec
*/
#define ZFS_TIME_ENCODE(tp, stmp) \
{ \
(stmp)[0] = (uint64_t)(tp)->tv_sec; \
(stmp)[1] = (uint64_t)(tp)->tv_nsec; \
}
#define ZFS_TIME_DECODE(tp, stmp) \
{ \
(tp)->tv_sec = (time_t)(stmp)[0]; \
(tp)->tv_nsec = (long)(stmp)[1]; \
}
/*
* Timestamp defines
*/
#define ACCESSED (AT_ATIME)
#define STATE_CHANGED (AT_CTIME)
#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
zfs_time_stamper(zp, ACCESSED, NULL)
extern int zfs_init_fs(zfsvfs_t *, znode_t **);
extern void zfs_set_dataprop(objset_t *);
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
dmu_tx_t *tx);
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void zfs_znode_free(znode_t *);
extern void zfs_remove_op_tables();
extern int zfs_create_op_tables();
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
extern dev_t zfs_cmpldev(uint64_t);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_set_version(const char *name, uint64_t newvers);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern void zfs_znode_dmu_fini(znode_t *);
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
vattr_t *vap);
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
vattr_t *vap);
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, char *name);
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name);
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, char *link);
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, uint64_t off, uint64_t len);
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
extern void zfs_unmap_page(page_t *, caddr_t);
extern zil_get_data_t zfs_get_data;
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
extern int zfsfstype;
#endif /* _KERNEL */
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_ZNODE_H */
+382
View File
@@ -0,0 +1,382 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIL_H
#define _SYS_ZIL_H
#include <sys/types.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Intent log format:
*
* Each objset has its own intent log. The log header (zil_header_t)
* for objset N's intent log is kept in the Nth object of the SPA's
* intent_log objset. The log header points to a chain of log blocks,
* each of which contains log records (i.e., transactions) followed by
* a log block trailer (zil_trailer_t). The format of a log record
* depends on the record (or transaction) type, but all records begin
* with a common structure that defines the type, length, and txg.
*/
/*
* Intent log header - this on disk structure holds fields to manage
* the log. All fields are 64 bit to easily handle cross architectures.
*/
typedef struct zil_header {
uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
uint64_t zh_replay_seq; /* highest replayed sequence number */
blkptr_t zh_log; /* log chain */
uint64_t zh_claim_seq; /* highest claimed sequence number */
uint64_t zh_pad[5];
} zil_header_t;
/*
* Log block trailer - structure at the end of the header and each log block
*
* The zit_bt contains a zbt_cksum which for the intent log is
* the sequence number of this log block. A seq of 0 is invalid.
* The zbt_cksum is checked by the SPA against the sequence
* number passed in the blk_cksum field of the blkptr_t
*/
typedef struct zil_trailer {
uint64_t zit_pad;
blkptr_t zit_next_blk; /* next block in chain */
uint64_t zit_nused; /* bytes in log block used */
zio_block_tail_t zit_bt; /* block trailer */
} zil_trailer_t;
#define ZIL_MIN_BLKSZ 4096ULL
#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
#define ZIL_BLK_DATA_SZ(lwb) ((lwb)->lwb_sz - sizeof (zil_trailer_t))
/*
* The words of a log block checksum.
*/
#define ZIL_ZC_GUID_0 0
#define ZIL_ZC_GUID_1 1
#define ZIL_ZC_OBJSET 2
#define ZIL_ZC_SEQ 3
typedef enum zil_create {
Z_FILE,
Z_DIR,
Z_XATTRDIR,
} zil_create_t;
/*
* size of xvattr log section.
* its composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps
* for create time and a single 64 bit integer for all of the attributes,
* and 4 64 bit integers (32 bytes) for the scanstamp.
*
*/
#define ZIL_XVAT_SIZE(mapsize) \
sizeof (lr_attr_t) + (sizeof (uint32_t) * (mapsize - 1)) + \
(sizeof (uint64_t) * 7)
/*
* Size of ACL in log. The ACE data is padded out to properly align
* on 8 byte boundary.
*/
#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t)))
/*
* Intent log transaction types and record structures
*/
#define TX_CREATE 1 /* Create file */
#define TX_MKDIR 2 /* Make directory */
#define TX_MKXATTR 3 /* Make XATTR directory */
#define TX_SYMLINK 4 /* Create symbolic link to a file */
#define TX_REMOVE 5 /* Remove file */
#define TX_RMDIR 6 /* Remove directory */
#define TX_LINK 7 /* Create hard link to a file */
#define TX_RENAME 8 /* Rename a file */
#define TX_WRITE 9 /* File write */
#define TX_TRUNCATE 10 /* Truncate a file */
#define TX_SETATTR 11 /* Set file attributes */
#define TX_ACL_V0 12 /* Set old formatted ACL */
#define TX_ACL 13 /* Set ACL */
#define TX_CREATE_ACL 14 /* create with ACL */
#define TX_CREATE_ATTR 15 /* create + attrs */
#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */
#define TX_MKDIR_ACL 17 /* mkdir with ACL */
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_MAX_TYPE 20 /* Max transaction type */
/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
* may have the following bit set, indicating the original request
* specified case-insensitive handling of names.
*/
#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */
/*
* Format of log records.
* The fields are carefully defined to allow them to be aligned
* and sized the same on sparc & intel architectures.
* Each log record has a common structure at the beginning.
*
* Note, lrc_seq holds two different sequence numbers. Whilst in memory
* it contains the transaction sequence number. The log record on
* disk holds the sequence number of all log records which is used to
* ensure we don't replay the same record. The two sequence numbers are
* different because the transactions can now be pushed out of order.
*/
typedef struct { /* common log record header */
uint64_t lrc_txtype; /* intent log transaction type */
uint64_t lrc_reclen; /* transaction record length */
uint64_t lrc_txg; /* dmu transaction group number */
uint64_t lrc_seq; /* see comment above */
} lr_t;
/*
* Handle option extended vattr attributes.
*
* Whenever new attributes are added the version number
* will need to be updated as will code in
* zfs_log.c and zfs_replay.c
*/
typedef struct {
uint32_t lr_attr_masksize; /* number of elements in array */
uint32_t lr_attr_bitmap; /* First entry of array */
/* remainder of array and any additional fields */
} lr_attr_t;
/*
* log record for creates without optional ACL.
* This log record does support optional xvattr_t attributes.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* object id of directory */
uint64_t lr_foid; /* object id of created file object */
uint64_t lr_mode; /* mode of object */
uint64_t lr_uid; /* uid of object */
uint64_t lr_gid; /* gid of object */
uint64_t lr_gen; /* generation (txg of creation) */
uint64_t lr_crtime[2]; /* creation time */
uint64_t lr_rdev; /* rdev of object to create */
/* name of object to create follows this */
/* for symlinks, link content follows name */
/* for creates with xvattr data, the name follows the xvattr info */
} lr_create_t;
/*
* FUID ACL record will be an array of ACEs from the original ACL.
* If this array includes ephemeral IDs, the record will also include
* an array of log-specific FUIDs to replace the ephemeral IDs.
* Only one copy of each unique domain will be present, so the log-specific
* FUIDs will use an index into a compressed domain table. On replay this
* information will be used to construct real FUIDs (and bypass idmap,
* since it may not be available).
*/
/*
* Log record for creates with optional ACL
* This log record is also used for recording any FUID
* information needed for replaying the create. If the
* file doesn't have any actual ACEs then the lr_aclcnt
* would be zero.
*/
typedef struct {
lr_create_t lr_create; /* common create portion */
uint64_t lr_aclcnt; /* number of ACEs in ACL */
uint64_t lr_domcnt; /* number of unique domains */
uint64_t lr_fuidcnt; /* number of real fuids */
uint64_t lr_acl_bytes; /* number of bytes in ACL */
uint64_t lr_acl_flags; /* ACL flags */
/* lr_acl_bytes number of variable sized ace's follows */
/* if create is also setting xvattr's, then acl data follows xvattr */
/* if ACE FUIDs are needed then they will follow the xvattr_t */
/* Following the FUIDs will be the domain table information. */
/* The FUIDs for the owner and group will be in the lr_create */
/* portion of the record. */
/* name follows ACL data */
} lr_acl_create_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* obj id of directory */
/* name of object to remove follows this */
} lr_remove_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* obj id of directory */
uint64_t lr_link_obj; /* obj id of link */
/* name of object to link follows this */
} lr_link_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_sdoid; /* obj id of source directory */
uint64_t lr_tdoid; /* obj id of target directory */
/* 2 strings: names of source and destination follow this */
} lr_rename_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to write */
uint64_t lr_offset; /* offset to write to */
uint64_t lr_length; /* user data length to write */
uint64_t lr_blkoff; /* offset represented by lr_blkptr */
blkptr_t lr_blkptr; /* spa block pointer for replay */
/* write data will follow for small writes */
} lr_write_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* object id of file to truncate */
uint64_t lr_offset; /* offset to truncate from */
uint64_t lr_length; /* length to truncate */
} lr_truncate_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to change attributes */
uint64_t lr_mask; /* mask of attributes to set */
uint64_t lr_mode; /* mode to set */
uint64_t lr_uid; /* uid to set */
uint64_t lr_gid; /* gid to set */
uint64_t lr_size; /* size to set */
uint64_t lr_atime[2]; /* access time */
uint64_t lr_mtime[2]; /* modification time */
/* optional attribute lr_attr_t may be here */
} lr_setattr_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
uint64_t lr_aclcnt; /* number of acl entries */
/* lr_aclcnt number of ace_t entries follow this */
} lr_acl_v0_t;
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
uint64_t lr_aclcnt; /* number of ACEs in ACL */
uint64_t lr_domcnt; /* number of unique domains */
uint64_t lr_fuidcnt; /* number of real fuids */
uint64_t lr_acl_bytes; /* number of bytes in ACL */
uint64_t lr_acl_flags; /* ACL flags */
/* lr_acl_bytes number of variable sized ace's follows */
} lr_acl_t;
/*
* ZIL structure definitions, interface function prototype and globals.
*/
/*
* ZFS intent log transaction structure
*/
typedef enum {
WR_INDIRECT, /* indirect - a large write (dmu_sync() data */
/* and put blkptr in log, rather than actual data) */
WR_COPIED, /* immediate - data is copied into lr_write_t */
WR_NEED_COPY, /* immediate - data needs to be copied if pushed */
} itx_wr_state_t;
typedef struct itx {
list_node_t itx_node; /* linkage on zl_itx_list */
void *itx_private; /* type-specific opaque data */
itx_wr_state_t itx_wr_state; /* write state */
uint8_t itx_sync; /* synchronous transaction */
uint64_t itx_sod; /* record size on disk */
lr_t itx_lr; /* common part of log record */
/* followed by type-specific part of lr_xx_t and its immediate data */
} itx_t;
/*
* zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
* to handle the cleanup of the dmu_sync() buffer write
*/
typedef struct {
zilog_t *zgd_zilog; /* zilog */
blkptr_t *zgd_bp; /* block pointer */
struct rl *zgd_rl; /* range lock */
} zgd_t;
typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
uint64_t txg);
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
uint64_t txg);
typedef int zil_replay_func_t();
typedef void zil_replay_cleaner_t();
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
extern void zil_init(void);
extern void zil_fini(void);
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
extern void zil_free(zilog_t *zilog);
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
extern void zil_close(zilog_t *zilog);
extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
zil_replay_func_t *replay_func[TX_MAX_TYPE],
zil_replay_cleaner_t *replay_cleaner);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
extern int zil_claim(char *osname, void *txarg);
extern int zil_check_log_chain(char *osname, void *txarg);
extern int zil_clear_log_chain(char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog);
extern int zil_is_committed(zilog_t *zilog);
extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
extern int zil_disable;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIL_H */
+109
View File
@@ -0,0 +1,109 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIL_IMPL_H
#define _SYS_ZIL_IMPL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zil.h>
#include <sys/dmu_objset.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Log write buffer.
*/
typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
int lwb_nused; /* # used bytes in buffer */
int lwb_sz; /* size of block and buffer */
char *lwb_buf; /* log write buffer */
zio_t *lwb_zio; /* zio for this buffer */
uint64_t lwb_max_txg; /* highest txg in this lwb */
txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
} lwb_t;
/*
* Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
* we've touched so we know which ones need a write cache flush at the end.
*/
typedef struct zil_vdev_node {
uint64_t zv_vdev; /* vdev to be flushed */
avl_node_t zv_node; /* AVL tree linkage */
} zil_vdev_node_t;
/*
* Stable storage intent log management structure. One per dataset.
*/
struct zilog {
kmutex_t zl_lock; /* protects most zilog_t fields */
struct dsl_pool *zl_dmu_pool; /* DSL pool */
spa_t *zl_spa; /* handle for read/write log */
const zil_header_t *zl_header; /* log header buffer */
objset_t *zl_os; /* object set we're logging */
zil_get_data_t *zl_get_data; /* callback to get object content */
zio_t *zl_root_zio; /* log writer root zio */
uint64_t zl_itx_seq; /* next itx sequence number */
uint64_t zl_commit_seq; /* committed upto this number */
uint64_t zl_lr_seq; /* log record sequence number */
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
uint32_t zl_suspend; /* log suspend count */
kcondvar_t zl_cv_writer; /* log writer thread completion */
kcondvar_t zl_cv_suspend; /* log suspend completion */
uint8_t zl_suspending; /* log is currently suspending */
uint8_t zl_keep_first; /* keep first log block in destroy */
uint8_t zl_stop_replay; /* don't replay any further */
uint8_t zl_stop_sync; /* for debugging */
uint8_t zl_writer; /* boolean: write setup in progress */
uint8_t zl_log_error; /* boolean: log write error */
list_t zl_itx_list; /* in-memory itx list */
uint64_t zl_itx_list_sz; /* total size of records on list */
uint64_t zl_cur_used; /* current commit log size used */
uint64_t zl_prev_used; /* previous commit log size used */
list_t zl_lwb_list; /* in-flight log write list */
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
};
typedef struct zil_dva_node {
dva_t zn_dva;
avl_node_t zn_node;
} zil_dva_node_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIL_IMPL_H */
+424
View File
@@ -0,0 +1,424 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZIO_H
#define _ZIO_H
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/fs/zfs.h>
#include <sys/zio_impl.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
typedef struct zio_block_tail {
uint64_t zbt_magic; /* for validation, endianness */
zio_cksum_t zbt_cksum; /* 256-bit checksum */
} zio_block_tail_t;
/*
* Gang block headers are self-checksumming and contain an array
* of block pointers.
*/
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
sizeof (zio_block_tail_t) - \
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
sizeof (uint64_t))
typedef struct zio_gbh {
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
uint64_t zg_filler[SPA_GBH_FILLER];
zio_block_tail_t zg_tail;
} zio_gbh_phys_t;
enum zio_checksum {
ZIO_CHECKSUM_INHERIT = 0,
ZIO_CHECKSUM_ON,
ZIO_CHECKSUM_OFF,
ZIO_CHECKSUM_LABEL,
ZIO_CHECKSUM_GANG_HEADER,
ZIO_CHECKSUM_ZILOG,
ZIO_CHECKSUM_FLETCHER_2,
ZIO_CHECKSUM_FLETCHER_4,
ZIO_CHECKSUM_SHA256,
ZIO_CHECKSUM_FUNCTIONS
};
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
enum zio_compress {
ZIO_COMPRESS_INHERIT = 0,
ZIO_COMPRESS_ON,
ZIO_COMPRESS_OFF,
ZIO_COMPRESS_LZJB,
ZIO_COMPRESS_EMPTY,
ZIO_COMPRESS_GZIP_1,
ZIO_COMPRESS_GZIP_2,
ZIO_COMPRESS_GZIP_3,
ZIO_COMPRESS_GZIP_4,
ZIO_COMPRESS_GZIP_5,
ZIO_COMPRESS_GZIP_6,
ZIO_COMPRESS_GZIP_7,
ZIO_COMPRESS_GZIP_8,
ZIO_COMPRESS_GZIP_9,
ZIO_COMPRESS_FUNCTIONS
};
#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
#define ZIO_PRIORITY_FREE (zio_priority_table[5])
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
#define ZIO_PRIORITY_TABLE_SIZE 10
#define ZIO_FLAG_MUSTSUCCEED 0x00000
#define ZIO_FLAG_CANFAIL 0x00001
#define ZIO_FLAG_SPECULATIVE 0x00002
#define ZIO_FLAG_CONFIG_WRITER 0x00004
#define ZIO_FLAG_DONT_RETRY 0x00008
#define ZIO_FLAG_DONT_CACHE 0x00010
#define ZIO_FLAG_DONT_QUEUE 0x00020
#define ZIO_FLAG_DONT_AGGREGATE 0x00040
#define ZIO_FLAG_DONT_PROPAGATE 0x00080
#define ZIO_FLAG_IO_BYPASS 0x00100
#define ZIO_FLAG_IO_REPAIR 0x00200
#define ZIO_FLAG_IO_RETRY 0x00400
#define ZIO_FLAG_IO_REWRITE 0x00800
#define ZIO_FLAG_PROBE 0x01000
#define ZIO_FLAG_RESILVER 0x02000
#define ZIO_FLAG_SCRUB 0x04000
#define ZIO_FLAG_SCRUB_THREAD 0x08000
#define ZIO_FLAG_GANG_CHILD 0x10000
#define ZIO_FLAG_GANG_INHERIT \
(ZIO_FLAG_CANFAIL | \
ZIO_FLAG_SPECULATIVE | \
ZIO_FLAG_CONFIG_WRITER | \
ZIO_FLAG_DONT_RETRY | \
ZIO_FLAG_DONT_CACHE | \
ZIO_FLAG_DONT_AGGREGATE | \
ZIO_FLAG_RESILVER | \
ZIO_FLAG_SCRUB | \
ZIO_FLAG_SCRUB_THREAD)
#define ZIO_FLAG_VDEV_INHERIT \
(ZIO_FLAG_GANG_INHERIT | \
ZIO_FLAG_IO_REPAIR | \
ZIO_FLAG_IO_RETRY | \
ZIO_FLAG_PROBE)
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
#define ZIO_GANG_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \
ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL)
enum zio_child {
ZIO_CHILD_VDEV = 0,
ZIO_CHILD_GANG,
ZIO_CHILD_LOGICAL,
ZIO_CHILD_TYPES
};
enum zio_wait_type {
ZIO_WAIT_READY = 0,
ZIO_WAIT_DONE,
ZIO_WAIT_TYPES
};
/*
* We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
* graveyard) to indicate checksum errors and fragmentation.
*/
#define ECKSUM EBADE
#define EFRAGS EBADR
typedef struct zio zio_t;
typedef void zio_done_func_t(zio_t *zio);
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
extern char *zio_type_name[ZIO_TYPES];
/*
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
* identifies any block in the pool. By convention, the meta-objset (MOS)
* is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
* level -1 of the meta-dnode, and intent log blocks (which are chained
* off the root block) have blkid == sequence number. In summary:
*
* mos is objset 0
* meta-dnode is object 0
* root block is <objset, 0, -1, 0>
* intent log is <objset, 0, -1, ZIL sequence number>
*
* Note: this structure is called a bookmark because its first purpose was
* to remember where to resume a pool-wide traverse. The absolute ordering
* for block visitation during traversal is defined in compare_bookmark().
*
* Note: this structure is passed between userland and the kernel.
* Therefore it must not change size or alignment between 32/64 bit
* compilation options.
*/
typedef struct zbookmark {
uint64_t zb_objset;
uint64_t zb_object;
int64_t zb_level;
uint64_t zb_blkid;
} zbookmark_t;
typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
dmu_object_type_t zp_type;
uint8_t zp_level;
uint8_t zp_ndvas;
} zio_prop_t;
typedef struct zio_gang_node {
zio_gbh_phys_t *gn_gbh;
struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
} zio_gang_node_t;
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
zio_gang_node_t *gn, void *data);
typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
typedef struct zio_transform {
void *zt_orig_data;
uint64_t zt_orig_size;
uint64_t zt_bufsize;
zio_transform_func_t *zt_transform;
struct zio_transform *zt_next;
} zio_transform_t;
typedef int zio_pipe_stage_t(zio_t *zio);
/*
* The io_reexecute flags are distinct from io_flags because the child must
* be able to propagate them to the parent. The normal io_flags are local
* to the zio, not protected by any lock, and not modifiable by children;
* the reexecute flags are protected by io_lock, modifiable by children,
* and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set.
*/
#define ZIO_REEXECUTE_NOW 0x01
#define ZIO_REEXECUTE_SUSPEND 0x02
struct zio {
/* Core information about this I/O */
zbookmark_t io_bookmark;
zio_prop_t io_prop;
zio_type_t io_type;
enum zio_child io_child_type;
int io_cmd;
uint8_t io_priority;
uint8_t io_reexecute;
uint8_t io_async_root;
uint64_t io_txg;
spa_t *io_spa;
blkptr_t *io_bp;
blkptr_t io_bp_copy;
zio_t *io_parent;
zio_t *io_child;
zio_t *io_sibling_prev;
zio_t *io_sibling_next;
zio_t *io_logical;
zio_transform_t *io_transform_stack;
/* Callback info */
zio_done_func_t *io_ready;
zio_done_func_t *io_done;
void *io_private;
blkptr_t io_bp_orig;
/* Data represented by this I/O */
void *io_data;
uint64_t io_size;
/* Stuff for the vdev stack */
vdev_t *io_vd;
void *io_vsd;
zio_done_func_t *io_vsd_free;
uint64_t io_offset;
uint64_t io_deadline;
avl_node_t io_offset_node;
avl_node_t io_deadline_node;
avl_tree_t *io_vdev_tree;
zio_t *io_delegate_list;
zio_t *io_delegate_next;
/* Internal pipeline state */
int io_flags;
zio_stage_t io_stage;
uint32_t io_pipeline;
int io_orig_flags;
zio_stage_t io_orig_stage;
uint32_t io_orig_pipeline;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
uint64_t *io_stall;
zio_gang_node_t *io_gang_tree;
void *io_executor;
void *io_waiter;
kmutex_t io_lock;
kcondvar_t io_cv;
/* FMA state */
uint64_t io_ena;
};
extern zio_t *zio_null(zio_t *pio, spa_t *spa,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
int priority, int flags, const zbookmark_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *done, void *private,
int priority, int flags, const zbookmark_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
int priority, int flags, zbookmark_t *zb);
extern void zio_skip_write(zio_t *zio);
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, int priority, int flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
zio_done_func_t *done, void *private, int priority, int flags,
boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
zio_done_func_t *done, void *private, int priority, int flags,
boolean_t labels);
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t txg);
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
uint64_t offset, void *data, uint64_t size, int type, int priority,
int flags, zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
void *data, uint64_t size, int type, int priority,
int flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);
extern void zio_checksum_verified(zio_t *zio);
extern int zio_worst_error(int e1, int e2);
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
extern void zio_suspend(spa_t *spa, zio_t *zio);
extern void zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
/*
* Initial setup and teardown.
*/
extern void zio_init(void);
extern void zio_fini(void);
/*
* Fault injection
*/
struct zinject_record;
extern uint32_t zio_injection_enabled;
extern int zio_inject_fault(char *name, int flags, int *id,
struct zinject_record *record);
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
struct zinject_record *record);
extern int zio_clear_fault(int id);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
#ifdef __cplusplus
}
#endif
#endif /* _ZIO_H */
+73
View File
@@ -0,0 +1,73 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIO_CHECKSUM_H
#define _SYS_ZIO_CHECKSUM_H
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Signature for checksum functions.
*/
typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
/*
* Information about each checksum function.
*/
typedef struct zio_checksum_info {
zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
int ci_correctable; /* number of correctable bits */
int ci_zbt; /* uses zio block tail? */
char *ci_name; /* descriptive name */
} zio_checksum_info_t;
extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
* Checksum routines.
*/
extern zio_checksum_t fletcher_2_native;
extern zio_checksum_t fletcher_4_native;
extern zio_checksum_t fletcher_4_incremental_native;
extern zio_checksum_t fletcher_2_byteswap;
extern zio_checksum_t fletcher_4_byteswap;
extern zio_checksum_t fletcher_4_incremental_byteswap;
extern zio_checksum_t zio_checksum_SHA256;
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
void *data, uint64_t size);
extern int zio_checksum_error(zio_t *zio);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIO_CHECKSUM_H */
+82
View File
@@ -0,0 +1,82 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIO_COMPRESS_H
#define _SYS_ZIO_COMPRESS_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Common signature for all zio compress/decompress functions.
*/
typedef size_t zio_compress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
typedef int zio_decompress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
/*
* Information about each compression function.
*/
typedef struct zio_compress_info {
zio_compress_func_t *ci_compress; /* compression function */
zio_decompress_func_t *ci_decompress; /* decompression function */
int ci_level; /* level parameter */
char *ci_name; /* algorithm name */
} zio_compress_info_t;
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
/*
* Compression routines.
*/
extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
/*
* Compress and decompress data if necessary.
*/
extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
void *dest, uint64_t destsize);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIO_COMPRESS_H */
+143
View File
@@ -0,0 +1,143 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZIO_IMPL_H
#define _ZIO_IMPL_H
#include <sys/zfs_context.h>
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* I/O Groups: pipeline stage definitions.
*/
typedef enum zio_stage {
ZIO_STAGE_OPEN = 0, /* RWFCI */
ZIO_STAGE_ISSUE_ASYNC, /* -W--- */
ZIO_STAGE_READ_BP_INIT, /* R---- */
ZIO_STAGE_WRITE_BP_INIT, /* -W--- */
ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
ZIO_STAGE_GANG_ASSEMBLE, /* RWFC- */
ZIO_STAGE_GANG_ISSUE, /* RWFC- */
ZIO_STAGE_DVA_ALLOCATE, /* -W--- */
ZIO_STAGE_DVA_FREE, /* --F-- */
ZIO_STAGE_DVA_CLAIM, /* ---C- */
ZIO_STAGE_READY, /* RWFCI */
ZIO_STAGE_VDEV_IO_START, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS, /* RW--I */
ZIO_STAGE_CHECKSUM_VERIFY, /* R---- */
ZIO_STAGE_DONE, /* RWFCI */
ZIO_STAGES
} zio_stage_t;
#define ZIO_INTERLOCK_STAGES \
((1U << ZIO_STAGE_READY) | \
(1U << ZIO_STAGE_DONE))
#define ZIO_INTERLOCK_PIPELINE \
ZIO_INTERLOCK_STAGES
#define ZIO_VDEV_IO_STAGES \
((1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_VDEV_IO_DONE) | \
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
#define ZIO_VDEV_CHILD_PIPELINE \
(ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_DONE))
#define ZIO_READ_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_CHECKSUM_VERIFY))
#define ZIO_READ_PHYS_PIPELINE \
ZIO_READ_COMMON_STAGES
#define ZIO_READ_PIPELINE \
(ZIO_READ_COMMON_STAGES | \
(1U << ZIO_STAGE_READ_BP_INIT))
#define ZIO_WRITE_COMMON_STAGES \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
(1U << ZIO_STAGE_ISSUE_ASYNC) | \
(1U << ZIO_STAGE_CHECKSUM_GENERATE))
#define ZIO_WRITE_PHYS_PIPELINE \
ZIO_WRITE_COMMON_STAGES
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
(1U << ZIO_STAGE_WRITE_BP_INIT))
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
(1U << ZIO_STAGE_WRITE_BP_INIT) | \
(1U << ZIO_STAGE_DVA_ALLOCATE))
#define ZIO_GANG_STAGES \
((1U << ZIO_STAGE_GANG_ASSEMBLE) | \
(1U << ZIO_STAGE_GANG_ISSUE))
#define ZIO_FREE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
(1U << ZIO_STAGE_DVA_FREE))
#define ZIO_CLAIM_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
(1U << ZIO_STAGE_DVA_CLAIM))
#define ZIO_IOCTL_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
(1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
#define ZIO_CONFIG_LOCK_BLOCKING_STAGES \
((1U << ZIO_STAGE_VDEV_IO_START) | \
(1U << ZIO_STAGE_DVA_ALLOCATE) | \
(1U << ZIO_STAGE_DVA_CLAIM))
extern void zio_inject_init(void);
extern void zio_inject_fini(void);
#ifdef __cplusplus
}
#endif
#endif /* _ZIO_IMPL_H */
+70
View File
@@ -0,0 +1,70 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZVOL_H
#define _SYS_ZVOL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
#ifdef _KERNEL
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
extern int zvol_create_minor(const char *, major_t);
extern int zvol_remove_minor(const char *);
extern int zvol_set_volsize(const char *, major_t, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
extern int zvol_strategy(buf_t *bp);
extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr);
extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr);
extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr);
extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr);
extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
int *rvalp);
extern int zvol_busy(void);
extern void zvol_init(void);
extern void zvol_fini(void);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZVOL_H */