mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Move the world out of /zfs/ and separate out module build tree
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ARC_H
|
||||
#define _SYS_ARC_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||
typedef struct arc_buf arc_buf_t;
|
||||
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
|
||||
typedef int arc_evict_func_t(void *private);
|
||||
|
||||
/* generic arc_done_func_t's which you can use */
|
||||
arc_done_func_t arc_bcopy_func;
|
||||
arc_done_func_t arc_getbuf_func;
|
||||
|
||||
struct arc_buf {
|
||||
arc_buf_hdr_t *b_hdr;
|
||||
arc_buf_t *b_next;
|
||||
krwlock_t b_lock;
|
||||
void *b_data;
|
||||
arc_evict_func_t *b_efunc;
|
||||
void *b_private;
|
||||
};
|
||||
|
||||
/*
 * Kind of contents held by an ARC buffer.  ARC_BUFC_NUMTYPES is not a
 * content type; it is the count of the enumerators that precede it.
 */
typedef enum arc_buf_contents {
	ARC_BUFC_DATA,				/* buffer contains data */
	ARC_BUFC_METADATA,			/* buffer contains metadata */
	ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
|
||||
/*
 * These are the flags we pass into calls to the arc.
 * Each flag is a distinct single bit; note that bit 0 is unused.
 */
#define	ARC_WAIT	(1 << 1)	/* perform I/O synchronously */
#define	ARC_NOWAIT	(1 << 2)	/* perform I/O asynchronously */
#define	ARC_PREFETCH	(1 << 3)	/* I/O is a prefetch */
#define	ARC_CACHED	(1 << 4)	/* I/O was already in cache */
#define	ARC_L2CACHE	(1 << 5)	/* cache in L2ARC */
|
||||
|
||||
void arc_space_consume(uint64_t space);
|
||||
void arc_space_return(uint64_t space);
|
||||
void *arc_data_buf_alloc(uint64_t space);
|
||||
void arc_data_buf_free(void *buf, uint64_t space);
|
||||
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
|
||||
arc_buf_contents_t type);
|
||||
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_size(arc_buf_t *buf);
|
||||
void arc_release(arc_buf_t *buf, void *tag);
|
||||
int arc_released(arc_buf_t *buf);
|
||||
int arc_has_callback(arc_buf_t *buf);
|
||||
void arc_buf_freeze(arc_buf_t *buf);
|
||||
void arc_buf_thaw(arc_buf_t *buf);
|
||||
#ifdef ZFS_DEBUG
|
||||
int arc_referenced(arc_buf_t *buf);
|
||||
#endif
|
||||
|
||||
typedef struct writeprops {
|
||||
dmu_object_type_t wp_type;
|
||||
uint8_t wp_level;
|
||||
uint8_t wp_copies;
|
||||
uint8_t wp_dncompress, wp_oscompress;
|
||||
uint8_t wp_dnchecksum, wp_oschecksum;
|
||||
} writeprops_t;
|
||||
|
||||
void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp);
|
||||
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
|
||||
arc_done_func_t *done, void *private, int priority, int zio_flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
|
||||
arc_done_func_t *done, void *private, int priority, int flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
|
||||
boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
|
||||
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
|
||||
int zio_flags, const zbookmark_t *zb);
|
||||
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
|
||||
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||
int arc_buf_evict(arc_buf_t *buf);
|
||||
|
||||
void arc_flush(spa_t *spa);
|
||||
void arc_tempreserve_clear(uint64_t reserve);
|
||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||
|
||||
void arc_init(void);
|
||||
void arc_fini(void);
|
||||
|
||||
/*
|
||||
* Level 2 ARC
|
||||
*/
|
||||
|
||||
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
|
||||
void l2arc_remove_vdev(vdev_t *vd);
|
||||
boolean_t l2arc_vdev_present(vdev_t *vd);
|
||||
void l2arc_init(void);
|
||||
void l2arc_fini(void);
|
||||
void l2arc_start(void);
|
||||
void l2arc_stop(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ARC_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BPLIST_H
|
||||
#define _SYS_BPLIST_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct bplist_phys {
	/*
	 * This is the bonus buffer for the dead lists.  The object's
	 * contents is an array of bpl_entries blkptr_t's, representing
	 * a total of bpl_bytes physical space.
	 */
	uint64_t	bpl_entries;
	uint64_t	bpl_bytes;
	uint64_t	bpl_comp;
	uint64_t	bpl_uncomp;
} bplist_phys_t;

/*
 * Size in bytes of two uint64_t's.  NOTE(review): presumably the size of
 * the original (V0) layout, which would then cover only bpl_entries and
 * bpl_bytes -- confirm against bplist.c.
 */
#define	BPLIST_SIZE_V0	(2 * sizeof (uint64_t))
|
||||
|
||||
typedef struct bplist_q {
|
||||
blkptr_t bpq_blk;
|
||||
void *bpq_next;
|
||||
} bplist_q_t;
|
||||
|
||||
typedef struct bplist {
|
||||
kmutex_t bpl_lock;
|
||||
objset_t *bpl_mos;
|
||||
uint64_t bpl_object;
|
||||
uint8_t bpl_blockshift;
|
||||
uint8_t bpl_bpshift;
|
||||
uint8_t bpl_havecomp;
|
||||
bplist_q_t *bpl_queue;
|
||||
bplist_phys_t *bpl_phys;
|
||||
dmu_buf_t *bpl_dbuf;
|
||||
dmu_buf_t *bpl_cached_dbuf;
|
||||
} bplist_t;
|
||||
|
||||
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
|
||||
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
|
||||
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
|
||||
extern void bplist_close(bplist_t *bpl);
|
||||
extern boolean_t bplist_empty(bplist_t *bpl);
|
||||
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
|
||||
extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
|
||||
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern int bplist_space(bplist_t *bpl,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
extern int bplist_space_birthrange(bplist_t *bpl,
|
||||
uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_BPLIST_H */
|
||||
@@ -0,0 +1,347 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DBUF_H
|
||||
#define _SYS_DBUF_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* DB_BONUS_BLKID is all-ones in unsigned long long (-1ULL). */
#define	DB_BONUS_BLKID	(-1ULL)
#define	IN_DMU_SYNC	2
|
||||
|
||||
/*
 * define flags for dbuf_read
 * (each flag is a distinct single bit, so flags may be OR'ed together)
 */

#define	DB_RF_MUST_SUCCEED	(1 << 0)
#define	DB_RF_CANFAIL		(1 << 1)
#define	DB_RF_HAVESTRUCT	(1 << 2)
#define	DB_RF_NOPREFETCH	(1 << 3)
#define	DB_RF_NEVERWAIT		(1 << 4)
#define	DB_RF_CACHED		(1 << 5)
|
||||
|
||||
/*
 * The simplified state transition diagram for dbufs looks like:
 *
 *              +----> READ ----+
 *              |               |
 *              |               V
 *  (alloc)-->UNCACHED       CACHED-->EVICTING-->(free)
 *              |               ^        ^
 *              |               |        |
 *              +----> FILL ----+        |
 *              |                        |
 *              |                        |
 *              +--------> NOFILL -------+
 */
typedef enum dbuf_states {
	DB_UNCACHED,
	DB_FILL,
	DB_NOFILL,
	DB_READ,
	DB_CACHED,
	DB_EVICTING
} dbuf_states_t;
|
||||
|
||||
struct objset_impl;
struct dnode;
struct dmu_tx;

/*
 * level = 0 means the user data
 * level = 1 means the single indirect block
 * etc.
 */

/*
 * True when both the list_next and list_prev pointers of a link are NULL.
 * The argument is expanded twice, so it must be free of side effects.
 */
#define	LIST_LINK_INACTIVE(link) \
	((link)->list_next == NULL && (link)->list_prev == NULL)

struct dmu_buf_impl;
|
||||
|
||||
/* Type of the dr_override_state member of a leaf dirty record. */
typedef enum override_states {
	DR_NOT_OVERRIDDEN,
	DR_IN_DMU_SYNC,
	DR_OVERRIDDEN
} override_states_t;
|
||||
|
||||
typedef struct dbuf_dirty_record {
|
||||
/* link on our parents dirty list */
|
||||
list_node_t dr_dirty_node;
|
||||
|
||||
/* transaction group this data will sync in */
|
||||
uint64_t dr_txg;
|
||||
|
||||
/* zio of outstanding write IO */
|
||||
zio_t *dr_zio;
|
||||
|
||||
/* pointer back to our dbuf */
|
||||
struct dmu_buf_impl *dr_dbuf;
|
||||
|
||||
/* pointer to next dirty record */
|
||||
struct dbuf_dirty_record *dr_next;
|
||||
|
||||
/* pointer to parent dirty record */
|
||||
struct dbuf_dirty_record *dr_parent;
|
||||
|
||||
union dirty_types {
|
||||
struct dirty_indirect {
|
||||
|
||||
/* protect access to list */
|
||||
kmutex_t dr_mtx;
|
||||
|
||||
/* Our list of dirty children */
|
||||
list_t dr_children;
|
||||
} di;
|
||||
struct dirty_leaf {
|
||||
|
||||
/*
|
||||
* dr_data is set when we dirty the buffer
|
||||
* so that we can retain the pointer even if it
|
||||
* gets COW'd in a subsequent transaction group.
|
||||
*/
|
||||
arc_buf_t *dr_data;
|
||||
blkptr_t dr_overridden_by;
|
||||
override_states_t dr_override_state;
|
||||
} dl;
|
||||
} dt;
|
||||
} dbuf_dirty_record_t;
|
||||
|
||||
typedef struct dmu_buf_impl {
|
||||
/*
|
||||
* The following members are immutable, with the exception of
|
||||
* db.db_data, which is protected by db_mtx.
|
||||
*/
|
||||
|
||||
/* the publicly visible structure */
|
||||
dmu_buf_t db;
|
||||
|
||||
/* the objset we belong to */
|
||||
struct objset_impl *db_objset;
|
||||
|
||||
/*
|
||||
* the dnode we belong to (NULL when evicted)
|
||||
*/
|
||||
struct dnode *db_dnode;
|
||||
|
||||
/*
|
||||
* our parent buffer; if the dnode points to us directly,
|
||||
* db_parent == db_dnode->dn_dbuf
|
||||
* only accessed by sync thread ???
|
||||
* (NULL when evicted)
|
||||
*/
|
||||
struct dmu_buf_impl *db_parent;
|
||||
|
||||
/*
|
||||
* link for hash table of all dmu_buf_impl_t's
|
||||
*/
|
||||
struct dmu_buf_impl *db_hash_next;
|
||||
|
||||
/* our block number */
|
||||
uint64_t db_blkid;
|
||||
|
||||
/*
|
||||
* Pointer to the blkptr_t which points to us. May be NULL if we
|
||||
* don't have one yet. (NULL when evicted)
|
||||
*/
|
||||
blkptr_t *db_blkptr;
|
||||
|
||||
/*
|
||||
* Our indirection level. Data buffers have db_level==0.
|
||||
* Indirect buffers which point to data buffers have
|
||||
* db_level==1. etc. Buffers which contain dnodes have
|
||||
* db_level==0, since the dnodes are stored in a file.
|
||||
*/
|
||||
uint8_t db_level;
|
||||
|
||||
/* db_mtx protects the members below */
|
||||
kmutex_t db_mtx;
|
||||
|
||||
/*
|
||||
* Current state of the buffer
|
||||
*/
|
||||
dbuf_states_t db_state;
|
||||
|
||||
/*
|
||||
* Refcount accessed by dmu_buf_{hold,rele}.
|
||||
* If nonzero, the buffer can't be destroyed.
|
||||
* Protected by db_mtx.
|
||||
*/
|
||||
refcount_t db_holds;
|
||||
|
||||
/* buffer holding our data */
|
||||
arc_buf_t *db_buf;
|
||||
|
||||
kcondvar_t db_changed;
|
||||
dbuf_dirty_record_t *db_data_pending;
|
||||
|
||||
/* pointer to most recent dirty record for this buffer */
|
||||
dbuf_dirty_record_t *db_last_dirty;
|
||||
|
||||
/*
|
||||
* Our link on the owner dnodes's dn_dbufs list.
|
||||
* Protected by its dn_dbufs_mtx.
|
||||
*/
|
||||
list_node_t db_link;
|
||||
|
||||
/* Data which is unique to data (leaf) blocks: */
|
||||
|
||||
/* stuff we store for the user (see dmu_buf_set_user) */
|
||||
void *db_user_ptr;
|
||||
void **db_user_data_ptr_ptr;
|
||||
dmu_buf_evict_func_t *db_evict_func;
|
||||
|
||||
uint8_t db_immediate_evict;
|
||||
uint8_t db_freed_in_flight;
|
||||
|
||||
uint8_t db_dirtycnt;
|
||||
} dmu_buf_impl_t;
|
||||
|
||||
/* Note: the dbuf hash table is exposed only for the mdb module */
/*
 * DBUF_HASH_MUTEX() yields the address of the hash_mutexes[] slot for
 * idx.  The index is reduced with a mask of DBUF_MUTEXES-1, which only
 * works while DBUF_MUTEXES is a power of two.
 */
#define	DBUF_MUTEXES 256
#define	DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
|
||||
typedef struct dbuf_hash_table {
|
||||
uint64_t hash_table_mask;
|
||||
dmu_buf_impl_t **hash_table;
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES];
|
||||
} dbuf_hash_table_t;
|
||||
|
||||
|
||||
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
|
||||
|
||||
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
|
||||
void dbuf_create_bonus(struct dnode *dn);
|
||||
|
||||
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
|
||||
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
|
||||
void *tag);
|
||||
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
|
||||
void *tag, dmu_buf_impl_t **dbp);
|
||||
|
||||
void dbuf_prefetch(struct dnode *dn, uint64_t blkid);
|
||||
|
||||
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
|
||||
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
|
||||
|
||||
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
|
||||
|
||||
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
|
||||
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
void dbuf_evict(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
|
||||
struct dmu_tx *);
|
||||
|
||||
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_init(void);
|
||||
void dbuf_fini(void);
|
||||
|
||||
/*
 * Classification helpers for a dbuf.  Every macro expands its argument
 * more than once, so the argument must be free of side effects.
 */

/* Metadata if the dbuf is an indirect block or its dnode's type is flagged ot_metadata. */
#define	DBUF_IS_METADATA(db)	\
	((db)->db_level > 0 || dmu_ot[(db)->db_dnode->dn_type].ot_metadata)

/* Map the metadata test onto the matching ARC buffer contents type. */
#define	DBUF_GET_BUFC_TYPE(db)	\
	(DBUF_IS_METADATA(db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)

/* True if os_primary_cache is "all", or is "metadata" and this is metadata. */
#define	DBUF_IS_CACHEABLE(db)						\
	((db)->db_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
	(DBUF_IS_METADATA(db) &&					\
	((db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))

/* Same test as DBUF_IS_CACHEABLE(), but against os_secondary_cache. */
#define	DBUF_IS_L2CACHEABLE(db)						\
	((db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
	(DBUF_IS_METADATA(db) &&					\
	((db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
|
||||
|
||||
/*
 * Debug helpers.  With ZFS_DEBUG defined, dprintf_dbuf() prefixes the
 * caller's message with "obj=... lvl=... blkid=..." (the object is
 * printed as "mdn" when it equals DMU_META_DNODE_OBJECT) and hands it to
 * dprintf_ds(); dprintf_dbuf_bp() additionally appends a formatted block
 * pointer.  Both only act when ZFS_DEBUG_DPRINTF is set in zfs_flags, and
 * both need at least one variadic argument because __VA_ARGS__ follows a
 * comma.  Without ZFS_DEBUG all three macros expand to nothing, so their
 * arguments are not evaluated.
 */
#ifdef ZFS_DEBUG

/*
 * There should be a ## between the string literal and fmt, to make it
 * clear that we're joining two strings together, but gcc does not
 * support that preprocessor token.
 */
#define	dprintf_dbuf(dbuf, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	char __db_buf[32]; \
	uint64_t __db_obj = (dbuf)->db.db_object; \
	if (__db_obj == DMU_META_DNODE_OBJECT) \
		(void) strcpy(__db_buf, "mdn"); \
	else \
		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
		    (u_longlong_t)__db_obj); \
	dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
	    "obj=%s lvl=%u blkid=%lld " fmt, \
	    __db_buf, (dbuf)->db_level, \
	    (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)

#define	dprintf_dbuf_bp(db, bp, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
	sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
	dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
	kmem_free(__blkbuf, BP_SPRINTF_LEN); \
	} \
_NOTE(CONSTCOND) } while (0)

#define	DBUF_VERIFY(db)	dbuf_verify(db)

#else

#define	dprintf_dbuf(db, fmt, ...)
#define	dprintf_dbuf_bp(db, bp, fmt, ...)
#define	DBUF_VERIFY(db)

#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DBUF_H */
|
||||
@@ -0,0 +1,638 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_H
|
||||
#define _SYS_DMU_H
|
||||
|
||||
/*
|
||||
* This file describes the interface that the DMU provides for its
|
||||
* consumers.
|
||||
*
|
||||
* The DMU also interacts with the SPA. That interface is described in
|
||||
* dmu_spa.h.
|
||||
*/
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/cred.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Forward declarations, so the prototypes in this header can take
 * pointers to these types without pulling in their defining headers.
 */
struct uio;
struct page;
struct vnode;
struct spa;
struct zilog;
struct zio;
struct blkptr;
struct zap_cursor;
struct dsl_dataset;
struct dsl_pool;
struct dnode;
struct drr_begin;
struct drr_end;
struct zbookmark;
struct nvlist;
struct objset_impl;

/* Handle types; the structures behind them are not defined in this header. */
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
|
||||
|
||||
/*
 * DMU object types.  The trailing comment on each enumerator is the
 * original annotation (UINT64, ZAP, ...).  DMU_OT_NUMTYPES is the count
 * of the enumerators that precede it.
 * NOTE(review): the numeric values are presumably stored on disk, in
 * which case new types may only be added immediately before
 * DMU_OT_NUMTYPES -- confirm.
 */
typedef enum dmu_object_type {
	DMU_OT_NONE,
	/* general: */
	DMU_OT_OBJECT_DIRECTORY,	/* ZAP */
	DMU_OT_OBJECT_ARRAY,		/* UINT64 */
	DMU_OT_PACKED_NVLIST,		/* UINT8 (XDR by nvlist_pack/unpack) */
	DMU_OT_PACKED_NVLIST_SIZE,	/* UINT64 */
	DMU_OT_BPLIST,			/* UINT64 */
	DMU_OT_BPLIST_HDR,		/* UINT64 */
	/* spa: */
	DMU_OT_SPACE_MAP_HEADER,	/* UINT64 */
	DMU_OT_SPACE_MAP,		/* UINT64 */
	/* zil: */
	DMU_OT_INTENT_LOG,		/* UINT64 */
	/* dmu: */
	DMU_OT_DNODE,			/* DNODE */
	DMU_OT_OBJSET,			/* OBJSET */
	/* dsl: */
	DMU_OT_DSL_DIR,			/* UINT64 */
	DMU_OT_DSL_DIR_CHILD_MAP,	/* ZAP */
	DMU_OT_DSL_DS_SNAP_MAP,		/* ZAP */
	DMU_OT_DSL_PROPS,		/* ZAP */
	DMU_OT_DSL_DATASET,		/* UINT64 */
	/* zpl: */
	DMU_OT_ZNODE,			/* ZNODE */
	DMU_OT_OLDACL,			/* Old ACL */
	DMU_OT_PLAIN_FILE_CONTENTS,	/* UINT8 */
	DMU_OT_DIRECTORY_CONTENTS,	/* ZAP */
	DMU_OT_MASTER_NODE,		/* ZAP */
	DMU_OT_UNLINKED_SET,		/* ZAP */
	/* zvol: */
	DMU_OT_ZVOL,			/* UINT8 */
	DMU_OT_ZVOL_PROP,		/* ZAP */
	/* other; for testing only! */
	DMU_OT_PLAIN_OTHER,		/* UINT8 */
	DMU_OT_UINT64_OTHER,		/* UINT64 */
	DMU_OT_ZAP_OTHER,		/* ZAP */
	/* new object types: */
	DMU_OT_ERROR_LOG,		/* ZAP */
	DMU_OT_SPA_HISTORY,		/* UINT8 */
	DMU_OT_SPA_HISTORY_OFFSETS,	/* spa_his_phys_t */
	DMU_OT_POOL_PROPS,		/* ZAP */
	DMU_OT_DSL_PERMS,		/* ZAP */
	DMU_OT_ACL,			/* ACL */
	DMU_OT_SYSACL,			/* SYSACL */
	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
	DMU_OT_NEXT_CLONES,		/* ZAP */
	DMU_OT_SCRUB_QUEUE,		/* ZAP */
	DMU_OT_NUMTYPES
} dmu_object_type_t;
|
||||
|
||||
/*
 * Objset types.  DMU_OST_NUMTYPES is the count of the enumerators that
 * precede it.
 */
typedef enum dmu_objset_type {
	DMU_OST_NONE,
	DMU_OST_META,
	DMU_OST_ZFS,
	DMU_OST_ZVOL,
	DMU_OST_OTHER,			/* For testing only! */
	DMU_OST_ANY,			/* Be careful! */
	DMU_OST_NUMTYPES
} dmu_objset_type_t;
|
||||
|
||||
/*
 * Byteswap routines.  All share the signature (void *buf, size_t size).
 * NOTE(review): size is presumably a byte count -- confirm.
 */
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
void byteswap_uint8_array(void *buf, size_t size);
void zap_byteswap(void *buf, size_t size);
void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
|
||||
|
||||
/*
 * Dataset open modes.  The low two bits (DS_MODE_TYPE_MASK) select the
 * access type; DS_MODE_READONLY and DS_MODE_INCONSISTENT are separate
 * flag bits that can be OR'ed in.  The DS_MODE_IS_*() tests return the
 * masked bit rather than 0/1.
 */
#define	DS_MODE_NOHOLD		0	/* internal use only */
#define	DS_MODE_USER		1	/* simple access, no special needs */
#define	DS_MODE_OWNER		2	/* the "main" access, e.g. a mount */
#define	DS_MODE_TYPE_MASK	0x3
#define	DS_MODE_TYPE(x)		((x) & DS_MODE_TYPE_MASK)
#define	DS_MODE_READONLY	0x8
#define	DS_MODE_IS_READONLY(x)	((x) & DS_MODE_READONLY)
#define	DS_MODE_INCONSISTENT	0x10
#define	DS_MODE_IS_INCONSISTENT(x)	((x) & DS_MODE_INCONSISTENT)

/* Flag bits whose names suggest dmu_objset_find() -- TODO confirm. */
#define	DS_FIND_SNAPSHOTS	(1<<0)
#define	DS_FIND_CHILDREN	(1<<1)
|
||||
|
||||
/*
 * The maximum number of bytes that can be accessed as part of one
 * operation, including metadata.
 */
#define	DMU_MAX_ACCESS (10<<20) /* 10MB */
#define	DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
|
||||
|
||||
/*
|
||||
* Public routines to create, destroy, open, and close objsets.
|
||||
*/
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
int dmu_objset_rename(const char *name, const char *newname,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
|
||||
/* The publicly visible view of a DMU buffer. */
typedef struct dmu_buf {
	uint64_t db_object;		/* object that this buffer is part of */
	uint64_t db_offset;		/* byte offset in this object */
	uint64_t db_size;		/* size of buffer in bytes */
	void *db_data;			/* data in buffer */
} dmu_buf_t;

/* Eviction callback signature: receives the buffer and a user pointer. */
typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
|
||||
/*
 * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
 * (DMU_POOL_DIRECTORY_OBJECT itself is a number, not a name.)
 */
#define	DMU_POOL_DIRECTORY_OBJECT	1
#define	DMU_POOL_CONFIG			"config"
#define	DMU_POOL_ROOT_DATASET		"root_dataset"
#define	DMU_POOL_SYNC_BPLIST		"sync_bplist"
#define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
#define	DMU_POOL_ERRLOG_LAST		"errlog_last"
#define	DMU_POOL_SPARES			"spares"
#define	DMU_POOL_DEFLATE		"deflate"
#define	DMU_POOL_HISTORY		"history"
#define	DMU_POOL_PROPS			"pool_props"
#define	DMU_POOL_L2CACHE		"l2cache"

/* 4x8 zbookmark_t */
#define	DMU_POOL_SCRUB_BOOKMARK		"scrub_bookmark"
/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
#define	DMU_POOL_SCRUB_QUEUE		"scrub_queue"
/* 1x8 txg */
#define	DMU_POOL_SCRUB_MIN_TXG		"scrub_min_txg"
/* 1x8 txg */
#define	DMU_POOL_SCRUB_MAX_TXG		"scrub_max_txg"
/* 1x4 enum scrub_func */
#define	DMU_POOL_SCRUB_FUNC		"scrub_func"
/* 1x8 count */
#define	DMU_POOL_SCRUB_ERRORS		"scrub_errors"
|
||||
|
||||
/*
|
||||
* Allocate an object from this objset. The range of object numbers
|
||||
* available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode.
|
||||
*
|
||||
* The transaction must be assigned to a txg. The newly allocated
|
||||
* object will be "held" in the transaction (ie. you can modify the
|
||||
* newly allocated object in this transaction).
|
||||
*
|
||||
* dmu_object_alloc() chooses an object and returns it in *objectp.
|
||||
*
|
||||
* dmu_object_claim() allocates a specific object number. If that
|
||||
* number is already allocated, it fails and returns EEXIST.
|
||||
*
|
||||
* Return 0 on success, or ENOSPC or EEXIST as specified above.
|
||||
*/
|
||||
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free an object from this objset.
|
||||
*
|
||||
* The object's data will be freed as well (ie. you don't need to call
|
||||
* dmu_free(object, 0, -1, tx)).
|
||||
*
|
||||
* The object need not be held in the transaction.
|
||||
*
|
||||
* If there are any holds on this object's buffers (via dmu_buf_hold()),
|
||||
* or tx holds on the object (via dmu_tx_hold_object()), you can not
|
||||
* free it; it fails and returns EBUSY.
|
||||
*
|
||||
* If the object is not allocated, it fails and returns ENOENT.
|
||||
*
|
||||
* Return 0 on success, or EBUSY or ENOENT as specified above.
|
||||
*/
|
||||
int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Find the next allocated or free object.
|
||||
*
|
||||
* The objectp parameter is in-out. It will be updated to be the next
|
||||
* object which is allocated. Ignore objects which have not been
|
||||
* modified since txg.
|
||||
*
|
||||
* XXX Can only be called on a objset with no dirty data.
|
||||
*
|
||||
* Returns 0 on success, or ENOENT if there are no more objects.
|
||||
*/
|
||||
int dmu_object_next(objset_t *os, uint64_t *objectp,
|
||||
boolean_t hole, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Set the data blocksize for an object.
|
||||
*
|
||||
* The object cannot have any blocks allcated beyond the first. If
|
||||
* the first block is allocated already, the new size must be greater
|
||||
* than the current block size. If these conditions are not met,
|
||||
* ENOTSUP will be returned.
|
||||
*
|
||||
* Returns 0 on success, or EBUSY if there are any holds on the object
|
||||
* contents, or ENOTSUP as described above.
|
||||
*/
|
||||
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
|
||||
int ibs, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the checksum property on a dnode. The new checksum algorithm will
|
||||
* apply to all newly written blocks; existing blocks will not be affected.
|
||||
*/
|
||||
void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the compress property on a dnode. The new compression algorithm will
|
||||
* apply to all newly written blocks; existing blocks will not be affected.
|
||||
*/
|
||||
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Decide how many copies of a given block we should make. Can be from
|
||||
* 1 to SPA_DVAS_PER_BP.
|
||||
*/
|
||||
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
|
||||
dmu_object_type_t ot);
|
||||
/*
|
||||
* The bonus data is accessed more or less like a regular buffer.
|
||||
* You must dmu_bonus_hold() to get the buffer, which will give you a
|
||||
* dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
|
||||
* data. As with any normal buffer, you must call dmu_buf_read() to
|
||||
* read db_data, dmu_buf_will_dirty() before modifying it, and the
|
||||
* object must be held in an assigned transaction before calling
|
||||
* dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
|
||||
* buffer as well. You must release your hold with dmu_buf_rele().
|
||||
*/
|
||||
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
|
||||
int dmu_bonus_max(void);
|
||||
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
||||
|
||||
/*
|
||||
* Obtain the DMU buffer from the specified object which contains the
|
||||
* specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
|
||||
* that it will remain in memory. You must release the hold with
|
||||
* dmu_buf_rele(). You mustn't access the dmu_buf_t after releasing your
|
||||
* hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
|
||||
*
|
||||
* You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
|
||||
* on the returned buffer before reading or writing the buffer's
|
||||
* db_data. The comments for those routines describe what particular
|
||||
* operations are valid after calling them.
|
||||
*
|
||||
* The object number must be a valid, allocated object number.
|
||||
*/
|
||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **);
|
||||
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
|
||||
void dmu_buf_rele(dmu_buf_t *db, void *tag);
|
||||
uint64_t dmu_buf_refcount(dmu_buf_t *db);
|
||||
|
||||
/*
|
||||
* dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
|
||||
* range of an object. A pointer to an array of dmu_buf_t*'s is
|
||||
* returned (in *dbpp).
|
||||
*
|
||||
* dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
|
||||
* frees the array. The hold on the array of buffers MUST be released
|
||||
* with dmu_buf_rele_array. You can NOT release the hold on each buffer
|
||||
* individually with dmu_buf_rele.
|
||||
*/
|
||||
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
|
||||
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
|
||||
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
|
||||
|
||||
/*
|
||||
* Returns NULL on success, or the existing user ptr if it's already
|
||||
* been set.
|
||||
*
|
||||
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
|
||||
*
|
||||
* user_data_ptr_ptr should be NULL, or a pointer to a pointer which
|
||||
* will be set to db->db_data when you are allowed to access it. Note
|
||||
* that db->db_data (the pointer) can change when you do dmu_buf_read(),
|
||||
* dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
|
||||
* *user_data_ptr_ptr will be set to the new value when it changes.
|
||||
*
|
||||
* If non-NULL, pageout func will be called when this buffer is being
|
||||
* excised from the cache, so that you can clean up the data structure
|
||||
* pointed to by user_ptr.
|
||||
*
|
||||
* dmu_evict_user() will call the pageout func for all buffers in an
|
||||
* objset with a given pageout func.
|
||||
*/
|
||||
void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
|
||||
dmu_buf_evict_func_t *pageout_func);
|
||||
/*
|
||||
* set_user_ie is the same as set_user, but requests immediate eviction
|
||||
* when hold count goes to zero.
|
||||
*/
|
||||
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
|
||||
void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
|
||||
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
|
||||
void *user_ptr, void *user_data_ptr_ptr,
|
||||
dmu_buf_evict_func_t *pageout_func);
|
||||
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
|
||||
|
||||
/*
|
||||
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
|
||||
*/
|
||||
void *dmu_buf_get_user(dmu_buf_t *db);
|
||||
|
||||
/*
|
||||
* Indicate that you are going to modify the buffer's data (db_data).
|
||||
*
|
||||
* The transaction (tx) must be assigned to a txg (ie. you've called
|
||||
* dmu_tx_assign()). The buffer's object must be held in the tx
|
||||
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
||||
*/
|
||||
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* You must create a transaction, then hold the objects which you will
|
||||
* (or might) modify as part of this transaction. Then you must assign
|
||||
* the transaction to a transaction group. Once the transaction has
|
||||
* been assigned, you can modify buffers which belong to held objects as
|
||||
* part of this transaction. You can't modify buffers before the
|
||||
* transaction has been assigned; you can't modify buffers which don't
|
||||
* belong to objects which this transaction holds; you can't hold
|
||||
* objects once the transaction has been assigned. You may hold an
|
||||
* object which you are going to free (with dmu_object_free()), but you
|
||||
* don't have to.
|
||||
*
|
||||
* You can abort the transaction before it has been assigned.
|
||||
*
|
||||
* Note that you may hold buffers (with dmu_buf_hold) at any time,
|
||||
* regardless of transaction state.
|
||||
*/
|
||||
|
||||
/*
 * Sentinel values.  Both expand to (-1ULL), i.e. an unsigned long long
 * with every bit set.
 * NOTE(review): presumably passed where an object number or an end-of-range
 * length is expected by the dmu_tx_hold_*() routines -- confirm with callers.
 */
#define	DMU_NEW_OBJECT	(-1ULL)
#define	DMU_OBJECT_END	(-1ULL)
|
||||
|
||||
dmu_tx_t *dmu_tx_create(objset_t *os);
|
||||
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
|
||||
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
|
||||
uint64_t len);
|
||||
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
|
||||
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free up the data blocks for a defined range of a file. If size is
|
||||
* zero, the range from offset to end-of-file is freed.
|
||||
*/
|
||||
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, dmu_tx_t *tx);
|
||||
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size);
|
||||
int dmu_free_object(objset_t *os, uint64_t object);
|
||||
|
||||
/*
|
||||
* Convenience functions.
|
||||
*
|
||||
* Canfail routines will return 0 on success, or an errno if there is a
|
||||
* nonrecoverable I/O error.
|
||||
*/
|
||||
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
void *buf);
|
||||
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
const void *buf, dmu_tx_t *tx);
|
||||
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
|
||||
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, struct page *pp, dmu_tx_t *tx);
|
||||
|
||||
extern int zfs_prefetch_disable;
|
||||
|
||||
/*
|
||||
* Asynchronously try to read in the data.
|
||||
*/
|
||||
void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t len);
|
||||
|
||||
typedef struct dmu_object_info {
|
||||
/* All sizes are in bytes. */
|
||||
uint32_t doi_data_block_size;
|
||||
uint32_t doi_metadata_block_size;
|
||||
uint64_t doi_bonus_size;
|
||||
dmu_object_type_t doi_type;
|
||||
dmu_object_type_t doi_bonus_type;
|
||||
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
|
||||
uint8_t doi_checksum;
|
||||
uint8_t doi_compress;
|
||||
uint8_t doi_pad[5];
|
||||
/* Values below are number of 512-byte blocks. */
|
||||
uint64_t doi_physical_blks; /* data + metadata */
|
||||
uint64_t doi_max_block_offset;
|
||||
} dmu_object_info_t;
|
||||
|
||||
typedef void arc_byteswap_func_t(void *buf, size_t size);
|
||||
|
||||
typedef struct dmu_object_type_info {
|
||||
arc_byteswap_func_t *ot_byteswap;
|
||||
boolean_t ot_metadata;
|
||||
char *ot_name;
|
||||
} dmu_object_type_info_t;
|
||||
|
||||
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
|
||||
|
||||
/*
|
||||
* Get information on a DMU object.
|
||||
*
|
||||
* Return 0 on success or ENOENT if object is not allocated.
|
||||
*
|
||||
* If doi is NULL, just indicates whether the object exists.
|
||||
*/
|
||||
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
|
||||
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
|
||||
u_longlong_t *nblk512);
|
||||
|
||||
typedef struct dmu_objset_stats {
|
||||
uint64_t dds_num_clones; /* number of clones of this */
|
||||
uint64_t dds_creation_txg;
|
||||
uint64_t dds_guid;
|
||||
dmu_objset_type_t dds_type;
|
||||
uint8_t dds_is_snapshot;
|
||||
uint8_t dds_inconsistent;
|
||||
char dds_origin[MAXNAMELEN];
|
||||
} dmu_objset_stats_t;
|
||||
|
||||
/*
|
||||
* Get stats on a dataset.
|
||||
*/
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
|
||||
/*
|
||||
* Add entries to the nvlist for all the objset's properties. See
|
||||
* zfs_prop_table[] and zfs(1m) for details on the properties.
|
||||
*/
|
||||
void dmu_objset_stats(objset_t *os, struct nvlist *nv);
|
||||
|
||||
/*
|
||||
* Get the space usage statistics for statvfs().
|
||||
*
|
||||
* refdbytes is the amount of space "referenced" by this objset.
|
||||
* availbytes is the amount of space available to this objset, taking
|
||||
* into account quotas & reservations, assuming that no other objsets
|
||||
* use the space first. These values correspond to the 'referenced' and
|
||||
* 'available' properties, described in the zfs(1m) manpage.
|
||||
*
|
||||
* usedobjs and availobjs are the number of objects currently allocated,
|
||||
* and available.
|
||||
*/
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
|
||||
/*
|
||||
* The fsid_guid is a 56-bit ID that can change to avoid collisions.
|
||||
* (Contrast with the ds_guid which is a 64-bit ID that will never
|
||||
* change, so there is a small probability that it will collide.)
|
||||
*/
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
|
||||
int dmu_objset_is_snapshot(objset_t *os);
|
||||
|
||||
extern struct spa *dmu_objset_spa(objset_t *os);
|
||||
extern struct zilog *dmu_objset_zil(objset_t *os);
|
||||
extern struct dsl_pool *dmu_objset_pool(objset_t *os);
|
||||
extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
|
||||
extern void dmu_objset_name(objset_t *os, char *buf);
|
||||
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
|
||||
extern uint64_t dmu_objset_id(objset_t *os);
|
||||
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
|
||||
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
||||
int maxlen, boolean_t *conflict);
|
||||
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *idp, uint64_t *offp);
|
||||
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
|
||||
extern void *dmu_objset_get_user(objset_t *os);
|
||||
|
||||
/*
|
||||
* Return the txg number for the given assigned transaction.
|
||||
*/
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Synchronous write.
|
||||
* If a parent zio is provided this function initiates a write on the
|
||||
* provided buffer as a child of the parent zio.
|
||||
* In the absence of a parent zio, the write is completed synchronously.
|
||||
* At write completion, blk is filled with the bp of the written block.
|
||||
* Note that while the data covered by this function will be on stable
|
||||
* storage when the write completes this new data does not become a
|
||||
* permanent part of the file until the associated transaction commits.
|
||||
*/
|
||||
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
|
||||
int dmu_sync(struct zio *zio, dmu_buf_t *db,
|
||||
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
|
||||
|
||||
/*
|
||||
* Find the next hole or data block in file starting at *off.
|
||||
* Return found offset in *off. Return ESRCH for end of file.
|
||||
*/
|
||||
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
|
||||
uint64_t *off);
|
||||
|
||||
/*
|
||||
* Initial setup and final teardown.
|
||||
*/
|
||||
extern void dmu_init(void);
|
||||
extern void dmu_fini(void);
|
||||
|
||||
typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
|
||||
uint64_t object, uint64_t offset, int len);
|
||||
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
|
||||
dmu_traverse_cb_t cb, void *arg);
|
||||
|
||||
int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
struct vnode *vp, offset_t *off);
|
||||
|
||||
typedef struct dmu_recv_cookie {
|
||||
/*
|
||||
* This structure is opaque!
|
||||
*
|
||||
* If logical and real are different, we are recving the stream
|
||||
* into the "real" temporary clone, and then switching it with
|
||||
* the "logical" target.
|
||||
*/
|
||||
struct dsl_dataset *drc_logical_ds;
|
||||
struct dsl_dataset *drc_real_ds;
|
||||
struct drr_begin *drc_drrb;
|
||||
char *drc_tosnap;
|
||||
boolean_t drc_newfs;
|
||||
boolean_t drc_force;
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
|
||||
boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
|
||||
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc);
|
||||
void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
|
||||
|
||||
/*
 * CRC64 table: 256 64-bit entries, declared here and defined elsewhere.
 * ZFS_CRC64_POLY is the polynomial constant that goes with it.
 */
#define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_H */
|
||||
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_IMPL_H
|
||||
#define _SYS_DMU_IMPL_H
|
||||
|
||||
#include <sys/txg_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the locking strategy for the DMU. Numbers in parentheses are
|
||||
* cases that use that lock order, referenced below:
|
||||
*
|
||||
* ARC is self-contained
|
||||
* bplist is self-contained
|
||||
* refcount is self-contained
|
||||
* txg is self-contained (hopefully!)
|
||||
* zst_lock
|
||||
* zf_rwlock
|
||||
*
|
||||
* XXX try to improve evicting path?
|
||||
*
|
||||
* dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
|
||||
* dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leafs
|
||||
*
|
||||
* dp_config_rwlock
|
||||
* must be held before: everything
|
||||
* protects dd namespace changes
|
||||
* protects property changes globally
|
||||
* held from:
|
||||
* dsl_dir_open/r:
|
||||
* dsl_dir_create_sync/w:
|
||||
* dsl_dir_sync_destroy/w:
|
||||
* dsl_dir_rename_sync/w:
|
||||
* dsl_prop_changed_notify/r:
|
||||
*
|
||||
* os_obj_lock
|
||||
* must be held before:
|
||||
* everything except dp_config_rwlock
|
||||
* protects os_obj_next
|
||||
* held from:
|
||||
* dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
|
||||
*
|
||||
* dn_struct_rwlock
|
||||
* must be held before:
|
||||
* everything except dp_config_rwlock and os_obj_lock
|
||||
* protects structure of dnode (eg. nlevels)
|
||||
* db_blkptr can change when syncing out change to nlevels
|
||||
* dn_maxblkid
|
||||
* dn_nlevels
|
||||
* dn_*blksz*
|
||||
* phys nlevels, maxblkid, physical blkptr_t's (?)
|
||||
* held from:
|
||||
* callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
|
||||
* dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
|
||||
* dmu_tx_count_free:
|
||||
* dbuf_read_impl: db_mtx, dmu_zfetch()
|
||||
* dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
|
||||
* dbuf_new_size: db_mtx
|
||||
* dbuf_dirty: db_mtx
|
||||
* dbuf_findbp: (callers, phys? - the real need)
|
||||
* dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
|
||||
* dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
|
||||
* dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
|
||||
* dnode_sync/w (increase_indirection): db_mtx (phys)
|
||||
* dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
|
||||
* dnode_new_blkid/w: (dn_maxblkid)
|
||||
* dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
|
||||
* dnode_next_offset: (phys)
|
||||
*
|
||||
* dn_dbufs_mtx
|
||||
* must be held before:
|
||||
* db_mtx, hash_mutexes
|
||||
* protects:
|
||||
* dn_dbufs
|
||||
* dn_evicted
|
||||
* held from:
|
||||
* dmu_evict_user: db_mtx (dn_dbufs)
|
||||
* dbuf_free_range: db_mtx (dn_dbufs)
|
||||
* dbuf_remove_ref: db_mtx, callees:
|
||||
* dbuf_hash_remove: hash_mutexes, db_mtx
|
||||
* dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
|
||||
* dnode_set_blksz: (dn_dbufs)
|
||||
*
|
||||
* hash_mutexes (global)
|
||||
* must be held before:
|
||||
* db_mtx
|
||||
* protects dbuf_hash_table (global) and db_hash_next
|
||||
* held from:
|
||||
* dbuf_find: db_mtx
|
||||
* dbuf_hash_insert: db_mtx
|
||||
* dbuf_hash_remove: db_mtx
|
||||
*
|
||||
* db_mtx (meta-leaf)
|
||||
* must be held before:
|
||||
* dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
|
||||
* protects:
|
||||
* db_state
|
||||
* db_holds
|
||||
* db_buf
|
||||
* db_changed
|
||||
* db_data_pending
|
||||
* db_dirtied
|
||||
* db_link
|
||||
* db_dirty_node (??)
|
||||
* db_dirtycnt
|
||||
* db_d.*
|
||||
* db.*
|
||||
* held from:
|
||||
* dbuf_dirty: dn_mtx, dn_dirty_mtx
|
||||
* dbuf_dirty->dsl_dir_willuse_space: dd_lock
|
||||
* dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
|
||||
* dbuf_undirty: dn_dirty_mtx (db_d)
|
||||
* dbuf_write_done: dn_dirty_mtx (db_state)
|
||||
* dbuf_*
|
||||
* dmu_buf_update_user: none (db_d)
|
||||
* dmu_evict_user: none (db_d) (maybe can eliminate)
|
||||
* dbuf_find: none (db_holds)
|
||||
* dbuf_hash_insert: none (db_holds)
|
||||
* dmu_buf_read_array_impl: none (db_state, db_changed)
|
||||
* dmu_sync: none (db_dirty_node, db_d)
|
||||
* dnode_reallocate: none (db)
|
||||
*
|
||||
* dn_mtx (leaf)
|
||||
* protects:
|
||||
* dn_dirty_dbufs
|
||||
* dn_ranges
|
||||
* phys accounting
|
||||
* dn_allocated_txg
|
||||
* dn_free_txg
|
||||
* dn_assigned_txg
|
||||
* dd_assigned_tx
|
||||
* dn_notxholds
|
||||
* dn_dirtyctx
|
||||
* dn_dirtyctx_firstset
|
||||
* (dn_phys copy fields?)
|
||||
* (dn_phys contents?)
|
||||
* held from:
|
||||
* dnode_*
|
||||
* dbuf_dirty: none
|
||||
* dbuf_sync: none (phys accounting)
|
||||
* dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
|
||||
* dbuf_write_done: none (phys accounting)
|
||||
* dmu_object_info_from_dnode: none (accounting)
|
||||
* dmu_tx_commit: none
|
||||
* dmu_tx_hold_object_impl: none
|
||||
* dmu_tx_try_assign: dn_notxholds(cv)
|
||||
* dmu_tx_unassign: none
|
||||
*
|
||||
* dd_lock
|
||||
* must be held before:
|
||||
* ds_lock
|
||||
* ancestors' dd_lock
|
||||
* protects:
|
||||
* dd_prop_cbs
|
||||
* dd_sync_*
|
||||
* dd_used_bytes
|
||||
* dd_tempreserved
|
||||
* dd_space_towrite
|
||||
* dd_myname
|
||||
* dd_phys accounting?
|
||||
* held from:
|
||||
* dsl_dir_*
|
||||
* dsl_prop_changed_notify: none (dd_prop_cbs)
|
||||
* dsl_prop_register: none (dd_prop_cbs)
|
||||
* dsl_prop_unregister: none (dd_prop_cbs)
|
||||
* dsl_dataset_block_freeable: none (dd_sync_*)
|
||||
*
|
||||
* os_lock (leaf)
|
||||
* protects:
|
||||
* os_dirty_dnodes
|
||||
* os_free_dnodes
|
||||
* os_dnodes
|
||||
* os_downgraded_dbufs
|
||||
* dn_dirtyblksz
|
||||
* dn_dirty_link
|
||||
* held from:
|
||||
* dnode_create: none (os_dnodes)
|
||||
* dnode_destroy: none (os_dnodes)
|
||||
* dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
|
||||
* dnode_free: none (dn_dirtyblksz, os_*_dnodes)
|
||||
*
|
||||
* ds_lock
|
||||
* protects:
|
||||
* ds_user_ptr
|
||||
* ds_user_evict_func
|
||||
* ds_open_refcount
|
||||
* ds_snapname
|
||||
* ds_phys accounting
|
||||
* ds_reserved
|
||||
* held from:
|
||||
* dsl_dataset_*
|
||||
*
|
||||
* dr_mtx (leaf)
|
||||
* protects:
|
||||
* dr_children
|
||||
* held from:
|
||||
* dbuf_dirty
|
||||
* dbuf_undirty
|
||||
* dbuf_sync_indirect
|
||||
* dnode_new_blkid
|
||||
*/
|
||||
|
||||
struct objset;
|
||||
struct dmu_pool;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_IMPL_H */
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_OBJSET_H
|
||||
#define _SYS_DMU_OBJSET_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dmu_tx;
|
||||
struct objset_impl;
|
||||
|
||||
typedef struct objset_phys {
|
||||
dnode_phys_t os_meta_dnode;
|
||||
zil_header_t os_zil_header;
|
||||
uint64_t os_type;
|
||||
char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
|
||||
sizeof (uint64_t)];
|
||||
} objset_phys_t;
|
||||
|
||||
/*
 * Object set handle: a pointer to the implementation structure plus a
 * mode value.
 * NOTE(review): os_mode presumably records the 'mode' argument given to
 * dmu_objset_open() -- confirm.
 */
struct objset {
	struct objset_impl *os;
	int os_mode;
};
|
||||
|
||||
typedef struct objset_impl {
|
||||
/* Immutable: */
|
||||
struct dsl_dataset *os_dsl_dataset;
|
||||
spa_t *os_spa;
|
||||
arc_buf_t *os_phys_buf;
|
||||
objset_phys_t *os_phys;
|
||||
dnode_t *os_meta_dnode;
|
||||
zilog_t *os_zil;
|
||||
objset_t os;
|
||||
uint8_t os_checksum; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_compress; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_copies; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
blkptr_t *os_rootbp;
|
||||
zil_header_t os_zil_header;
|
||||
|
||||
/* Protected by os_obj_lock */
|
||||
kmutex_t os_obj_lock;
|
||||
uint64_t os_obj_next;
|
||||
|
||||
/* Protected by os_lock */
|
||||
kmutex_t os_lock;
|
||||
list_t os_dirty_dnodes[TXG_SIZE];
|
||||
list_t os_free_dnodes[TXG_SIZE];
|
||||
list_t os_dnodes;
|
||||
list_t os_downgraded_dbufs;
|
||||
|
||||
/* stuff we store for the user */
|
||||
kmutex_t os_user_ptr_lock;
|
||||
void *os_user_ptr;
|
||||
} objset_impl_t;
|
||||
|
||||
#define DMU_META_DNODE_OBJECT 0
|
||||
|
||||
/*
 * True when (os)->os_secondary_cache is ZFS_CACHE_ALL or
 * ZFS_CACHE_METADATA.  'os' is evaluated twice, so do not pass an
 * expression with side effects.
 */
#define	DMU_OS_IS_L2CACHEABLE(os)				\
	((os)->os_secondary_cache == ZFS_CACHE_ALL ||		\
	(os)->os_secondary_cache == ZFS_CACHE_METADATA)
|
||||
|
||||
/* called from zpl */
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int flags);
|
||||
int dmu_objset_find_spa(spa_t *spa, const char *name,
|
||||
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
|
||||
/* called from dsl */
|
||||
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
||||
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
|
||||
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
|
||||
objset_impl_t **osip);
|
||||
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_OBJSET_H */
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TRAVERSE_H
|
||||
#define _SYS_DMU_TRAVERSE_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dnode_phys;
|
||||
struct dsl_dataset;
|
||||
|
||||
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
|
||||
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
|
||||
|
||||
/*
 * Traversal flag bits; each occupies its own bit so they can be OR-ed
 * together.  TRAVERSE_PREFETCH is the union of the two prefetch bits.
 * NOTE(review): presumably these are the values accepted by the 'flags'
 * argument of traverse_dataset() -- confirm.
 */
#define	TRAVERSE_PRE			(1 << 0)
#define	TRAVERSE_POST			(1 << 1)
#define	TRAVERSE_PREFETCH_METADATA	(1 << 2)
#define	TRAVERSE_PREFETCH_DATA		(1 << 3)
#define	TRAVERSE_PREFETCH \
	(TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
|
||||
|
||||
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
|
||||
int flags, blkptr_cb_t func, void *arg);
|
||||
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_TRAVERSE_H */
|
||||
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TX_H
|
||||
#define _SYS_DMU_TX_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dmu_buf_impl;
|
||||
struct dmu_tx_hold;
|
||||
struct dnode_link;
|
||||
struct dsl_pool;
|
||||
struct dnode;
|
||||
struct dsl_dir;
|
||||
|
||||
struct dmu_tx {
|
||||
/*
|
||||
* No synchronization is needed because a tx can only be handled
|
||||
* by one thread.
|
||||
*/
|
||||
list_t tx_holds; /* list of dmu_tx_hold_t */
|
||||
objset_t *tx_objset;
|
||||
struct dsl_dir *tx_dir;
|
||||
struct dsl_pool *tx_pool;
|
||||
uint64_t tx_txg;
|
||||
uint64_t tx_lastsnap_txg;
|
||||
uint64_t tx_lasttried_txg;
|
||||
txg_handle_t tx_txgh;
|
||||
void *tx_tempreserve_cookie;
|
||||
struct dmu_tx_hold *tx_needassign_txh;
|
||||
uint8_t tx_anyobj;
|
||||
int tx_err;
|
||||
#ifdef ZFS_DEBUG
|
||||
uint64_t tx_space_towrite;
|
||||
uint64_t tx_space_tofree;
|
||||
uint64_t tx_space_tooverwrite;
|
||||
uint64_t tx_space_tounref;
|
||||
refcount_t tx_space_written;
|
||||
refcount_t tx_space_freed;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
 * Kind of hold, stored in dmu_tx_hold_t's txh_type member (which exists
 * only when ZFS_DEBUG is defined).  THT_NUMTYPES is not a hold type; as
 * the last enumerator it equals the number of types listed before it.
 */
enum dmu_tx_hold_type {
	THT_NEWOBJECT,
	THT_WRITE,
	THT_BONUS,
	THT_FREE,
	THT_ZAP,
	THT_SPACE,
	THT_NUMTYPES
};
|
||||
|
||||
typedef struct dmu_tx_hold {
|
||||
dmu_tx_t *txh_tx;
|
||||
list_node_t txh_node;
|
||||
struct dnode *txh_dnode;
|
||||
uint64_t txh_space_towrite;
|
||||
uint64_t txh_space_tofree;
|
||||
uint64_t txh_space_tooverwrite;
|
||||
uint64_t txh_space_tounref;
|
||||
uint64_t txh_memory_tohold;
|
||||
uint64_t txh_fudge;
|
||||
#ifdef ZFS_DEBUG
|
||||
enum dmu_tx_hold_type txh_type;
|
||||
uint64_t txh_arg1;
|
||||
uint64_t txh_arg2;
|
||||
#endif
|
||||
} dmu_tx_hold_t;
|
||||
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu.h, and are called by the user.
|
||||
*/
|
||||
dmu_tx_t *dmu_tx_create(objset_t *dd);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||||
*/
|
||||
extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* These routines are only called by the DMU.
|
||||
*/
|
||||
dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
|
||||
int dmu_tx_is_syncing(dmu_tx_t *tx);
|
||||
int dmu_tx_private_ok(dmu_tx_t *tx);
|
||||
void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
|
||||
void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
|
||||
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
|
||||
int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
|
||||
|
||||
/*
 * DMU_TX_DIRTY_BUF(tx, db) calls dmu_tx_dirty_buf() in ZFS_DEBUG builds
 * and expands to nothing otherwise.
 */
#ifndef ZFS_DEBUG
#define	DMU_TX_DIRTY_BUF(tx, db)
#else
#define	DMU_TX_DIRTY_BUF(tx, db)	dmu_tx_dirty_buf(tx, db)
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_TX_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _DFETCH_H
|
||||
#define _DFETCH_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Defined outside this header. */
extern uint64_t zfetch_array_rd_sz;

/* Forward declaration so that struct dnode can be referenced by pointer. */
struct dnode;
|
||||
|
||||
/*
 * Direction in which a prefetch stream advances.
 */
typedef enum zfetch_dirn {
	ZFETCH_FORWARD = 1,	/* block numbers increase */
	ZFETCH_BACKWARD = -1	/* block numbers decrease */
} zfetch_dirn_t;
|
||||
|
||||
typedef struct zstream {
|
||||
uint64_t zst_offset; /* offset of starting block in range */
|
||||
uint64_t zst_len; /* length of range, in blocks */
|
||||
zfetch_dirn_t zst_direction; /* direction of prefetch */
|
||||
uint64_t zst_stride; /* length of stride, in blocks */
|
||||
uint64_t zst_ph_offset; /* prefetch offset, in blocks */
|
||||
uint64_t zst_cap; /* prefetch limit (cap), in blocks */
|
||||
kmutex_t zst_lock; /* protects stream */
|
||||
clock_t zst_last; /* lbolt of last prefetch */
|
||||
avl_node_t zst_node; /* embed avl node here */
|
||||
} zstream_t;
|
||||
|
||||
typedef struct zfetch {
|
||||
krwlock_t zf_rwlock; /* protects zfetch structure */
|
||||
list_t zf_stream; /* AVL tree of zstream_t's */
|
||||
struct dnode *zf_dnode; /* dnode that owns this zfetch */
|
||||
uint32_t zf_stream_cnt; /* # of active streams */
|
||||
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
|
||||
} zfetch_t;
|
||||
|
||||
/*
 * Prefetch entry points.  Parameters are unnamed in this header; see
 * the implementation for their meaning.
 */
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _DFETCH_H */
|
||||
@@ -0,0 +1,275 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DNODE_H
|
||||
#define _SYS_DNODE_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/dmu_zfetch.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Flags accepted by dnode_hold().
 */
#define	DNODE_MUST_BE_ALLOCATED	1
#define	DNODE_MUST_BE_FREE	2

/*
 * Flags accepted by dnode_next_offset(); distinct bits.
 */
#define	DNODE_FIND_HOLE		1
#define	DNODE_FIND_BACKWARDS	2
#define	DNODE_FIND_HAVELOCK	4
|
||||
|
||||
/*
 * Fixed constants.
 */
#define	DNODE_SHIFT		9	/* 512 bytes */
#define	DN_MIN_INDBLKSHIFT	10	/* 1k */
#define	DN_MAX_INDBLKSHIFT	14	/* 16k */
#define	DNODE_BLOCK_SHIFT	14	/* 16k */
#define	DNODE_CORE_SIZE		64	/* 64 bytes for dnode sans blkptrs */
#define	DN_MAX_OBJECT_SHIFT	48	/* 256 trillion (zfs_fid_t limit) */
#define	DN_MAX_OFFSET_SHIFT	64	/* 2^64 bytes in a dnode */

/*
 * Constants derived from the fixed ones above (and from the SPA_*
 * shifts, which are defined elsewhere).
 */
#define	DNODE_SIZE	(1 << DNODE_SHIFT)
#define	DN_MAX_NBLKPTR	((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
#define	DN_MAX_BONUSLEN	(DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
#define	DN_MAX_OBJECT	(1ULL << DN_MAX_OBJECT_SHIFT)
#define	DN_ZERO_BONUSLEN	(DN_MAX_BONUSLEN + 1)

#define	DNODES_PER_BLOCK_SHIFT	(DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define	DNODES_PER_BLOCK	(1ULL << DNODES_PER_BLOCK_SHIFT)
#define	DNODES_PER_LEVEL_SHIFT	(DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)

/* Adding 2 is a cheesy way of rounding the quotient up. */
#define	DN_MAX_LEVELS	(2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
	(DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
|
||||
|
||||
/*
 * Address of the bonus data inside a dnode_phys_t: dn_bonus advanced by
 * (dn_nblkptr - 1) block-pointer slots.
 */
#define	DN_BONUS(dnp)	((void*)(&(dnp)->dn_bonus[ \
	((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)]))
|
||||
|
||||
/*
 * Space used by a dnode, in bytes.  dn_used is taken as-is when
 * DNODE_FLAG_USED_BYTES is set; otherwise it is scaled up by
 * SPA_MINBLOCKSHIFT.
 */
#define	DN_USED_BYTES(dnp) \
	((((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) != 0) ? \
	(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
|
||||
|
||||
/*
 * EPB(blkshift, typeshift) evaluates to 1 << (blkshift - typeshift).
 * Both arguments are parenthesized so that expression arguments such as
 * EPB(shift, a + b) subtract the whole second argument.
 */
#define	EPB(blkshift, typeshift)	(1 << ((blkshift) - (typeshift)))
|
||||
|
||||
/* Structure tags used below only through pointers. */
struct dmu_buf_impl;
struct objset_impl;
struct zio;
|
||||
|
||||
/*
 * Values stored in dnode_t's dn_dirtyctx field.
 */
enum dnode_dirtycontext {
	DN_UNDIRTIED = 0,
	DN_DIRTY_OPEN,
	DN_DIRTY_SYNC
};
|
||||
|
||||
/*
 * Set in dn_flags when dn_used is expressed in bytes; when clear,
 * dn_used is in multiples of SPA_MINBLOCKSIZE.
 */
#define	DNODE_FLAG_USED_BYTES	(1<<0)
|
||||
|
||||
typedef struct dnode_phys {
|
||||
uint8_t dn_type; /* dmu_object_type_t */
|
||||
uint8_t dn_indblkshift; /* ln2(indirect block size) */
|
||||
uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */
|
||||
uint8_t dn_nblkptr; /* length of dn_blkptr */
|
||||
uint8_t dn_bonustype; /* type of data in bonus buffer */
|
||||
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
|
||||
uint8_t dn_compress; /* ZIO_COMPRESS type */
|
||||
uint8_t dn_flags; /* DNODE_FLAG_* */
|
||||
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
|
||||
uint16_t dn_bonuslen; /* length of dn_bonus */
|
||||
uint8_t dn_pad2[4];
|
||||
|
||||
/* accounting is protected by dn_dirty_mtx */
|
||||
uint64_t dn_maxblkid; /* largest allocated block ID */
|
||||
uint64_t dn_used; /* bytes (or sectors) of disk space */
|
||||
|
||||
uint64_t dn_pad3[4];
|
||||
|
||||
blkptr_t dn_blkptr[1];
|
||||
uint8_t dn_bonus[DN_MAX_BONUSLEN];
|
||||
} dnode_phys_t;
|
||||
|
||||
typedef struct dnode {
|
||||
/*
|
||||
* dn_struct_rwlock protects the structure of the dnode,
|
||||
* including the number of levels of indirection (dn_nlevels),
|
||||
* dn_maxblkid, and dn_next_*
|
||||
*/
|
||||
krwlock_t dn_struct_rwlock;
|
||||
|
||||
/*
|
||||
* Our link on dataset's dd_dnodes list.
|
||||
* Protected by dd_accounting_mtx.
|
||||
*/
|
||||
list_node_t dn_link;
|
||||
|
||||
/* immutable: */
|
||||
struct objset_impl *dn_objset;
|
||||
uint64_t dn_object;
|
||||
struct dmu_buf_impl *dn_dbuf;
|
||||
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
|
||||
|
||||
/*
|
||||
* Copies of stuff in dn_phys. They're valid in the open
|
||||
* context (eg. even before the dnode is first synced).
|
||||
* Where necessary, these are protected by dn_struct_rwlock.
|
||||
*/
|
||||
dmu_object_type_t dn_type; /* object type */
|
||||
uint16_t dn_bonuslen; /* bonus length */
|
||||
uint8_t dn_bonustype; /* bonus type */
|
||||
uint8_t dn_nblkptr; /* number of blkptrs (immutable) */
|
||||
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
|
||||
uint8_t dn_compress; /* ZIO_COMPRESS type */
|
||||
uint8_t dn_nlevels;
|
||||
uint8_t dn_indblkshift;
|
||||
uint8_t dn_datablkshift; /* zero if blksz not power of 2! */
|
||||
uint16_t dn_datablkszsec; /* in 512b sectors */
|
||||
uint32_t dn_datablksz; /* in bytes */
|
||||
uint64_t dn_maxblkid;
|
||||
uint8_t dn_next_nlevels[TXG_SIZE];
|
||||
uint8_t dn_next_indblkshift[TXG_SIZE];
|
||||
uint16_t dn_next_bonuslen[TXG_SIZE];
|
||||
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
|
||||
|
||||
/* protected by os_lock: */
|
||||
list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
|
||||
|
||||
/* protected by dn_mtx: */
|
||||
kmutex_t dn_mtx;
|
||||
list_t dn_dirty_records[TXG_SIZE];
|
||||
avl_tree_t dn_ranges[TXG_SIZE];
|
||||
uint64_t dn_allocated_txg;
|
||||
uint64_t dn_free_txg;
|
||||
uint64_t dn_assigned_txg;
|
||||
kcondvar_t dn_notxholds;
|
||||
enum dnode_dirtycontext dn_dirtyctx;
|
||||
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
|
||||
|
||||
/* protected by own devices */
|
||||
refcount_t dn_tx_holds;
|
||||
refcount_t dn_holds;
|
||||
|
||||
kmutex_t dn_dbufs_mtx;
|
||||
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
|
||||
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
|
||||
|
||||
/* parent IO for current sync write */
|
||||
zio_t *dn_zio;
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
} dnode_t;
|
||||
|
||||
typedef struct free_range {
|
||||
avl_node_t fr_node;
|
||||
uint64_t fr_blkid;
|
||||
uint64_t fr_nblks;
|
||||
} free_range_t;
|
||||
|
||||
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
|
||||
uint64_t object);
|
||||
void dnode_special_close(dnode_t *dn);
|
||||
|
||||
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
|
||||
int dnode_hold(struct objset_impl *dd, uint64_t object,
|
||||
void *ref, dnode_t **dnp);
|
||||
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
|
||||
void *ref, dnode_t **dnp);
|
||||
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
|
||||
void dnode_rele(dnode_t *dn, void *ref);
|
||||
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_byteswap(dnode_phys_t *dnp);
|
||||
void dnode_buf_byteswap(void *buf, size_t size);
|
||||
void dnode_verify(dnode_t *dn);
|
||||
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
|
||||
uint64_t dnode_current_max_length(dnode_t *dn);
|
||||
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
|
||||
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
|
||||
uint64_t nblks, dmu_tx_t *tx);
|
||||
void dnode_diduse_space(dnode_t *dn, int64_t space);
|
||||
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
|
||||
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
|
||||
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
|
||||
void dnode_init(void);
|
||||
void dnode_fini(void);
|
||||
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
|
||||
int minlvl, uint64_t blkfill, uint64_t txg);
|
||||
void dnode_evict_dbufs(dnode_t *dn);
|
||||
|
||||
#ifndef ZFS_DEBUG

/* Non-debug builds: all three macros expand to nothing. */
#define	dprintf_dnode(db, fmt, ...)
#define	DNODE_VERIFY(dn)
#define	FREE_VERIFY(db, start, end, tx)

#else

/*
 * Ideally a ## would sit between the string literal and fmt, to make it
 * clear that two strings are being joined together, but gcc does not
 * support that preprocessor token there.
 *
 * The object is printed as "mdn" when it is DMU_META_DNODE_OBJECT and
 * as its decimal object number otherwise.
 */
#define	dprintf_dnode(dn, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
		char __db_buf[32]; \
		uint64_t __db_obj = (dn)->dn_object; \
		if (__db_obj == DMU_META_DNODE_OBJECT) \
			(void) strcpy(__db_buf, "mdn"); \
		else \
			(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
			    (u_longlong_t)__db_obj);\
		dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
		    __db_buf, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)

#define	DNODE_VERIFY(dn)		dnode_verify(dn)
#define	FREE_VERIFY(db, start, end, tx)	free_verify(db, start, end, tx)

#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DNODE_H */
|
||||
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DATASET_H
|
||||
#define _SYS_DSL_DATASET_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Structure tags needed before their full definitions. */
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;

/*
 * Signature of the eviction callback stored in a dataset's
 * ds_user_evict_func field.
 */
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
|
||||
|
||||
/*
 * Bits of dsl_dataset_phys_t's ds_flags field.
 */
#define	DS_FLAG_INCONSISTENT	(1ULL<<0)
#define	DS_IS_INCONSISTENT(ds)	\
	((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)

/*
 * NB: nopromote can not yet be set, but we want support for it in this
 * on-disk version, so that we don't need to upgrade for it later.  It
 * will be needed when we implement 'zfs split' (where the split off
 * clone should not be promoted).
 */
#define	DS_FLAG_NOPROMOTE	(1ULL<<1)

/*
 * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
 * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
 * refquota/refreservations).
 */
#define	DS_FLAG_UNIQUE_ACCURATE	(1ULL<<2)

/*
 * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
 * name lookups should be performed case-insensitively.
 */
#define	DS_FLAG_CI_DATASET	(1ULL<<16)
|
||||
|
||||
typedef struct dsl_dataset_phys {
|
||||
uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
|
||||
uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
|
||||
uint64_t ds_prev_snap_txg;
|
||||
uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */
|
||||
uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */
|
||||
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
|
||||
uint64_t ds_creation_time; /* seconds since 1970 */
|
||||
uint64_t ds_creation_txg;
|
||||
uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
|
||||
uint64_t ds_used_bytes;
|
||||
uint64_t ds_compressed_bytes;
|
||||
uint64_t ds_uncompressed_bytes;
|
||||
uint64_t ds_unique_bytes; /* only relevant to snapshots */
|
||||
/*
|
||||
* The ds_fsid_guid is a 56-bit ID that can change to avoid
|
||||
* collisions. The ds_guid is a 64-bit ID that will never
|
||||
* change, so there is a small probability that it will collide.
|
||||
*/
|
||||
uint64_t ds_fsid_guid;
|
||||
uint64_t ds_guid;
|
||||
uint64_t ds_flags; /* DS_FLAG_* */
|
||||
blkptr_t ds_bp;
|
||||
uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */
|
||||
uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */
|
||||
uint64_t ds_pad[6]; /* pad out to 320 bytes for good measure */
|
||||
} dsl_dataset_phys_t;
|
||||
|
||||
typedef struct dsl_dataset {
|
||||
/* Immutable: */
|
||||
struct dsl_dir *ds_dir;
|
||||
dsl_dataset_phys_t *ds_phys;
|
||||
dmu_buf_t *ds_dbuf;
|
||||
uint64_t ds_object;
|
||||
uint64_t ds_fsid_guid;
|
||||
|
||||
/* only used in syncing context, only valid for non-snapshots: */
|
||||
struct dsl_dataset *ds_prev;
|
||||
uint64_t ds_origin_txg;
|
||||
|
||||
/* has internal locking: */
|
||||
bplist_t ds_deadlist;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_datasets list */
|
||||
txg_node_t ds_dirty_link;
|
||||
list_node_t ds_synced_link;
|
||||
|
||||
/*
|
||||
* ds_phys->ds_<accounting> is also protected by ds_lock.
|
||||
* Protected by ds_lock:
|
||||
*/
|
||||
kmutex_t ds_lock;
|
||||
void *ds_user_ptr;
|
||||
dsl_dataset_evict_func_t *ds_user_evict_func;
|
||||
|
||||
/*
|
||||
* ds_owner is protected by the ds_rwlock and the ds_lock
|
||||
*/
|
||||
krwlock_t ds_rwlock;
|
||||
kcondvar_t ds_exclusive_cv;
|
||||
void *ds_owner;
|
||||
|
||||
/* no locking; only for making guesses */
|
||||
uint64_t ds_trysnap_txg;
|
||||
|
||||
/* for objset_open() */
|
||||
kmutex_t ds_opening_lock;
|
||||
|
||||
uint64_t ds_reserved; /* cached refreservation */
|
||||
uint64_t ds_quota; /* cached refquota */
|
||||
|
||||
/* Protected by ds_lock; keep at end of struct for better locality */
|
||||
char ds_snapname[MAXNAMELEN];
|
||||
} dsl_dataset_t;
|
||||
|
||||
/* Nonzero when the dataset's ds_num_children is nonzero. */
#define	dsl_dataset_is_snapshot(ds)	((ds)->ds_phys->ds_num_children != 0)

/* Nonzero when DS_FLAG_UNIQUE_ACCURATE is set in the dataset's ds_flags. */
#define	DS_UNIQUE_IS_ACCURATE(ds)	\
	(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
|
||||
|
||||
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
|
||||
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
void *tag, dsl_dataset_t **);
|
||||
int dsl_dataset_own(const char *name, int flags, void *owner,
|
||||
dsl_dataset_t **dsp);
|
||||
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
int flags, void *owner, dsl_dataset_t **);
|
||||
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
|
||||
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
|
||||
void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
|
||||
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
|
||||
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
|
||||
void *owner);
|
||||
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
|
||||
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
|
||||
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
|
||||
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
uint64_t flags, dmu_tx_t *tx);
|
||||
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
|
||||
int dsl_snapshots_destroy(char *fsname, char *snapname);
|
||||
dsl_checkfunc_t dsl_dataset_destroy_check;
|
||||
dsl_syncfunc_t dsl_dataset_destroy_sync;
|
||||
dsl_checkfunc_t dsl_dataset_snapshot_check;
|
||||
dsl_syncfunc_t dsl_dataset_snapshot_sync;
|
||||
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
|
||||
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
|
||||
int dsl_dataset_promote(const char *name);
|
||||
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
|
||||
boolean_t force);
|
||||
|
||||
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
|
||||
void *p, dsl_dataset_evict_func_t func);
|
||||
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
|
||||
|
||||
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
|
||||
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
|
||||
|
||||
boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
|
||||
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
|
||||
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
|
||||
void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
|
||||
void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
|
||||
void dsl_dataset_space(dsl_dataset_t *ds,
|
||||
uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
|
||||
|
||||
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
|
||||
|
||||
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
|
||||
uint64_t asize, uint64_t inflight, uint64_t *used,
|
||||
uint64_t *ref_rsrv);
|
||||
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
|
||||
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
|
||||
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
|
||||
int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
 * dprintf_ds(ds, fmt, ...): in ZFS_DEBUG builds, and only when
 * ZFS_DEBUG_DPRINTF is set in zfs_flags, prefix the message with
 * "ds=<name> ", where the name is obtained from dsl_dataset_name()
 * into a temporary MAXNAMELEN buffer.  Expands to nothing otherwise.
 */
#ifndef ZFS_DEBUG
#define	dprintf_ds(dd, fmt, ...)
#else
#define	dprintf_ds(ds, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
		char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
		dsl_dataset_name(ds, __ds_name); \
		dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
		kmem_free(__ds_name, MAXNAMELEN); \
	} \
_NOTE(CONSTCOND) } while (0)
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DATASET_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DELEG_H
|
||||
#define _SYS_DSL_DELEG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Names of the delegated permissions.
 *
 * Note: the names of properties that are marked delegatable are also
 * valid delegated permissions.
 */
#define	ZFS_DELEG_PERM_NONE		""
#define	ZFS_DELEG_PERM_CREATE		"create"
#define	ZFS_DELEG_PERM_DESTROY		"destroy"
#define	ZFS_DELEG_PERM_SNAPSHOT		"snapshot"
#define	ZFS_DELEG_PERM_ROLLBACK		"rollback"
#define	ZFS_DELEG_PERM_CLONE		"clone"
#define	ZFS_DELEG_PERM_PROMOTE		"promote"
#define	ZFS_DELEG_PERM_RENAME		"rename"
#define	ZFS_DELEG_PERM_MOUNT		"mount"
#define	ZFS_DELEG_PERM_SHARE		"share"
#define	ZFS_DELEG_PERM_SEND		"send"
#define	ZFS_DELEG_PERM_RECEIVE		"receive"
#define	ZFS_DELEG_PERM_ALLOW		"allow"
#define	ZFS_DELEG_PERM_USERPROP		"userprop"
#define	ZFS_DELEG_PERM_VSCAN		"vscan"
|
||||
|
||||
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
|
||||
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
|
||||
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
|
||||
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
|
||||
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
|
||||
boolean_t dsl_delegation_on(objset_t *os);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DELEG_H */
|
||||
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DIR_H
|
||||
#define _SYS_DSL_DIR_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Referenced by pointer only in this header. */
struct dsl_dataset;

/*
 * Index into dsl_dir_phys_t's dd_used_breakdown[] array; DD_USED_NUM is
 * the array length and must stay last.
 */
typedef enum dd_used {
	DD_USED_HEAD = 0,
	DD_USED_SNAP,
	DD_USED_CHILD,
	DD_USED_CHILD_RSRV,
	DD_USED_REFRSRV,
	DD_USED_NUM
} dd_used_t;

#define	DD_FLAG_USED_BREAKDOWN	(1<<0)

/*
 * Physical dsl_dir layout: thirty-two 64-bit words (256 bytes) in
 * total.  Field order and sizes define the layout and must not be
 * rearranged.
 */
typedef struct dsl_dir_phys {
	uint64_t	dd_creation_time;	/* not actually used */
	uint64_t	dd_head_dataset_obj;
	uint64_t	dd_parent_obj;
	uint64_t	dd_origin_obj;
	uint64_t	dd_child_dir_zapobj;
	/*
	 * how much space our children are accounting for; for leaf
	 * datasets, == physical space used by fs + snaps
	 */
	uint64_t	dd_used_bytes;
	uint64_t	dd_compressed_bytes;
	uint64_t	dd_uncompressed_bytes;
	/* Administrative quota setting */
	uint64_t	dd_quota;
	/* Administrative reservation setting */
	uint64_t	dd_reserved;
	uint64_t	dd_props_zapobj;
	uint64_t	dd_deleg_zapobj; /* dataset delegation permissions */
	uint64_t	dd_flags;
	uint64_t	dd_used_breakdown[DD_USED_NUM];
	uint64_t	dd_pad[14]; /* pad out to 256 bytes for good measure */
} dsl_dir_phys_t;
|
||||
|
||||
struct dsl_dir {
|
||||
/* These are immutable; no lock needed: */
|
||||
uint64_t dd_object;
|
||||
dsl_dir_phys_t *dd_phys;
|
||||
dmu_buf_t *dd_dbuf;
|
||||
dsl_pool_t *dd_pool;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_dirs list */
|
||||
txg_node_t dd_dirty_link;
|
||||
|
||||
/* protected by dp_config_rwlock */
|
||||
dsl_dir_t *dd_parent;
|
||||
|
||||
/* Protected by dd_lock */
|
||||
kmutex_t dd_lock;
|
||||
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
|
||||
|
||||
/* gross estimate of space used by in-flight tx's */
|
||||
uint64_t dd_tempreserved[TXG_SIZE];
|
||||
/* amount of space we expect to write; == amount of dirty data */
|
||||
int64_t dd_space_towrite[TXG_SIZE];
|
||||
|
||||
/* protected by dd_lock; keep at end of struct for better locality */
|
||||
char dd_myname[MAXNAMELEN];
|
||||
};
|
||||
|
||||
void dsl_dir_close(dsl_dir_t *dd, void *tag);
|
||||
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
|
||||
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
|
||||
const char **tailp);
|
||||
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
const char *tail, void *tag, dsl_dir_t **);
|
||||
void dsl_dir_name(dsl_dir_t *dd, char *buf);
|
||||
int dsl_dir_namelen(dsl_dir_t *dd);
|
||||
int dsl_dir_is_private(dsl_dir_t *dd);
|
||||
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
|
||||
const char *name, dmu_tx_t *tx);
|
||||
dsl_checkfunc_t dsl_dir_destroy_check;
|
||||
dsl_syncfunc_t dsl_dir_destroy_sync;
|
||||
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
|
||||
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
|
||||
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
|
||||
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
|
||||
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
|
||||
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
|
||||
uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
|
||||
dmu_tx_t *tx);
|
||||
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
|
||||
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
|
||||
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
|
||||
void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
|
||||
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
|
||||
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
|
||||
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
|
||||
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
|
||||
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
|
||||
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
|
||||
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
|
||||
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
|
||||
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
/* Internal reserved directory names. */
#define	MOS_DIR_NAME	"$MOS"
#define	ORIGIN_DIR_NAME	"$ORIGIN"
|
||||
|
||||
/*
 * dprintf_dd(dd, fmt, ...): dprintf() prefixed with "dd=<name> ", where
 * <name> is produced by dsl_dir_name(dd, buf).
 *
 * With ZFS_DEBUG defined, the message is emitted only when zfs_flags has
 * ZFS_DEBUG_DPRINTF set.  The name is built in a temporary kmem buffer of
 * MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 bytes, which is freed before the
 * macro finishes.  The size is computed once so that the allocation and
 * the free always agree.  At least one argument must follow 'fmt', because
 * __VA_ARGS__ is placed after a comma.
 *
 * Without ZFS_DEBUG the macro is a no-op expression and none of its
 * arguments are evaluated.
 */
#ifdef ZFS_DEBUG
#define	dprintf_dd(dd, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	const size_t __ds_size = MAXNAMELEN + strlen(MOS_DIR_NAME) + 1; \
	char *__ds_name = kmem_alloc(__ds_size, KM_SLEEP); \
	dsl_dir_name(dd, __ds_name); \
	dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
	kmem_free(__ds_name, __ds_size); \
	} \
_NOTE(CONSTCOND) } while (0)
#else
#define	dprintf_dd(dd, fmt, ...)	((void)0)
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DIR_H */
|
||||
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_POOL_H
|
||||
#define _SYS_DSL_POOL_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/txg_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Forward declarations: this header only needs pointers to these types,
 * so their full definitions are not required here.
 */
struct objset;
struct dsl_dir;
struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
|
||||
|
||||
/*
 * Scrub function selector (stored in dsl_pool's dp_scrub_func).
 * SCRUB_FUNC_NUMFUNCS is the number of real values and must stay last.
 */
enum scrub_func {
	SCRUB_FUNC_NONE,
	SCRUB_FUNC_CLEAN,
	SCRUB_FUNC_NUMFUNCS
};
|
||||
|
||||
/*
 * These macros are for indexing into the zfs_all_blkstats_t.
 * DMU_OT_DEFERRED reuses the DMU_OT_NONE slot; DMU_OT_TOTAL is one past
 * the last real object type (DMU_OT_NUMTYPES).
 */
#define	DMU_OT_DEFERRED	DMU_OT_NONE
#define	DMU_OT_TOTAL	DMU_OT_NUMTYPES
|
||||
|
||||
/*
 * Per-(level, type) block statistics; one cell of zfs_all_blkstats_t.
 * All members are plain 64-bit counters.
 * NOTE(review): the member meanings below are read off the names only --
 * confirm against the code that fills them in.
 */
typedef struct zfs_blkstat {
	uint64_t	zb_count;	/* presumably number of blocks */
	uint64_t	zb_asize;	/* presumably total allocated size */
	uint64_t	zb_lsize;	/* presumably total logical size */
	uint64_t	zb_psize;	/* presumably total physical size */
	uint64_t	zb_gangs;
	uint64_t	zb_ditto_2_of_2_samevdev;
	uint64_t	zb_ditto_2_of_3_samevdev;
	uint64_t	zb_ditto_3_of_3_samevdev;
} zfs_blkstat_t;
|
||||
|
||||
typedef struct zfs_all_blkstats {
|
||||
zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
|
||||
} zfs_all_blkstats_t;
|
||||
|
||||
|
||||
typedef struct dsl_pool {
|
||||
/* Immutable */
|
||||
spa_t *dp_spa;
|
||||
struct objset *dp_meta_objset;
|
||||
struct dsl_dir *dp_root_dir;
|
||||
struct dsl_dir *dp_mos_dir;
|
||||
struct dsl_dataset *dp_origin_snap;
|
||||
uint64_t dp_root_dir_obj;
|
||||
|
||||
/* No lock needed - sync context only */
|
||||
blkptr_t dp_meta_rootbp;
|
||||
list_t dp_synced_datasets;
|
||||
hrtime_t dp_read_overhead;
|
||||
uint64_t dp_throughput;
|
||||
uint64_t dp_write_limit;
|
||||
|
||||
/* Uses dp_lock */
|
||||
kmutex_t dp_lock;
|
||||
uint64_t dp_space_towrite[TXG_SIZE];
|
||||
uint64_t dp_tempreserved[TXG_SIZE];
|
||||
|
||||
enum scrub_func dp_scrub_func;
|
||||
uint64_t dp_scrub_queue_obj;
|
||||
uint64_t dp_scrub_min_txg;
|
||||
uint64_t dp_scrub_max_txg;
|
||||
zbookmark_t dp_scrub_bookmark;
|
||||
boolean_t dp_scrub_pausing;
|
||||
boolean_t dp_scrub_isresilver;
|
||||
uint64_t dp_scrub_start_time;
|
||||
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
|
||||
boolean_t dp_scrub_restart;
|
||||
|
||||
/* Has its own locking */
|
||||
tx_state_t dp_tx;
|
||||
txg_list_t dp_dirty_datasets;
|
||||
txg_list_t dp_dirty_dirs;
|
||||
txg_list_t dp_sync_tasks;
|
||||
|
||||
/*
|
||||
* Protects administrative changes (properties, namespace)
|
||||
* It is only held for write in syncing context. Therefore
|
||||
* syncing context does not need to ever have it for read, since
|
||||
* nobody else could possibly have it for write.
|
||||
*/
|
||||
krwlock_t dp_config_rwlock;
|
||||
|
||||
zfs_all_blkstats_t *dp_blkstats;
|
||||
} dsl_pool_t;
|
||||
|
||||
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
|
||||
void dsl_pool_close(dsl_pool_t *dp);
|
||||
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
|
||||
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
|
||||
void dsl_pool_zil_clean(dsl_pool_t *dp);
|
||||
int dsl_pool_sync_context(dsl_pool_t *dp);
|
||||
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
|
||||
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_memory_pressure(dsl_pool_t *dp);
|
||||
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
|
||||
struct dmu_tx *tx);
|
||||
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
|
||||
int dsl_pool_scrub_cancel(dsl_pool_t *dp);
|
||||
int dsl_pool_scrub_clean(dsl_pool_t *dp);
|
||||
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_scrub_restart(dsl_pool_t *dp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_POOL_H */
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_PROP_H
|
||||
#define _SYS_DSL_PROP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Forward declarations; only pointers to these types are used below. */
struct dsl_dataset;
struct dsl_dir;

/*
 * Property-change callback: receives the registrant's 'arg' and the new
 * 64-bit property value.
 *
 * The callback func may not call into the DMU or DSL!
 */
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
|
||||
|
||||
typedef struct dsl_prop_cb_record {
|
||||
list_node_t cbr_node; /* link on dd_prop_cbs */
|
||||
struct dsl_dataset *cbr_ds;
|
||||
const char *cbr_propname;
|
||||
dsl_prop_changed_cb_t *cbr_func;
|
||||
void *cbr_arg;
|
||||
} dsl_prop_cb_record_t;
|
||||
|
||||
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_numcb(struct dsl_dataset *ds);
|
||||
|
||||
int dsl_prop_get(const char *ddname, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_integer(const char *ddname, const char *propname,
|
||||
uint64_t *valuep, char *setpoint);
|
||||
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
|
||||
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
|
||||
int dsl_prop_set(const char *ddname, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
|
||||
void dsl_prop_nvlist_add_string(nvlist_t *nv,
|
||||
zfs_prop_t prop, const char *value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_PROP_H */
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_SYNCTASK_H
|
||||
#define _SYS_DSL_SYNCTASK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_pool;
|
||||
|
||||
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
|
||||
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
|
||||
|
||||
typedef struct dsl_sync_task {
|
||||
list_node_t dst_node;
|
||||
dsl_checkfunc_t *dst_checkfunc;
|
||||
dsl_syncfunc_t *dst_syncfunc;
|
||||
void *dst_arg1;
|
||||
void *dst_arg2;
|
||||
int dst_err;
|
||||
} dsl_sync_task_t;
|
||||
|
||||
typedef struct dsl_sync_task_group {
|
||||
txg_node_t dstg_node;
|
||||
list_t dstg_tasks;
|
||||
struct dsl_pool *dstg_pool;
|
||||
cred_t *dstg_cr;
|
||||
uint64_t dstg_txg;
|
||||
int dstg_err;
|
||||
int dstg_space;
|
||||
boolean_t dstg_nowaiter;
|
||||
} dsl_sync_task_group_t;
|
||||
|
||||
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
|
||||
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
|
||||
dsl_checkfunc_t *, dsl_syncfunc_t *,
|
||||
void *arg1, void *arg2, int blocks_modified);
|
||||
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
|
||||
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
|
||||
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
|
||||
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
|
||||
|
||||
int dsl_sync_task_do(struct dsl_pool *dp,
|
||||
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
|
||||
void *arg1, void *arg2, int blocks_modified);
|
||||
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
|
||||
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
|
||||
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_SYNCTASK_H */
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_H
|
||||
#define _SYS_METASLAB_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Opaque handles; the structures are not defined in this header. */
typedef struct metaslab_class metaslab_class_t;
typedef struct metaslab_group metaslab_group_t;
|
||||
|
||||
/*
 * Metaslab lifecycle and per-txg sync hooks.  Declarations only; behavior
 * is defined by the implementation, which is not part of this header.
 */
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
    uint64_t start, uint64_t size, uint64_t txg);
extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
|
||||
|
||||
/*
 * Flag values for allocation.
 * NOTE(review): presumably these are passed as the 'flags' argument of
 * metaslab_alloc() -- confirm at the callers.  METASLAB_HINTBP_FAVOR is
 * zero, so it is the default when no flag is set.
 */
#define	METASLAB_HINTBP_FAVOR	0x0
#define	METASLAB_HINTBP_AVOID	0x1
#define	METASLAB_GANG_HEADER	0x2
|
||||
|
||||
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
|
||||
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
|
||||
boolean_t now);
|
||||
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
|
||||
|
||||
extern metaslab_class_t *metaslab_class_create(void);
|
||||
extern void metaslab_class_destroy(metaslab_class_t *mc);
|
||||
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
|
||||
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
|
||||
vdev_t *vd);
|
||||
extern void metaslab_group_destroy(metaslab_group_t *mg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_METASLAB_H */
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
#define _SYS_METASLAB_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct metaslab_class {
|
||||
metaslab_group_t *mc_rotor;
|
||||
uint64_t mc_allocated;
|
||||
};
|
||||
|
||||
struct metaslab_group {
|
||||
kmutex_t mg_lock;
|
||||
avl_tree_t mg_metaslab_tree;
|
||||
uint64_t mg_aliquot;
|
||||
int64_t mg_bias;
|
||||
metaslab_class_t *mg_class;
|
||||
vdev_t *mg_vd;
|
||||
metaslab_group_t *mg_prev;
|
||||
metaslab_group_t *mg_next;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each metaslab's free space is tracked in space map object in the MOS,
|
||||
* which is only updated in syncing context. Each time we sync a txg,
|
||||
* we append the allocs and frees from that txg to the space map object.
|
||||
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
|
||||
* to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
|
||||
*/
|
||||
struct metaslab {
|
||||
kmutex_t ms_lock; /* metaslab lock */
|
||||
space_map_obj_t ms_smo; /* synced space map object */
|
||||
space_map_obj_t ms_smo_syncing; /* syncing space map object */
|
||||
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
|
||||
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
|
||||
space_map_t ms_map; /* in-core free space map */
|
||||
uint64_t ms_weight; /* weight vs. others in group */
|
||||
metaslab_group_t *ms_group; /* metaslab group */
|
||||
avl_node_t ms_group_node; /* node in metaslab group tree */
|
||||
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_METASLAB_IMPL_H */
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_REFCOUNT_H
|
||||
#define _SYS_REFCOUNT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * If the reference is held only by the calling function and not any
 * particular object, use FTAG (which is a string) for the holder_tag.
 * Otherwise, use the object that holds the reference.
 *
 * FTAG expands to the enclosing function's name (__func__) cast to a
 * non-const char pointer so that it fits 'void *tag' parameters.  The
 * string must never be written through.
 */
#define	FTAG ((char *)__func__)
|
||||
|
||||
#if defined(DEBUG) || !defined(_KERNEL)
|
||||
typedef struct reference {
|
||||
list_node_t ref_link;
|
||||
void *ref_holder;
|
||||
uint64_t ref_number;
|
||||
uint8_t *ref_removed;
|
||||
} reference_t;
|
||||
|
||||
typedef struct refcount {
|
||||
kmutex_t rc_mtx;
|
||||
list_t rc_list;
|
||||
list_t rc_removed;
|
||||
int64_t rc_count;
|
||||
int64_t rc_removed_count;
|
||||
} refcount_t;
|
||||
|
||||
/* Note: refcount_t must be initialized with refcount_create() */
|
||||
|
||||
void refcount_create(refcount_t *rc);
|
||||
void refcount_destroy(refcount_t *rc);
|
||||
void refcount_destroy_many(refcount_t *rc, uint64_t number);
|
||||
int refcount_is_zero(refcount_t *rc);
|
||||
int64_t refcount_count(refcount_t *rc);
|
||||
int64_t refcount_add(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_remove(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
|
||||
void refcount_init(void);
|
||||
void refcount_fini(void);
|
||||
|
||||
#else /* DEBUG */
|
||||
|
||||
/*
 * Non-tracking refcount: a bare 64-bit counter updated with
 * atomic_add_64_nv().  The holder tags are accepted for source
 * compatibility with the tracking variant and are ignored (they are not
 * evaluated).
 */
typedef struct refcount {
	uint64_t rc_count;
} refcount_t;

#define	refcount_create(rc) ((rc)->rc_count = 0)
#define	refcount_destroy(rc) ((rc)->rc_count = 0)
#define	refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define	refcount_is_zero(rc) ((rc)->rc_count == 0)
#define	refcount_count(rc) ((rc)->rc_count)
#define	refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
#define	refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
/*
 * 'number' is parenthesized so that an expression argument such as
 * "a + b" is added or negated as a whole; without the parentheses,
 * -number would expand to "-a + b".
 */
#define	refcount_add_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, (number))
#define	refcount_remove_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, -(number))

/* Nothing to set up or tear down; expand to a no-op expression. */
#define	refcount_init()	((void)0)
#define	refcount_fini()	((void)0)
|
||||
|
||||
#endif /* DEBUG */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_REFCOUNT_H */
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_RR_RW_LOCK_H
|
||||
#define _SYS_RR_RW_LOCK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
/*
|
||||
* A reader-writer lock implementation that allows re-entrant reads, but
|
||||
* still gives writers priority on "new" reads.
|
||||
*
|
||||
* See rrwlock.c for more details about the implementation.
|
||||
*
|
||||
* Fields of the rrwlock_t structure:
|
||||
* - rr_lock: protects modification and reading of rrwlock_t fields
|
||||
* - rr_cv: cv for waking up readers or waiting writers
|
||||
* - rr_writer: thread id of the current writer
|
||||
* - rr_anon_rcount: number of active anonymous readers
|
||||
* - rr_linked_rcount: total number of non-anonymous active readers
|
||||
* - rr_writer_wanted: a writer wants the lock
|
||||
*/
|
||||
typedef struct rrwlock {
|
||||
kmutex_t rr_lock;
|
||||
kcondvar_t rr_cv;
|
||||
kthread_t *rr_writer;
|
||||
refcount_t rr_anon_rcount;
|
||||
refcount_t rr_linked_rcount;
|
||||
boolean_t rr_writer_wanted;
|
||||
} rrwlock_t;
|
||||
|
||||
/*
|
||||
* 'tag' is used in reference counting tracking. The
|
||||
* 'tag' must be the same in a rrw_enter() as in its
|
||||
* corresponding rrw_exit().
|
||||
*/
|
||||
void rrw_init(rrwlock_t *rrl);
|
||||
void rrw_destroy(rrwlock_t *rrl);
|
||||
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
|
||||
void rrw_exit(rrwlock_t *rrl, void *tag);
|
||||
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
|
||||
|
||||
/* Shorthands for rrw_held() in reader and writer mode respectively. */
#define	RRW_READ_HELD(x)	rrw_held((x), RW_READER)
#define	RRW_WRITE_HELD(x)	rrw_held((x), RW_WRITER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_RR_RW_LOCK_H */
|
||||
@@ -0,0 +1,554 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_H
|
||||
#define _SYS_SPA_H
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Forward references that lots of things need.
 * The structures themselves are not defined in this header; only the
 * names are introduced here.
 */
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;
|
||||
|
||||
/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * A field is the 'len' bits of 'x' that start at bit 'low'.
 *
 *   BFnn_DECODE / BFnn_GET	extract the field as a right-aligned value.
 *   BFnn_ENCODE		mask a value to 'len' bits and shift it into
 *				field position.
 *   BFnn_SET			store 'val' into the field of lvalue 'x'.
 *				XORs 'x' with (old field ^ val) placed in
 *				field position, so only differing bits flip.
 *				'x', 'low' and 'len' are evaluated more than
 *				once: do not pass expressions with side
 *				effects.
 *   BFnn_GET_SB / _SET_SB	same, for values stored as
 *				(value >> shift) - bias.
 *
 * These rely on P2PHASE(v, 1 << len) keeping the low 'len' bits of 'v'
 * (P2PHASE is defined outside this header).
 */
#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))

#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)

/* 'x' and 'low' are parenthesized so any expression argument parses. */
#define	BF32_SET(x, low, len, val)	\
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define	BF64_SET(x, low, len, val)	\
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

#define	BF32_GET_SB(x, low, len, shift, bias)	\
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define	BF64_GET_SB(x, low, len, shift, bias)	\
	((BF64_GET(x, low, len) + (bias)) << (shift))

#define	BF32_SET_SB(x, low, len, shift, bias, val)	\
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define	BF64_SET_SB(x, low, len, shift, bias, val)	\
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
|
||||
|
||||
/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 *
 * Block sizes are the powers of two from 2^SPA_MINBLOCKSHIFT through
 * 2^SPA_MAXBLOCKSHIFT; SPA_BLOCKSIZES counts them.
 */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)

#define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
|
||||
|
||||
/*
 * Size of block to hold the configuration data (a packed nvlist):
 * 1 << 14 = 16K bytes.
 */
#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)
|
||||
|
||||
/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 *
 * Sizes are stored in units of 512 bytes, so an n-bit field covers
 * 2^n * 512 bytes.
 */
#define	SPA_LSIZEBITS		16	/* LSIZE up to 32M (2^16 * 512)	*/
#define	SPA_PSIZEBITS		16	/* PSIZE up to 32M (2^16 * 512)	*/
#define	SPA_ASIZEBITS		24	/* ASIZE up to 64 times larger	*/
|
||||
|
||||
/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA.
 * Use the DVA_GET_xxx / DVA_SET_xxx macros to access the fields packed
 * into the two words.
 */
typedef struct dva {
	uint64_t	dva_word[2];
} dva_t;
|
||||
|
||||
/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic
 * hashes.  It is stored as four 64-bit words.
 */
typedef struct zio_cksum {
	uint64_t	zc_word[4];
} zio_cksum_t;
|
||||
|
||||
/*
 * Each block is described by its DVAs, time of birth, checksum, etc.
 * The word-by-word, bit-by-bit layout of the blkptr is as follows:
 *
 *      64      56      48      40      32      24      16      8       0
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 0    |             vdev1             | GRID  |         ASIZE         |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 1    |G|                           offset1                           |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 2    |             vdev2             | GRID  |         ASIZE         |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 3    |G|                           offset2                           |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 4    |             vdev3             | GRID  |         ASIZE         |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 5    |G|                           offset3                           |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 6    |E| lvl | type  | cksum | comp  |     PSIZE     |     LSIZE     |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 7    |                            padding                            |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 8    |                            padding                            |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * 9    |                            padding                            |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * a    |                           birth txg                           |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * b    |                          fill count                           |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * c    |                          checksum[0]                          |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * d    |                          checksum[1]                          |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * e    |                          checksum[2]                          |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 * f    |                          checksum[3]                          |
 *      +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Legend:
 *
 * vdev         virtual device ID
 * offset       offset into virtual device
 * LSIZE        logical size
 * PSIZE        physical size (after compression)
 * ASIZE        allocated size (including RAID-Z parity and gang block headers)
 * GRID         RAID-Z layout information (reserved for future use)
 * cksum        checksum function
 * comp         compression function
 * G            gang block indicator
 * E            endianness
 * type         DMU object type
 * lvl          level of indirection
 * birth txg    transaction group in which the block was born
 * fill count   number of non-zero blocks under this bp
 * checksum[4]  256-bit checksum of the data this bp describes
 */
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
uint64_t blk_pad[3]; /* Extra space for the future */
|
||||
uint64_t blk_birth; /* transaction group at birth */
|
||||
uint64_t blk_fill; /* fill count */
|
||||
zio_cksum_t blk_cksum; /* 256-bit checksum */
|
||||
} blkptr_t;
|
||||
|
||||
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
|
||||
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
|
||||
|
||||
/*
|
||||
* Macros to get and set fields in a bp or DVA.
|
||||
*/
|
||||
#define DVA_GET_ASIZE(dva) \
|
||||
BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
|
||||
#define DVA_SET_ASIZE(dva, x) \
|
||||
BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
|
||||
|
||||
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
|
||||
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
|
||||
|
||||
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32)
|
||||
#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x)
|
||||
|
||||
#define DVA_GET_OFFSET(dva) \
|
||||
BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
|
||||
#define DVA_SET_OFFSET(dva, x) \
|
||||
BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)
|
||||
|
||||
#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1)
|
||||
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
|
||||
|
||||
#define BP_GET_LSIZE(bp) \
|
||||
(BP_IS_HOLE(bp) ? 0 : \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
|
||||
#define BP_SET_LSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_PSIZE(bp) \
|
||||
BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_PSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
|
||||
|
||||
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
|
||||
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
|
||||
|
||||
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
|
||||
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
|
||||
|
||||
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
|
||||
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
|
||||
|
||||
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
|
||||
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
|
||||
#define BP_GET_ASIZE(bp) \
|
||||
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
/*
 * Yields BP_GET_PSIZE(bp) when the bp is at an indirection level above 0
 * or its DMU object type is flagged ot_metadata in dmu_ot[]; otherwise
 * yields BP_GET_LSIZE(bp).
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so it can be used inside larger expressions.  Note that bp
 * is evaluated more than once.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
|
||||
|
||||
#define BP_GET_NDVAS(bp) \
|
||||
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
#define BP_COUNT_GANG(bp) \
|
||||
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[2]))
|
||||
|
||||
#define DVA_EQUAL(dva1, dva2) \
|
||||
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
|
||||
(dva1)->dva_word[0] == (dva2)->dva_word[0])
|
||||
|
||||
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
|
||||
(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
|
||||
((zc1).zc_word[1] - (zc2).zc_word[1]) | \
|
||||
((zc1).zc_word[2] - (zc2).zc_word[2]) | \
|
||||
((zc1).zc_word[3] - (zc2).zc_word[3])))
|
||||
|
||||
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
|
||||
|
||||
/*
 * Store the four words w0..w3 into zc_word[0..3] of the checksum that
 * zcp points to.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by its semicolon is exactly one statement; this keeps the macro safe
 * as the unbraced body of an if/else.  zcp is evaluated four times.
 */
#define	ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3)	\
do {						\
	(zcp)->zc_word[0] = (w0);		\
	(zcp)->zc_word[1] = (w1);		\
	(zcp)->zc_word[2] = (w2);		\
	(zcp)->zc_word[3] = (w3);		\
} while (0)
|
||||
|
||||
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
|
||||
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
|
||||
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
|
||||
|
||||
/*
 * Clear every field of the block pointer bp: both words of each of the
 * three DVAs, blk_prop, the three padding words, blk_birth, blk_fill and
 * the checksum.  Because blk_birth becomes 0, BP_IS_HOLE(bp) is true
 * afterwards.
 *
 * Wrapped in do { } while (0) so that "BP_ZERO(bp);" is a single
 * statement and can be used as the unbraced body of an if/else.
 * bp is evaluated many times.
 */
#define	BP_ZERO(bp)						\
do {								\
	(bp)->blk_dva[0].dva_word[0] = 0;			\
	(bp)->blk_dva[0].dva_word[1] = 0;			\
	(bp)->blk_dva[1].dva_word[0] = 0;			\
	(bp)->blk_dva[1].dva_word[1] = 0;			\
	(bp)->blk_dva[2].dva_word[0] = 0;			\
	(bp)->blk_dva[2].dva_word[1] = 0;			\
	(bp)->blk_prop = 0;					\
	(bp)->blk_pad[0] = 0;					\
	(bp)->blk_pad[1] = 0;					\
	(bp)->blk_pad[2] = 0;					\
	(bp)->blk_birth = 0;					\
	(bp)->blk_fill = 0;					\
	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0);		\
} while (0)
|
||||
|
||||
#define BLK_FILL_ALREADY_FREED (-1ULL)
|
||||
|
||||
/*
|
||||
* Note: the byteorder is either 0 or -1, both of which are palindromes.
|
||||
* This simplifies the endianness handling a bit.
|
||||
*/
|
||||
#ifdef _BIG_ENDIAN
|
||||
#define ZFS_HOST_BYTEORDER (0ULL)
|
||||
#else
|
||||
#define ZFS_HOST_BYTEORDER (-1ULL)
|
||||
#endif
|
||||
|
||||
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
|
||||
|
||||
#define BP_SPRINTF_LEN 320
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
/*
 * Yields ARC_BUFC_METADATA when the bp is at an indirection level above 0
 * or its DMU object type is flagged ot_metadata in dmu_ot[]; otherwise
 * yields ARC_BUFC_DATA.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so it can be used as an initializer, a function argument or
 * inside a comparison.  bp is evaluated more than once.
 */
#define	BP_GET_BUFC_TYPE(bp)						\
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
||||
/*
|
||||
* Routines found in spa.c
|
||||
*/
|
||||
|
||||
/* state manipulation functions */
|
||||
extern int spa_open(const char *pool, spa_t **, void *tag);
|
||||
extern int spa_get_stats(const char *pool, nvlist_t **config,
|
||||
char *altroot, size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
const char *history_str, nvlist_t *zplprops);
|
||||
extern int spa_check_rootconf(char *devpath, char *devid,
|
||||
nvlist_t **bestconf, uint64_t *besttxg);
|
||||
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
|
||||
extern int spa_import_rootpool(char *devpath, char *devid);
|
||||
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
|
||||
extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
|
||||
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
|
||||
extern int spa_destroy(char *pool);
|
||||
extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force);
|
||||
extern int spa_reset(char *pool);
|
||||
extern void spa_async_request(spa_t *spa, int flag);
|
||||
extern void spa_async_unrequest(spa_t *spa, int flag);
|
||||
extern void spa_async_suspend(spa_t *spa);
|
||||
extern void spa_async_resume(spa_t *spa);
|
||||
extern spa_t *spa_inject_addref(char *pool);
|
||||
extern void spa_inject_delref(spa_t *spa);
|
||||
|
||||
#define SPA_ASYNC_CONFIG_UPDATE 0x01
|
||||
#define SPA_ASYNC_REMOVE 0x02
|
||||
#define SPA_ASYNC_PROBE 0x04
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x08
|
||||
#define SPA_ASYNC_RESILVER 0x10
|
||||
|
||||
/* device manipulation */
|
||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
|
||||
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
|
||||
int replacing);
|
||||
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
|
||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||
|
||||
/* spare state (which is global across all pools) */
|
||||
extern void spa_spare_add(vdev_t *vd);
|
||||
extern void spa_spare_remove(vdev_t *vd);
|
||||
extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
|
||||
extern void spa_spare_activate(vdev_t *vd);
|
||||
|
||||
/* L2ARC state (which is global across all pools) */
|
||||
extern void spa_l2cache_add(vdev_t *vd);
|
||||
extern void spa_l2cache_remove(vdev_t *vd);
|
||||
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
|
||||
extern void spa_l2cache_activate(vdev_t *vd);
|
||||
extern void spa_l2cache_drop(spa_t *spa);
|
||||
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
|
||||
|
||||
/* scrubbing */
|
||||
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
|
||||
|
||||
/* spa syncing */
|
||||
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
|
||||
extern void spa_sync_allpools(void);
|
||||
|
||||
/* spa namespace global mutex */
|
||||
extern kmutex_t spa_namespace_lock;
|
||||
|
||||
/*
|
||||
* SPA configuration functions in spa_config.c
|
||||
*/
|
||||
|
||||
#define SPA_CONFIG_UPDATE_POOL 0
|
||||
#define SPA_CONFIG_UPDATE_VDEVS 1
|
||||
|
||||
extern void spa_config_sync(spa_t *, boolean_t, boolean_t);
|
||||
extern void spa_config_load(void);
|
||||
extern nvlist_t *spa_all_configs(uint64_t *);
|
||||
extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
||||
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||
int getstats);
|
||||
extern void spa_config_update(spa_t *spa, int what);
|
||||
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
|
||||
|
||||
/*
|
||||
* Miscellaneous SPA routines in spa_misc.c
|
||||
*/
|
||||
|
||||
/* Namespace manipulation */
|
||||
extern spa_t *spa_lookup(const char *name);
|
||||
extern spa_t *spa_add(const char *name, const char *altroot);
|
||||
extern void spa_remove(spa_t *spa);
|
||||
extern spa_t *spa_next(spa_t *prev);
|
||||
|
||||
/* Refcount functions */
|
||||
extern void spa_open_ref(spa_t *spa, void *tag);
|
||||
extern void spa_close(spa_t *spa, void *tag);
|
||||
extern boolean_t spa_refcount_zero(spa_t *spa);
|
||||
|
||||
#define SCL_CONFIG 0x01
|
||||
#define SCL_STATE 0x02
|
||||
#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
|
||||
#define SCL_ALLOC 0x08
|
||||
#define SCL_ZIO 0x10
|
||||
#define SCL_FREE 0x20
|
||||
#define SCL_VDEV 0x40
|
||||
#define SCL_LOCKS 7
|
||||
#define SCL_ALL ((1 << SCL_LOCKS) - 1)
|
||||
#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)
|
||||
|
||||
/* Pool configuration locks */
|
||||
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||
extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||
extern void spa_config_exit(spa_t *spa, int locks, void *tag);
|
||||
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
||||
|
||||
/* Pool vdev add/remove lock */
|
||||
extern uint64_t spa_vdev_enter(spa_t *spa);
|
||||
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
|
||||
|
||||
/* Pool vdev state change lock */
|
||||
extern void spa_vdev_state_enter(spa_t *spa);
|
||||
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
|
||||
|
||||
/* Accessor functions */
|
||||
extern boolean_t spa_shutting_down(spa_t *spa);
|
||||
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
|
||||
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
|
||||
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
|
||||
extern void spa_altroot(spa_t *, char *, size_t);
|
||||
extern int spa_sync_pass(spa_t *spa);
|
||||
extern char *spa_name(spa_t *spa);
|
||||
extern uint64_t spa_guid(spa_t *spa);
|
||||
extern uint64_t spa_last_synced_txg(spa_t *spa);
|
||||
extern uint64_t spa_first_txg(spa_t *spa);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern pool_state_t spa_state(spa_t *spa);
|
||||
extern uint64_t spa_freeze_txg(spa_t *spa);
|
||||
extern uint64_t spa_get_alloc(spa_t *spa);
|
||||
extern uint64_t spa_get_space(spa_t *spa);
|
||||
extern uint64_t spa_get_dspace(spa_t *spa);
|
||||
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern int spa_max_replication(spa_t *spa);
|
||||
extern int spa_busy(void);
|
||||
extern uint8_t spa_get_failmode(spa_t *spa);
|
||||
extern boolean_t spa_suspended(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern int spa_rename(const char *oldname, const char *newname);
|
||||
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
|
||||
extern char *spa_strdup(const char *);
|
||||
extern void spa_strfree(char *);
|
||||
extern uint64_t spa_get_random(uint64_t range);
|
||||
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
extern void spa_evict_all(void);
|
||||
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
|
||||
boolean_t l2cache);
|
||||
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
|
||||
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
|
||||
extern boolean_t spa_has_slogs(spa_t *spa);
|
||||
extern boolean_t spa_is_root(spa_t *spa);
|
||||
|
||||
/* history logging */
|
||||
typedef enum history_log_type {
|
||||
LOG_CMD_POOL_CREATE,
|
||||
LOG_CMD_NORMAL,
|
||||
LOG_INTERNAL
|
||||
} history_log_type_t;
|
||||
|
||||
typedef struct history_arg {
|
||||
const char *ha_history_str;
|
||||
history_log_type_t ha_log_type;
|
||||
history_internal_events_t ha_event;
|
||||
char ha_zone[MAXPATHLEN];
|
||||
} history_arg_t;
|
||||
|
||||
extern char *spa_his_ievent_table[];
|
||||
|
||||
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
|
||||
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
|
||||
char *his_buf);
|
||||
extern int spa_history_log(spa_t *spa, const char *his_buf,
|
||||
history_log_type_t what);
|
||||
void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
|
||||
|
||||
/* error handling */
|
||||
struct zbookmark;
|
||||
struct zio;
|
||||
extern void spa_log_error(spa_t *spa, struct zio *zio);
|
||||
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
|
||||
struct zio *zio, uint64_t stateoroffset, uint64_t length);
|
||||
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
|
||||
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
|
||||
extern uint64_t spa_get_errlog_size(spa_t *spa);
|
||||
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
|
||||
extern void spa_errlog_rotate(spa_t *spa);
|
||||
extern void spa_errlog_drain(spa_t *spa);
|
||||
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
|
||||
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
|
||||
|
||||
/* vdev cache */
|
||||
extern void vdev_cache_stat_init(void);
|
||||
extern void vdev_cache_stat_fini(void);
|
||||
|
||||
/* Initialization and termination */
|
||||
extern void spa_init(int flags);
|
||||
extern void spa_fini(void);
|
||||
extern void spa_boot_init(void);	/* prototyped: takes no arguments */
|
||||
|
||||
/* properties */
|
||||
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
|
||||
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
|
||||
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
|
||||
|
||||
/* asynchronous event notification */
|
||||
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_bp(bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
|
||||
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
|
||||
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_bp(bp, fmt, ...)
|
||||
#endif
|
||||
|
||||
extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_H */
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_BOOT_H
|
||||
#define _SYS_SPA_BOOT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/nvpair.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern char *spa_get_bootprop(char *prop);
|
||||
extern void spa_free_bootprop(char *prop);
|
||||
extern int spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf_p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_BOOT_H */
|
||||
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_IMPL_H
|
||||
#define _SYS_SPA_IMPL_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/bplist.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct spa_error_entry {
|
||||
zbookmark_t se_bookmark;
|
||||
char *se_name;
|
||||
avl_node_t se_avl;
|
||||
} spa_error_entry_t;
|
||||
|
||||
typedef struct spa_history_phys {
|
||||
uint64_t sh_pool_create_len; /* ending offset of zpool create */
|
||||
uint64_t sh_phys_max_off; /* physical EOF */
|
||||
uint64_t sh_bof; /* logical BOF */
|
||||
uint64_t sh_eof; /* logical EOF */
|
||||
uint64_t sh_records_lost; /* num of records overwritten */
|
||||
} spa_history_phys_t;
|
||||
|
||||
struct spa_aux_vdev {
|
||||
uint64_t sav_object; /* MOS object for device list */
|
||||
nvlist_t *sav_config; /* cached device config */
|
||||
vdev_t **sav_vdevs; /* devices */
|
||||
int sav_count; /* number devices */
|
||||
boolean_t sav_sync; /* sync the device list */
|
||||
nvlist_t **sav_pending; /* pending device additions */
|
||||
uint_t sav_npending; /* # pending devices */
|
||||
};
|
||||
|
||||
typedef struct spa_config_lock {
|
||||
kmutex_t scl_lock;
|
||||
kthread_t *scl_writer;
|
||||
int scl_write_wanted;
|
||||
kcondvar_t scl_cv;
|
||||
refcount_t scl_count;
|
||||
} spa_config_lock_t;
|
||||
|
||||
typedef struct spa_config_dirent {
|
||||
list_node_t scd_link;
|
||||
char *scd_path;
|
||||
} spa_config_dirent_t;
|
||||
|
||||
/*
 * Log state of a pool, as stored in the spa_log_state field of struct spa.
 * The last enumerator carries no trailing comma so the header also
 * compiles as C89 and as pre-C++11 C++.
 */
typedef enum spa_log_state {
	SPA_LOG_UNKNOWN = 0,	/* unknown log state */
	SPA_LOG_MISSING,	/* missing log(s) */
	SPA_LOG_CLEAR,		/* clear the log(s) */
	SPA_LOG_GOOD		/* log(s) are good */
} spa_log_state_t;
|
||||
|
||||
enum zio_taskq_type {
|
||||
ZIO_TASKQ_ISSUE = 0,
|
||||
ZIO_TASKQ_INTERRUPT,
|
||||
ZIO_TASKQ_TYPES
|
||||
};
|
||||
|
||||
struct spa {
|
||||
/*
|
||||
* Fields protected by spa_namespace_lock.
|
||||
*/
|
||||
char spa_name[MAXNAMELEN]; /* pool name */
|
||||
avl_node_t spa_avl; /* node in spa_namespace_avl */
|
||||
nvlist_t *spa_config; /* last synced config */
|
||||
nvlist_t *spa_config_syncing; /* currently syncing config */
|
||||
uint64_t spa_config_txg; /* txg of last config change */
|
||||
int spa_sync_pass; /* iterate-to-convergence */
|
||||
pool_state_t spa_state; /* pool state */
|
||||
int spa_inject_ref; /* injection references */
|
||||
uint8_t spa_sync_on; /* sync threads are running */
|
||||
spa_load_state_t spa_load_state; /* current load operation */
|
||||
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
||||
dsl_pool_t *spa_dsl_pool;
|
||||
metaslab_class_t *spa_normal_class; /* normal data class */
|
||||
metaslab_class_t *spa_log_class; /* intent log data class */
|
||||
uint64_t spa_first_txg; /* first txg after spa_open() */
|
||||
uint64_t spa_final_txg; /* txg of export/destroy */
|
||||
uint64_t spa_freeze_txg; /* freeze pool at this txg */
|
||||
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
|
||||
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
|
||||
vdev_t *spa_root_vdev; /* top-level vdev container */
|
||||
uint64_t spa_load_guid; /* initial guid for spa_load */
|
||||
list_t spa_config_dirty_list; /* vdevs with dirty config */
|
||||
list_t spa_state_dirty_list; /* vdevs with dirty state */
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
uint64_t spa_config_object; /* MOS object for pool config */
|
||||
uint64_t spa_syncing_txg; /* txg currently syncing */
|
||||
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
|
||||
bplist_t spa_sync_bplist; /* deferred-free bplist */
|
||||
uberblock_t spa_ubsync; /* last synced uberblock */
|
||||
uberblock_t spa_uberblock; /* current uberblock */
|
||||
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
||||
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_errors; /* scrub I/O error count */
|
||||
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
|
||||
uint8_t spa_scrub_active; /* active or suspended? */
|
||||
uint8_t spa_scrub_type; /* type of scrub we're doing */
|
||||
uint8_t spa_scrub_finished; /* indicator to rotate logs */
|
||||
uint8_t spa_scrub_started; /* started since last boot */
|
||||
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
|
||||
kmutex_t spa_async_lock; /* protect async state */
|
||||
kthread_t *spa_async_thread; /* thread doing async task */
|
||||
int spa_async_suspended; /* async tasks suspended */
|
||||
kcondvar_t spa_async_cv; /* wait for thread_exit() */
|
||||
uint16_t spa_async_tasks; /* async task mask */
|
||||
kmutex_t spa_async_root_lock; /* protects async root count */
|
||||
uint64_t spa_async_root_count; /* number of async root zios */
|
||||
kcondvar_t spa_async_root_cv; /* notify when count == 0 */
|
||||
char *spa_root; /* alternate root directory */
|
||||
uint64_t spa_ena; /* spa-wide ereport ENA */
|
||||
boolean_t spa_last_open_failed; /* true if last open failed */
|
||||
kmutex_t spa_errlog_lock; /* error log lock */
|
||||
uint64_t spa_errlog_last; /* last error log object */
|
||||
uint64_t spa_errlog_scrub; /* scrub error log object */
|
||||
kmutex_t spa_errlist_lock; /* error list/ereport lock */
|
||||
avl_tree_t spa_errlist_last; /* last error list */
|
||||
avl_tree_t spa_errlist_scrub; /* scrub error list */
|
||||
uint64_t spa_deflate; /* should we deflate? */
|
||||
uint64_t spa_history; /* history object */
|
||||
kmutex_t spa_history_lock; /* history lock */
|
||||
vdev_t *spa_pending_vdev; /* pending vdev additions */
|
||||
kmutex_t spa_props_lock; /* property lock */
|
||||
uint64_t spa_pool_props_object; /* object for properties */
|
||||
uint64_t spa_bootfs; /* default boot filesystem */
|
||||
uint64_t spa_failmode; /* failure mode for the pool */
|
||||
uint64_t spa_delegation; /* delegation on/off */
|
||||
list_t spa_config_list; /* previous cache file(s) */
|
||||
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
|
||||
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||
uint8_t spa_suspended; /* pool is suspended */
|
||||
boolean_t spa_import_faulted; /* allow faulted vdevs */
|
||||
boolean_t spa_is_root; /* pool is root */
|
||||
int spa_minref; /* num refs when first opened */
|
||||
spa_log_state_t spa_log_state; /* log state */
|
||||
/*
|
||||
* spa_refcnt & spa_config_lock must be the last elements
|
||||
* because refcount_t changes size based on compilation options.
|
||||
* In order for the MDB module to function correctly, the other
|
||||
* fields must remain in the same location.
|
||||
*/
|
||||
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
|
||||
refcount_t spa_refcount; /* number of opens */
|
||||
};
|
||||
|
||||
extern const char *spa_config_path;
|
||||
|
||||
#define BOOTFS_COMPRESS_VALID(compress) \
|
||||
((compress) == ZIO_COMPRESS_LZJB || \
|
||||
((compress) == ZIO_COMPRESS_ON && \
|
||||
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
|
||||
(compress) == ZIO_COMPRESS_OFF)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_IMPL_H */
|
||||
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPACE_MAP_H
|
||||
#define _SYS_SPACE_MAP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct space_map_ops space_map_ops_t;
|
||||
|
||||
typedef struct space_map {
|
||||
avl_tree_t sm_root; /* AVL tree of map segments */
|
||||
uint64_t sm_space; /* sum of all segments in the map */
|
||||
uint64_t sm_start; /* start of map */
|
||||
uint64_t sm_size; /* size of map */
|
||||
uint8_t sm_shift; /* unit shift */
|
||||
uint8_t sm_pad[3]; /* unused */
|
||||
uint8_t sm_loaded; /* map loaded? */
|
||||
uint8_t sm_loading; /* map loading? */
|
||||
kcondvar_t sm_load_cv; /* map load completion */
|
||||
space_map_ops_t *sm_ops; /* space map block picker ops vector */
|
||||
void *sm_ppd; /* picker-private data */
|
||||
kmutex_t *sm_lock; /* pointer to lock that protects map */
|
||||
} space_map_t;
|
||||
|
||||
typedef struct space_seg {
|
||||
avl_node_t ss_node; /* AVL node */
|
||||
uint64_t ss_start; /* starting offset of this segment */
|
||||
uint64_t ss_end; /* ending offset (non-inclusive) */
|
||||
} space_seg_t;
|
||||
|
||||
typedef struct space_map_obj {
|
||||
uint64_t smo_object; /* on-disk space map object */
|
||||
uint64_t smo_objsize; /* size of the object */
|
||||
uint64_t smo_alloc; /* space allocated from the map */
|
||||
} space_map_obj_t;
|
||||
|
||||
struct space_map_ops {
|
||||
void (*smop_load)(space_map_t *sm);
|
||||
void (*smop_unload)(space_map_t *sm);
|
||||
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
|
||||
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
};
|
||||
|
||||
/*
|
||||
* debug entry
|
||||
*
|
||||
* 1 3 10 50
|
||||
* ,---+--------+------------+---------------------------------.
|
||||
* | 1 | action | syncpass | txg (lower bits) |
|
||||
* `---+--------+------------+---------------------------------'
|
||||
* 63 62 60 59 50 49 0
|
||||
*
|
||||
*
|
||||
*
|
||||
* non-debug entry
|
||||
*
|
||||
* 1 47 1 15
|
||||
* ,-----------------------------------------------------------.
|
||||
* | 0 | offset (sm_shift units) | type | run |
|
||||
* `-----------------------------------------------------------'
|
||||
* 63 62 17 16 15 0
|
||||
*/
|
||||
|
||||
/* All this stuff takes and returns bytes */
|
||||
#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
|
||||
#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
|
||||
#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
|
||||
#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
|
||||
#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
|
||||
#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
|
||||
#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
|
||||
#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
|
||||
|
||||
#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
|
||||
#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
|
||||
|
||||
#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
|
||||
#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
|
||||
|
||||
#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
|
||||
#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
|
||||
|
||||
#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
|
||||
|
||||
#define SM_ALLOC 0x0
|
||||
#define SM_FREE 0x1
|
||||
|
||||
/*
|
||||
* The data for a given space map can be kept on blocks of any size.
|
||||
* Larger blocks entail fewer i/o operations, but they also cause the
|
||||
* DMU to keep more data in-core, and also to waste more i/o bandwidth
|
||||
* when only a few blocks have changed since the last transaction group.
|
||||
* This could use a lot more research, but for now, set the freelist
|
||||
* block size to 4k (2^12).
|
||||
*/
|
||||
#define SPACE_MAP_BLOCKSHIFT 12
|
||||
|
||||
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
|
||||
uint8_t shift, kmutex_t *lp);
|
||||
extern void space_map_destroy(space_map_t *sm);
|
||||
extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_vacate(space_map_t *sm,
|
||||
space_map_func_t *func, space_map_t *mdest);
|
||||
extern void space_map_walk(space_map_t *sm,
|
||||
space_map_func_t *func, space_map_t *mdest);
|
||||
extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_union(space_map_t *smd, space_map_t *sms);
|
||||
|
||||
extern void space_map_load_wait(space_map_t *sm);
|
||||
extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
|
||||
uint8_t maptype, space_map_obj_t *smo, objset_t *os);
|
||||
extern void space_map_unload(space_map_t *sm);
|
||||
|
||||
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
|
||||
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
|
||||
extern void space_map_truncate(space_map_obj_t *smo,
|
||||
objset_t *os, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPACE_MAP_H */
|
||||
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_TXG_H
|
||||
#define _SYS_TXG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */
|
||||
#define TXG_SIZE 4 /* next power of 2 */
|
||||
#define TXG_MASK (TXG_SIZE - 1) /* mask for size */
|
||||
#define TXG_INITIAL TXG_SIZE /* initial txg */
|
||||
#define TXG_IDX (txg & TXG_MASK)
|
||||
|
||||
#define TXG_WAIT 1ULL
|
||||
#define TXG_NOWAIT 2ULL
|
||||
|
||||
typedef struct tx_cpu tx_cpu_t;
|
||||
|
||||
/*
 * Handle used by the txg hold/release calls declared in this header:
 * txg_hold_open(), txg_rele_to_quiesce() and txg_rele_to_sync() each take
 * a txg_handle_t pointer.
 */
typedef struct txg_handle {
|
||||
tx_cpu_t *th_cpu; /* struct tx_cpu; only forward-declared in this header */
|
||||
uint64_t th_txg; /* txg id; presumably the txg being held -- TODO confirm */
|
||||
} txg_handle_t;
|
||||
|
||||
/*
 * Per-object linkage for the txg_list_*() routines. Both arrays carry one
 * entry for each of the TXG_SIZE slots.
 */
typedef struct txg_node {
|
||||
struct txg_node *tn_next[TXG_SIZE]; /* next node, one link per slot */
|
||||
uint8_t tn_member[TXG_SIZE]; /* per-slot flag; presumably "is on the list" -- TODO confirm */
|
||||
} txg_node_t;
|
||||
|
||||
/*
 * A set of TXG_SIZE list heads plus a mutex; operated on by the
 * txg_list_*() functions declared in this header.
 */
typedef struct txg_list {
|
||||
kmutex_t tl_lock; /* NOTE(review): presumably guards tl_head[] -- confirm in txg.c */
|
||||
size_t tl_offset; /* presumably the 'offset' given to txg_list_create() -- TODO confirm */
|
||||
txg_node_t *tl_head[TXG_SIZE]; /* first node, one head per slot */
|
||||
} txg_list_t;
|
||||
|
||||
struct dsl_pool;
|
||||
|
||||
extern void txg_init(struct dsl_pool *dp, uint64_t txg);
|
||||
extern void txg_fini(struct dsl_pool *dp);
|
||||
extern void txg_sync_start(struct dsl_pool *dp);
|
||||
extern void txg_sync_stop(struct dsl_pool *dp);
|
||||
extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
|
||||
extern void txg_rele_to_quiesce(txg_handle_t *txghp);
|
||||
extern void txg_rele_to_sync(txg_handle_t *txghp);
|
||||
extern void txg_suspend(struct dsl_pool *dp);
|
||||
extern void txg_resume(struct dsl_pool *dp);
|
||||
|
||||
/*
|
||||
* Delay the caller by the specified number of ticks or until
|
||||
* the txg closes (whichever comes first). This is intended
|
||||
* to be used to throttle writers when the system nears its
|
||||
* capacity.
|
||||
*/
|
||||
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group has finished syncing.
|
||||
* Try to make this happen as soon as possible (eg. kick off any
|
||||
* necessary syncs immediately). If txg==0, wait for the currently open
|
||||
* txg to finish syncing.
|
||||
*/
|
||||
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group, or one after it, is
|
||||
* the open transaction group. Try to make this happen as soon
|
||||
* as possible (eg. kick off any necessary syncs immediately).
|
||||
* If txg == 0, wait for the next open txg.
|
||||
*/
|
||||
extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Returns TRUE if we are "backed up" waiting for the syncing
|
||||
* transaction to complete; otherwise returns FALSE.
|
||||
*/
|
||||
extern boolean_t txg_stalled(struct dsl_pool *dp);
|
||||
|
||||
/* returns TRUE if someone is waiting for the next txg to sync */
|
||||
extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
|
||||
|
||||
/*
|
||||
* Per-txg object lists.
|
||||
*/
|
||||
|
||||
#define TXG_CLEAN(txg) ((txg) - 1)
|
||||
|
||||
extern void txg_list_create(txg_list_t *tl, size_t offset);
|
||||
extern void txg_list_destroy(txg_list_t *tl);
|
||||
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
|
||||
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
|
||||
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
|
||||
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
|
||||
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
|
||||
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
|
||||
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_TXG_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_TXG_IMPL_H
|
||||
#define _SYS_TXG_IMPL_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Full definition of the tx_cpu_t that <sys/txg.h> only forward-declares.
 * Holds one condition variable and one counter per TXG_SIZE slot.
 */
struct tx_cpu {
|
||||
kmutex_t tc_lock;
|
||||
kcondvar_t tc_cv[TXG_SIZE]; /* one cv per slot */
|
||||
uint64_t tc_count[TXG_SIZE]; /* one counter per slot; what is counted is not shown here */
|
||||
char tc_pad[16]; /* explicit padding; NOTE(review): reason not documented here */
|
||||
};
|
||||
|
||||
/*
 * Transaction group bookkeeping: the ids of the txgs in each state
 * (open, quiesced, syncing, synced), the txg ids waiters are asking for,
 * the condition variables used to signal progress, and the handles of the
 * sync and quiesce threads.
 */
typedef struct tx_state {
|
||||
tx_cpu_t *tx_cpu; /* protects right to enter txg */
|
||||
kmutex_t tx_sync_lock; /* protects tx_state_t */
|
||||
krwlock_t tx_suspend; /* NOTE(review): presumably used by txg_suspend()/txg_resume() -- confirm */
|
||||
uint64_t tx_open_txg; /* currently open txg id */
|
||||
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
|
||||
uint64_t tx_syncing_txg; /* currently syncing txg id */
|
||||
uint64_t tx_synced_txg; /* last synced txg id */
|
||||
|
||||
uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
|
||||
uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
|
||||
|
||||
kcondvar_t tx_sync_more_cv;
|
||||
kcondvar_t tx_sync_done_cv;
|
||||
kcondvar_t tx_quiesce_more_cv;
|
||||
kcondvar_t tx_quiesce_done_cv;
|
||||
kcondvar_t tx_timeout_cv;
|
||||
kcondvar_t tx_exit_cv; /* wait for all threads to exit */
|
||||
|
||||
uint8_t tx_threads; /* number of threads */
|
||||
uint8_t tx_exiting; /* set when we're exiting */
|
||||
|
||||
kthread_t *tx_sync_thread; /* handle of the sync thread */
|
||||
kthread_t *tx_quiesce_thread; /* handle of the quiesce thread */
|
||||
} tx_state_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_TXG_IMPL_H */
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UBERBLOCK_H
|
||||
#define _SYS_UBERBLOCK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct uberblock uberblock_t;
|
||||
|
||||
extern int uberblock_verify(uberblock_t *ub);
|
||||
extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UBERBLOCK_H */
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UBERBLOCK_IMPL_H
|
||||
#define _SYS_UBERBLOCK_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/uberblock.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The uberblock version is incremented whenever an incompatible on-disk
|
||||
* format change is made to the SPA, DMU, or ZAP.
|
||||
*
|
||||
* Note: the first two fields should never be moved. When a storage pool
|
||||
* is opened, the uberblock must be read off the disk before the version
|
||||
* can be checked. If the ub_version field is moved, we may not detect
|
||||
* version mismatch. If the ub_magic field is moved, applications that
|
||||
* expect the magic number in the first word won't work.
|
||||
*/
|
||||
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
||||
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
||||
|
||||
/*
 * Field order is significant: ub_magic and ub_version must stay the first
 * two members, because the version can only be checked after the uberblock
 * has been read and consumers expect the magic number in the first word.
 */
struct uberblock {
|
||||
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
||||
uint64_t ub_version; /* SPA_VERSION */
|
||||
uint64_t ub_txg; /* txg of last sync */
|
||||
uint64_t ub_guid_sum; /* sum of all vdev guids */
|
||||
uint64_t ub_timestamp; /* UTC time of last sync */
|
||||
blkptr_t ub_rootbp; /* MOS objset_phys_t */
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UBERBLOCK_IMPL_H */
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UNIQUE_H
|
||||
#define _SYS_UNIQUE_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The number of significant bits in each unique value. */
|
||||
#define UNIQUE_BITS 56
|
||||
|
||||
void unique_init(void);
|
||||
void unique_fini(void);
|
||||
|
||||
/*
|
||||
* Return a new unique value (which will not be uniquified against until
|
||||
* it is unique_insert()-ed).
|
||||
*/
|
||||
uint64_t unique_create(void);
|
||||
|
||||
/* Return a unique value, which equals the one passed in if possible. */
|
||||
uint64_t unique_insert(uint64_t value);
|
||||
|
||||
/* Indicate that this value no longer needs to be uniquified against. */
|
||||
void unique_remove(uint64_t value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UNIQUE_H */
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_H
|
||||
#define _SYS_VDEV_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern boolean_t zfs_nocacheflush;
|
||||
|
||||
extern int vdev_open(vdev_t *);
|
||||
extern int vdev_validate(vdev_t *);
|
||||
extern void vdev_close(vdev_t *);
|
||||
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
|
||||
extern void vdev_init(vdev_t *, uint64_t txg);
|
||||
extern void vdev_reopen(vdev_t *);
|
||||
extern int vdev_validate_aux(vdev_t *vd);
|
||||
extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
|
||||
|
||||
extern boolean_t vdev_is_bootable(vdev_t *vd);
|
||||
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
|
||||
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
|
||||
extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
|
||||
extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
|
||||
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
|
||||
int scrub_done);
|
||||
extern boolean_t vdev_resilver_needed(vdev_t *vd,
|
||||
uint64_t *minp, uint64_t *maxp);
|
||||
|
||||
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_metaslab_fini(vdev_t *vd);
|
||||
|
||||
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
|
||||
extern void vdev_clear_stats(vdev_t *vd);
|
||||
extern void vdev_stat_update(zio_t *zio, uint64_t psize);
|
||||
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
|
||||
boolean_t complete);
|
||||
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
|
||||
extern void vdev_propagate_state(vdev_t *vd);
|
||||
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
|
||||
vdev_aux_t aux);
|
||||
|
||||
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
|
||||
int64_t alloc_delta, boolean_t update_root);
|
||||
|
||||
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
|
||||
|
||||
extern int vdev_fault(spa_t *spa, uint64_t guid);
|
||||
extern int vdev_degrade(spa_t *spa, uint64_t guid);
|
||||
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
|
||||
vdev_state_t *);
|
||||
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
|
||||
extern void vdev_clear(spa_t *spa, vdev_t *vd);
|
||||
|
||||
extern boolean_t vdev_is_dead(vdev_t *vd);
|
||||
extern boolean_t vdev_readable(vdev_t *vd);
|
||||
extern boolean_t vdev_writeable(vdev_t *vd);
|
||||
extern boolean_t vdev_allocatable(vdev_t *vd);
|
||||
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
|
||||
|
||||
extern void vdev_cache_init(vdev_t *vd);
|
||||
extern void vdev_cache_fini(vdev_t *vd);
|
||||
extern int vdev_cache_read(zio_t *zio);
|
||||
extern void vdev_cache_write(zio_t *zio);
|
||||
extern void vdev_cache_purge(vdev_t *vd);
|
||||
|
||||
extern void vdev_queue_init(vdev_t *vd);
|
||||
extern void vdev_queue_fini(vdev_t *vd);
|
||||
extern zio_t *vdev_queue_io(zio_t *zio);
|
||||
extern void vdev_queue_io_done(zio_t *zio);
|
||||
|
||||
extern void vdev_config_dirty(vdev_t *vd);
|
||||
extern void vdev_config_clean(vdev_t *vd);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
|
||||
|
||||
extern void vdev_state_dirty(vdev_t *vd);
|
||||
extern void vdev_state_clean(vdev_t *vd);
|
||||
|
||||
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
|
||||
boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
|
||||
|
||||
/*
|
||||
* Label routines
|
||||
*/
|
||||
struct uberblock;
|
||||
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
|
||||
/*
 * Takes a device psize and an offset and returns an int. The parameter is
 * spelled 'psize' to match vdev_label_offset(); what the returned int
 * encodes is not visible in this header -- TODO confirm in vdev_label.c.
 */
extern int vdev_label_number(uint64_t psize, uint64_t offset);
|
||||
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
|
||||
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
|
||||
|
||||
/*
 * Passed as the 'reason' argument of vdev_label_init().
 */
typedef enum {
|
||||
VDEV_LABEL_CREATE, /* create/add a new device */
|
||||
VDEV_LABEL_REPLACE, /* replace an existing device */
|
||||
VDEV_LABEL_SPARE, /* add a new hot spare */
|
||||
VDEV_LABEL_REMOVE, /* remove an existing device */
|
||||
VDEV_LABEL_L2CACHE /* add an L2ARC cache device */
|
||||
} vdev_labeltype_t;
|
||||
|
||||
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_H */
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_FILE_H
|
||||
#define _SYS_VDEV_FILE_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/vdev.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * State for the file vdev type: a single vnode pointer.
 * NOTE(review): presumably stored in a vdev's type-specific data (vdev_tsd)
 * -- confirm in vdev_file.c.
 */
typedef struct vdev_file {
|
||||
vnode_t *vf_vnode; /* presumably the vnode of the backing file -- TODO confirm */
|
||||
} vdev_file_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_FILE_H */
|
||||
@@ -0,0 +1,305 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_IMPL_H
|
||||
#define _SYS_VDEV_IMPL_H
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/dkio.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Virtual device descriptors.
|
||||
*
|
||||
* All storage pool operations go through the virtual device framework,
|
||||
* which provides data replication and I/O scheduling.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Forward declarations that lots of things need.
|
||||
*/
|
||||
typedef struct vdev_queue vdev_queue_t;
|
||||
typedef struct vdev_cache vdev_cache_t;
|
||||
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||
|
||||
/*
|
||||
* Virtual device operations
|
||||
*/
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
|
||||
typedef void vdev_close_func_t(vdev_t *vd);
|
||||
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
|
||||
typedef int vdev_io_start_func_t(zio_t *zio);
|
||||
typedef void vdev_io_done_func_t(zio_t *zio);
|
||||
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
|
||||
|
||||
/*
 * Table of operations for one vdev type. Each member is a pointer to one of
 * the vdev_*_func_t function types declared just above; struct vdev refers
 * to its table through the vdev_ops member. The concrete tables are the
 * vdev_*_ops externs (vdev_root_ops, vdev_mirror_ops, ...).
 */
typedef struct vdev_ops {
|
||||
vdev_open_func_t *vdev_op_open;
|
||||
vdev_close_func_t *vdev_op_close;
|
||||
vdev_asize_func_t *vdev_op_asize;
|
||||
vdev_io_start_func_t *vdev_op_io_start;
|
||||
vdev_io_done_func_t *vdev_op_io_done;
|
||||
vdev_state_change_func_t *vdev_op_state_change;
|
||||
char vdev_op_type[16]; /* fixed 16-byte buffer; presumably the type name -- TODO confirm */
|
||||
boolean_t vdev_op_leaf; /* presumably true for leaf vdev types -- TODO confirm */
|
||||
} vdev_ops_t;
|
||||
|
||||
/*
|
||||
* Virtual device properties
|
||||
*/
|
||||
/*
 * One entry of a vdev_cache_t. It carries two AVL nodes whose names match
 * the two trees in struct vdev_cache (vc_offset_tree, vc_lastused_tree).
 * NOTE(review): presumably each entry is linked into both trees -- confirm
 * in vdev_cache.c.
 */
struct vdev_cache_entry {
|
||||
char *ve_data; /* pointer to the entry's data buffer */
|
||||
uint64_t ve_offset;
|
||||
uint64_t ve_lastused;
|
||||
avl_node_t ve_offset_node;
|
||||
avl_node_t ve_lastused_node;
|
||||
uint32_t ve_hits;
|
||||
uint16_t ve_missed_update;
|
||||
zio_t *ve_fill_io; /* presumably the zio filling this entry -- TODO confirm */
|
||||
};
|
||||
|
||||
/*
 * Embedded in struct vdev as vdev_cache (the "physical block cache").
 * Two AVL trees plus a mutex.
 */
struct vdev_cache {
|
||||
avl_tree_t vc_offset_tree;
|
||||
avl_tree_t vc_lastused_tree;
|
||||
kmutex_t vc_lock; /* NOTE(review): presumably guards both trees -- confirm */
|
||||
};
|
||||
|
||||
/*
 * Embedded in struct vdev as vdev_queue (the "I/O deadline schedule queue").
 * Four AVL trees plus a mutex.
 */
struct vdev_queue {
|
||||
avl_tree_t vq_deadline_tree;
|
||||
avl_tree_t vq_read_tree;
|
||||
avl_tree_t vq_write_tree;
|
||||
avl_tree_t vq_pending_tree;
|
||||
kmutex_t vq_lock; /* NOTE(review): presumably guards the four trees -- confirm */
|
||||
};
|
||||
|
||||
/*
|
||||
* Virtual device descriptor
|
||||
*/
|
||||
/*
 * One node of the vdev tree (see vdev_parent / vdev_child). Members are
 * grouped into state common to every vdev, state for top-level vdevs,
 * state for leaf vdevs, and finally the mutexes, which must stay last.
 */
struct vdev {
|
||||
/*
|
||||
* Common to all vdev types.
|
||||
*/
|
||||
uint64_t vdev_id; /* child number in vdev parent */
|
||||
uint64_t vdev_guid; /* unique ID for this vdev */
|
||||
uint64_t vdev_guid_sum; /* self guid + all child guids */
|
||||
uint64_t vdev_asize; /* allocatable device capacity */
|
||||
uint64_t vdev_ashift; /* block alignment shift */
|
||||
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
|
||||
uint64_t vdev_prevstate; /* used when reopening a vdev */
|
||||
vdev_ops_t *vdev_ops; /* vdev operations */
|
||||
spa_t *vdev_spa; /* spa for this vdev */
|
||||
void *vdev_tsd; /* type-specific data */
|
||||
vdev_t *vdev_top; /* top-level vdev */
|
||||
vdev_t *vdev_parent; /* parent vdev */
|
||||
vdev_t **vdev_child; /* array of children */
|
||||
uint64_t vdev_children; /* number of children */
|
||||
space_map_t vdev_dtl_map; /* dirty time log in-core state */
|
||||
space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
|
||||
vdev_stat_t vdev_stat; /* virtual device statistics */
|
||||
|
||||
/*
|
||||
* Top-level vdev state.
|
||||
*/
|
||||
uint64_t vdev_ms_array; /* metaslab array object */
|
||||
uint64_t vdev_ms_shift; /* metaslab size shift */
|
||||
uint64_t vdev_ms_count; /* number of metaslabs */
|
||||
metaslab_group_t *vdev_mg; /* metaslab group */
|
||||
metaslab_t **vdev_ms; /* metaslab array */
|
||||
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
|
||||
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
|
||||
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
|
||||
boolean_t vdev_remove_wanted; /* async remove wanted? */
|
||||
boolean_t vdev_probe_wanted; /* async probe wanted? */
|
||||
list_node_t vdev_config_dirty_node; /* config dirty list */
|
||||
list_node_t vdev_state_dirty_node; /* state dirty list */
|
||||
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
|
||||
uint64_t vdev_islog; /* is an intent log device */
|
||||
|
||||
/*
|
||||
* Leaf vdev state.
|
||||
*/
|
||||
uint64_t vdev_psize; /* physical device capacity */
|
||||
space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
|
||||
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
|
||||
uint64_t vdev_wholedisk; /* true if this is a whole disk */
|
||||
uint64_t vdev_offline; /* persistent offline state */
|
||||
uint64_t vdev_faulted; /* persistent faulted state */
|
||||
uint64_t vdev_degraded; /* persistent degraded state */
|
||||
uint64_t vdev_removed; /* persistent removed state */
|
||||
uint64_t vdev_nparity; /* number of parity devices for raidz */
|
||||
char *vdev_path; /* vdev path (if any) */
|
||||
char *vdev_devid; /* vdev devid (if any) */
|
||||
char *vdev_physpath; /* vdev device path (if any) */
|
||||
uint64_t vdev_not_present; /* not present during import */
|
||||
uint64_t vdev_unspare; /* unspare when resilvering done */
|
||||
hrtime_t vdev_last_try; /* last reopen time */
|
||||
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
|
||||
boolean_t vdev_checkremove; /* temporary online test */
|
||||
boolean_t vdev_forcefault; /* force online fault */
|
||||
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
|
||||
uint8_t vdev_detached; /* device detached? */
|
||||
uint8_t vdev_cant_read; /* vdev is failing all reads */
|
||||
uint8_t vdev_cant_write; /* vdev is failing all writes */
|
||||
uint64_t vdev_isspare; /* was a hot spare */
|
||||
uint64_t vdev_isl2cache; /* was a l2cache device */
|
||||
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
|
||||
vdev_cache_t vdev_cache; /* physical block cache */
|
||||
spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
|
||||
zio_t *vdev_probe_zio; /* root of current probe */
|
||||
|
||||
/*
|
||||
* For DTrace to work in userland (libzpool) context, these fields must
|
||||
* remain at the end of the structure. DTrace will use the kernel's
|
||||
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
|
||||
* larger in userland, the offsets of any fields placed after them would be
|
||||
* incorrect.
|
||||
*/
|
||||
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,scrub} -- TODO confirm in vdev.c */
|
||||
kmutex_t vdev_stat_lock; /* vdev_stat */
|
||||
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
|
||||
};
|
||||
|
||||
#define VDEV_SKIP_SIZE (8 << 10)
|
||||
#define VDEV_BOOT_HEADER_SIZE (8 << 10)
|
||||
#define VDEV_PHYS_SIZE (112 << 10)
|
||||
#define VDEV_UBERBLOCK_RING (128 << 10)
|
||||
|
||||
#define VDEV_UBERBLOCK_SHIFT(vd) \
|
||||
MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT)
|
||||
#define VDEV_UBERBLOCK_COUNT(vd) \
|
||||
(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
|
||||
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
|
||||
offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
|
||||
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
|
||||
|
||||
/* ZFS boot block */
|
||||
#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
|
||||
#define VDEV_BOOT_VERSION 1 /* version number */
|
||||
|
||||
/*
 * Boot header stored in each vdev label (vl_boot_header). Four uint64_t
 * fields followed by padding sized so the whole struct occupies
 * VDEV_BOOT_HEADER_SIZE (8K) bytes.
 */
typedef struct vdev_boot_header {
|
||||
uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
|
||||
uint64_t vb_version; /* VDEV_BOOT_VERSION */
|
||||
uint64_t vb_offset; /* start offset (bytes) */
|
||||
uint64_t vb_size; /* size (bytes) */
|
||||
char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)]; /* fills out the rest of the 8K */
|
||||
} vdev_boot_header_t;
|
||||
|
||||
/*
 * The vl_vdev_phys region of a vdev label. vp_nvlist is sized so that,
 * together with the trailing zio_block_tail_t, the struct spans
 * VDEV_PHYS_SIZE (112K) bytes.
 */
typedef struct vdev_phys {
|
||||
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)]; /* byte buffer; presumably a packed nvlist -- TODO confirm */
|
||||
zio_block_tail_t vp_zbt; /* block tail occupying the final bytes */
|
||||
} vdev_phys_t;
|
||||
|
||||
/*
 * Layout of a single vdev label. Per the VDEV_LABEL_START_SIZE and
 * VDEV_LABEL_END_SIZE definitions, two labels sit at the start and two at
 * the end of each leaf device (VDEV_LABELS == 4). The member sizes add up
 * to 8K + 8K + 112K + 128K = 256K.
 */
typedef struct vdev_label {
|
||||
char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
|
||||
vdev_boot_header_t vl_boot_header; /* 8K */
|
||||
vdev_phys_t vl_vdev_phys; /* 112K */
|
||||
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
|
||||
} vdev_label_t; /* 256K total */
|
||||
|
||||
/*
|
||||
* vdev_dirty() flags
|
||||
*/
|
||||
#define VDD_METASLAB 0x01
|
||||
#define VDD_DTL 0x02
|
||||
|
||||
/*
|
||||
* Size and offset of embedded boot loader region on each label.
|
||||
* The total size of the first two labels plus the boot area is 4MB.
|
||||
*/
|
||||
#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t))
|
||||
#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */
|
||||
|
||||
/*
|
||||
* Size of label regions at the start and end of each leaf device.
|
||||
*/
|
||||
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
|
||||
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
|
||||
#define VDEV_LABELS 4
|
||||
|
||||
#define VDEV_ALLOC_LOAD 0
|
||||
#define VDEV_ALLOC_ADD 1
|
||||
#define VDEV_ALLOC_SPARE 2
|
||||
#define VDEV_ALLOC_L2CACHE 3
|
||||
|
||||
/*
|
||||
* Allocate or free a vdev
|
||||
*/
|
||||
extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
|
||||
vdev_t *parent, uint_t id, int alloctype);
|
||||
extern void vdev_free(vdev_t *vd);
|
||||
|
||||
/*
|
||||
* Add or remove children and parents
|
||||
*/
|
||||
extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd);
|
||||
extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd);
|
||||
extern void vdev_compact_children(vdev_t *pvd);
|
||||
extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops);
|
||||
extern void vdev_remove_parent(vdev_t *cvd);
|
||||
|
||||
/*
|
||||
* vdev load and sync
|
||||
*/
|
||||
extern void vdev_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Available vdev types.
|
||||
*/
|
||||
extern vdev_ops_t vdev_root_ops;
|
||||
extern vdev_ops_t vdev_mirror_ops;
|
||||
extern vdev_ops_t vdev_replacing_ops;
|
||||
extern vdev_ops_t vdev_raidz_ops;
|
||||
extern vdev_ops_t vdev_disk_ops;
|
||||
extern vdev_ops_t vdev_file_ops;
|
||||
extern vdev_ops_t vdev_missing_ops;
|
||||
extern vdev_ops_t vdev_spare_ops;
|
||||
|
||||
/*
|
||||
* Common size functions
|
||||
*/
|
||||
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
|
||||
extern uint64_t vdev_get_rsize(vdev_t *vd);
|
||||
|
||||
/*
|
||||
* zdb uses this tunable, so it must be declared here to make lint happy.
|
||||
*/
|
||||
extern int zfs_vdev_cache_size;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_IMPL_H */
|
||||
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
#define _SYS_ZAP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* ZAP - ZFS Attribute Processor
|
||||
*
|
||||
* The ZAP is a module which sits on top of the DMU (Data Management
|
||||
* Unit) and implements a higher-level storage primitive using DMU
|
||||
* objects. Its primary consumer is the ZPL (ZFS Posix Layer).
|
||||
*
|
||||
* A "zapobj" is a DMU object which the ZAP uses to store attributes.
|
||||
* Users should use only zap routines to access a zapobj - they should
|
||||
* not access the DMU object directly using DMU routines.
|
||||
*
|
||||
* The attributes stored in a zapobj are name-value pairs. The name is
|
||||
* a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
|
||||
* terminating NUL). The value is an array of integers, which may be
|
||||
* 1, 2, 4, or 8 bytes long. The total space used by the array (number
|
||||
* of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
|
||||
* Note that an 8-byte integer value can be used to store the location
|
||||
* (object number) of another dmu object (which may be itself a zapobj).
|
||||
* Note that you can use a zero-length attribute to store a single bit
|
||||
* of information - the attribute is present or not.
|
||||
*
|
||||
* The ZAP routines are thread-safe. However, you must observe the
|
||||
* DMU's restriction that a transaction may not be operated on
|
||||
* concurrently.
|
||||
*
|
||||
* Any of the routines that return an int may return an I/O error (EIO
|
||||
* or ECHECKSUM).
|
||||
*
|
||||
*
|
||||
* Implementation / Performance Notes:
|
||||
*
|
||||
* The ZAP is intended to operate most efficiently on attributes with
|
||||
* short (49 bytes or less) names and single 8-byte values, for which
|
||||
* the microzap will be used. The ZAP should be efficient enough so
|
||||
* that the user does not need to cache these attributes.
|
||||
*
|
||||
* The ZAP's locking scheme makes its routines thread-safe. Operations
|
||||
* on different zapobjs will be processed concurrently. Operations on
|
||||
* the same zapobj which only read data will be processed concurrently.
|
||||
* Operations on the same zapobj which modify data will be processed
|
||||
* concurrently when there are many attributes in the zapobj (because
|
||||
* the ZAP uses per-block locking - more than 128 * (number of cpus)
|
||||
* small attributes will suffice).
|
||||
*/
|
||||
|
||||
/*
|
||||
* We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
|
||||
* strings) for the names of attributes, rather than a byte string
|
||||
* bounded by an explicit length. If some day we want to support names
|
||||
* in character sets which have embedded zeros (eg. UTF-16, UTF-32),
|
||||
* we'll have to add routines for using length-bounded strings.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZAP_MAXNAMELEN 256
|
||||
#define ZAP_MAXVALUELEN 1024
|
||||
|
||||
/*
|
||||
* The matchtype specifies which entry will be accessed.
|
||||
* MT_EXACT: only find an exact match (non-normalized)
|
||||
* MT_FIRST: find the "first" normalized (case and Unicode
|
||||
* form) match; the designated "first" match will not change as long
|
||||
* as the set of entries with this normalization doesn't change
|
||||
* MT_BEST: if there is an exact match, find that, otherwise find the
|
||||
* first normalized match
|
||||
*/
|
||||
typedef enum matchtype
|
||||
{
|
||||
MT_EXACT,
|
||||
MT_BEST,
|
||||
MT_FIRST
|
||||
} matchtype_t;
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes and return its object number.
|
||||
* MT_EXACT will cause the zap object to only support MT_EXACT lookups,
|
||||
* otherwise any matchtype can be used for lookups.
|
||||
*
|
||||
* normflags specifies what normalization will be done. values are:
|
||||
* 0: no normalization (legacy on-disk format, supports MT_EXACT matching
|
||||
* only)
|
||||
* U8_TEXTPREP_TOLOWER: case normalization will be performed.
|
||||
* MT_FIRST/MT_BEST matching will find entries that match without
|
||||
* regard to case (eg. looking for "foo" can find an entry "Foo").
|
||||
* Eventually, other flags will permit unicode normalization as well.
|
||||
*/
|
||||
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes from the given (unallocated)
|
||||
* object number.
|
||||
*/
|
||||
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
|
||||
int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* The zapobj passed in must be a valid ZAP object for all of the
|
||||
* following routines.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Destroy this zapobj and all its attributes.
|
||||
*
|
||||
* Frees the object number using dmu_object_free.
|
||||
*/
|
||||
int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Manipulate attributes.
|
||||
*
|
||||
* 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Retrieve the contents of the attribute with the given name.
|
||||
*
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*
|
||||
* If 'integer_size' is smaller than the attribute's integer size, the
|
||||
* call will fail and return EINVAL.
|
||||
*
|
||||
* If 'integer_size' is equal to or larger than the attribute's integer
|
||||
* size, the call will succeed and return 0. When converting to a
|
||||
* larger integer size, the integers will be treated as unsigned (ie. no
|
||||
* sign-extension will be performed).
|
||||
*
|
||||
* 'num_integers' is the length (in integers) of 'buf'.
|
||||
*
|
||||
* If the attribute is longer than the buffer, as many integers as will
|
||||
* fit will be transferred to 'buf'. If the entire attribute was not
|
||||
* transferred, the call will return EOVERFLOW.
|
||||
*
|
||||
* If rn_len is nonzero, realname will be set to the name of the found
|
||||
* entry (which may be different from the requested name if matchtype is
|
||||
* not MT_EXACT).
|
||||
*
|
||||
* If normalization_conflictp is not NULL, it will be set if there is
|
||||
* another name with the same case/unicode normalized form.
|
||||
*/
|
||||
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *normalization_conflictp);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
*
|
||||
* If an attribute with the given name already exists, the call will
|
||||
* fail and return EEXIST.
|
||||
*/
|
||||
int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the attribute with the given name to the given value. If an
|
||||
* attribute with the given name does not exist, it will be created. If
|
||||
* an attribute with the given name already exists, the previous value
|
||||
* will be overwritten. The integer_size may be different from the
|
||||
* existing attribute's integer size, in which case the attribute's
|
||||
* integer size will be updated to the new value.
|
||||
*/
|
||||
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Get the length (in integers) and the integer size of the specified
|
||||
* attribute.
|
||||
*
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
|
||||
/*
|
||||
* Remove the specified attribute.
|
||||
*
|
||||
* If the specified attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
matchtype_t mt, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Returns (in *count) the number of attributes in the specified zap
|
||||
* object.
|
||||
*/
|
||||
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
|
||||
|
||||
|
||||
/*
|
||||
* Returns (in name) the name of the entry whose (value & mask)
|
||||
* (za_first_integer) is value, or ENOENT if not found. The string
|
||||
* pointed to by name must be at least 256 bytes long. If mask==0, the
|
||||
* match must be exact (ie, same as mask=-1ULL).
|
||||
*/
|
||||
int zap_value_search(objset_t *os, uint64_t zapobj,
|
||||
uint64_t value, uint64_t mask, char *name);
|
||||
|
||||
/*
|
||||
* Transfer all the entries from fromobj into intoobj. Only works on
|
||||
* int_size=8 num_integers=1 values. Fails if there are any duplicated
|
||||
* entries.
|
||||
*/
|
||||
int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Manipulate entries where the name + value are the "same" (the name is
|
||||
* a stringified version of the value).
|
||||
*/
|
||||
int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
|
||||
|
||||
struct zap;
|
||||
struct zap_leaf;
|
||||
typedef struct zap_cursor {
|
||||
/* This structure is opaque! */
|
||||
objset_t *zc_objset;
|
||||
struct zap *zc_zap;
|
||||
struct zap_leaf *zc_leaf;
|
||||
uint64_t zc_zapobj;
|
||||
uint64_t zc_hash;
|
||||
uint32_t zc_cd;
|
||||
} zap_cursor_t;
|
||||
|
||||
typedef struct {
|
||||
int za_integer_length;
|
||||
/*
|
||||
* za_normalization_conflict will be set if there are additional
|
||||
* entries with this normalized form (eg, "foo" and "Foo").
|
||||
*/
|
||||
boolean_t za_normalization_conflict;
|
||||
uint64_t za_num_integers;
|
||||
uint64_t za_first_integer; /* no sign extension for <8byte ints */
|
||||
char za_name[MAXNAMELEN];
|
||||
} zap_attribute_t;
|
||||
|
||||
/*
|
||||
* The interface for listing all the attributes of a zapobj can be
|
||||
* thought of as a cursor moving down a list of the attributes one by
|
||||
* one. The cookie returned by the zap_cursor_serialize routine is
|
||||
* persistent across system calls (and across reboot, even).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor, pointing to the "first" attribute of the
|
||||
* zapobj. You must _fini the cursor when you are done with it.
|
||||
*/
|
||||
void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj);
|
||||
void zap_cursor_fini(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Get the attribute currently pointed to by the cursor. Returns
|
||||
* ENOENT if at the end of the attributes.
|
||||
*/
|
||||
int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
|
||||
|
||||
/*
|
||||
* Advance the cursor to the next attribute.
|
||||
*/
|
||||
void zap_cursor_advance(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Get a persistent cookie pointing to the current position of the zap
|
||||
* cursor. The low 4 bits in the cookie are always zero, and thus can
|
||||
* be used to differentiate a serialized cookie from a different type
|
||||
* of value. The cookie will be less than 2^32 as long as there are
|
||||
* fewer than 2^22 (4.2 million) entries in the zap object.
|
||||
*/
|
||||
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor pointing to the position recorded by
|
||||
* zap_cursor_serialize (in the "serialized" argument). You can also
|
||||
* use a "serialized" argument of 0 to start at the beginning of the
|
||||
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
|
||||
* zap_cursor_init(...).)
|
||||
*/
|
||||
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
|
||||
uint64_t zapobj, uint64_t serialized);
|
||||
|
||||
|
||||
#define ZAP_HISTOGRAM_SIZE 10
|
||||
|
||||
typedef struct zap_stats {
|
||||
/*
|
||||
* Size of the pointer table (in number of entries).
|
||||
* This is always a power of 2, or zero if it's a microzap.
|
||||
* In general, it should be considerably greater than zs_num_leafs.
|
||||
*/
|
||||
uint64_t zs_ptrtbl_len;
|
||||
|
||||
uint64_t zs_blocksize; /* size of zap blocks */
|
||||
|
||||
/*
|
||||
* The number of blocks used. Note that some blocks may be
|
||||
* wasted because old ptrtbl's and large name/value blocks are
|
||||
* not reused. (Although their space is reclaimed, we don't
|
||||
* reuse those offsets in the object.)
|
||||
*/
|
||||
uint64_t zs_num_blocks;
|
||||
|
||||
/*
|
||||
* Pointer table values from zap_ptrtbl in the zap_phys_t
|
||||
*/
|
||||
uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */
|
||||
uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */
|
||||
uint64_t zs_ptrtbl_zt_blk; /* starting block number */
|
||||
uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */
|
||||
uint64_t zs_ptrtbl_zt_shift; /* bits to index it */
|
||||
|
||||
/*
|
||||
* Values of the other members of the zap_phys_t
|
||||
*/
|
||||
uint64_t zs_block_type; /* ZBT_HEADER */
|
||||
uint64_t zs_magic; /* ZAP_MAGIC */
|
||||
uint64_t zs_num_leafs; /* The number of leaf blocks */
|
||||
uint64_t zs_num_entries; /* The number of zap entries */
|
||||
uint64_t zs_salt; /* salt to stir into hash function */
|
||||
|
||||
/*
|
||||
* Histograms. For all histograms, the last index
|
||||
* (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
|
||||
* than what can be represented. For example
|
||||
* zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
|
||||
* of leafs with 45 or more entries.
|
||||
*/
|
||||
|
||||
/*
|
||||
* zs_leafs_with_2n_pointers[n] is the number of leafs with
|
||||
* 2^n pointers to it.
|
||||
*/
|
||||
uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_blocks_with_n5_entries[n] is the number of leafs with
|
||||
* [n*5, (n+1)*5) entries. In the current implementation, there
|
||||
* can be at most 55 entries in any block, but there may be
|
||||
* fewer if the name or value is large, or the block is not
|
||||
* completely full.
|
||||
*/
|
||||
uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_blocks_n_tenths_full[n] is the number of leafs whose
|
||||
* fullness is in the range [n/10, (n+1)/10).
|
||||
*/
|
||||
uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_entries_using_n_chunks[n] is the number of entries which
|
||||
* consume n 24-byte chunks. (Note, large names/values only use
|
||||
* one chunk, but contribute to zs_num_blocks_large.)
|
||||
*/
|
||||
uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_buckets_with_n_entries[n] is the number of buckets (each
|
||||
* leaf has 64 buckets) with n entries.
|
||||
* zs_buckets_with_n_entries[1] should be very close to
|
||||
* zs_num_entries.
|
||||
*/
|
||||
uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
|
||||
} zap_stats_t;
|
||||
|
||||
/*
|
||||
* Get statistics about a ZAP object. Note: you need to be aware of the
|
||||
* internal implementation of the ZAP to correctly interpret some of the
|
||||
* statistics. This interface shouldn't be relied on unless you really
|
||||
* know what you're doing.
|
||||
*/
|
||||
int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_H */
|
||||
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_IMPL_H
|
||||
#define _SYS_ZAP_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int fzap_default_block_shift;
|
||||
|
||||
#define ZAP_MAGIC 0x2F52AB2ABULL
|
||||
|
||||
#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)
|
||||
|
||||
/*
 * All-ones 32-bit value (presumably the largest collision differentiator,
 * "cd" -- confirm against users of the macro).  The replacement list is
 * fully parenthesized so the cast cannot be split from its operand by the
 * expansion context (e.g. "sizeof ZAP_MAXCD").
 */
#define ZAP_MAXCD ((uint32_t)(-1))
|
||||
#define ZAP_HASHBITS 28
|
||||
#define MZAP_ENT_LEN 64
|
||||
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
|
||||
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
|
||||
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
|
||||
|
||||
typedef struct mzap_ent_phys {
|
||||
uint64_t mze_value;
|
||||
uint32_t mze_cd;
|
||||
uint16_t mze_pad; /* in case we want to chain them someday */
|
||||
char mze_name[MZAP_NAME_LEN];
|
||||
} mzap_ent_phys_t;
|
||||
|
||||
typedef struct mzap_phys {
|
||||
uint64_t mz_block_type; /* ZBT_MICRO */
|
||||
uint64_t mz_salt;
|
||||
uint64_t mz_normflags;
|
||||
uint64_t mz_pad[5];
|
||||
mzap_ent_phys_t mz_chunk[1];
|
||||
/* actually variable size depending on block size */
|
||||
} mzap_phys_t;
|
||||
|
||||
typedef struct mzap_ent {
|
||||
avl_node_t mze_node;
|
||||
int mze_chunkid;
|
||||
uint64_t mze_hash;
|
||||
mzap_ent_phys_t mze_phys;
|
||||
} mzap_ent_t;
|
||||
|
||||
|
||||
/*
|
||||
* The (fat) zap is stored in one object. It is an array of
|
||||
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
|
||||
*
|
||||
* ptrtbl fits in first block:
|
||||
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
|
||||
*
|
||||
* ptrtbl too big for first block:
|
||||
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
|
||||
*
|
||||
*/
|
||||
|
||||
struct dmu_buf;
|
||||
struct zap_leaf;
|
||||
|
||||
#define ZBT_LEAF ((1ULL << 63) + 0)
|
||||
#define ZBT_HEADER ((1ULL << 63) + 1)
|
||||
#define ZBT_MICRO ((1ULL << 63) + 3)
|
||||
/* any other values are ptrtbl blocks */
|
||||
|
||||
/*
|
||||
* the embedded pointer table takes up half a block:
|
||||
* block size / entry size (2^3) / 2
|
||||
*/
|
||||
#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
|
||||
|
||||
/*
|
||||
* The embedded pointer table starts half-way through the block. Since
|
||||
* the pointer table itself is half the block, it starts at (64-bit)
|
||||
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
|
||||
*/
|
||||
#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
|
||||
((uint64_t *)(zap)->zap_f.zap_phys) \
|
||||
[(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
|
||||
|
||||
/*
|
||||
* TAKE NOTE:
|
||||
* If zap_phys_t is modified, zap_byteswap() must be modified.
|
||||
*/
|
||||
typedef struct zap_phys {
|
||||
uint64_t zap_block_type; /* ZBT_HEADER */
|
||||
uint64_t zap_magic; /* ZAP_MAGIC */
|
||||
|
||||
struct zap_table_phys {
|
||||
uint64_t zt_blk; /* starting block number */
|
||||
uint64_t zt_numblks; /* number of blocks */
|
||||
uint64_t zt_shift; /* bits to index it */
|
||||
uint64_t zt_nextblk; /* next (larger) copy start block */
|
||||
uint64_t zt_blks_copied; /* number source blocks copied */
|
||||
} zap_ptrtbl;
|
||||
|
||||
uint64_t zap_freeblk; /* the next free block */
|
||||
uint64_t zap_num_leafs; /* number of leafs */
|
||||
uint64_t zap_num_entries; /* number of entries */
|
||||
uint64_t zap_salt; /* salt to stir into hash function */
|
||||
uint64_t zap_normflags; /* flags for u8_textprep_str() */
|
||||
/*
|
||||
* This structure is followed by padding, and then the embedded
|
||||
* pointer table. The embedded pointer table takes up second
|
||||
* half of the block. It is accessed using the
|
||||
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
|
||||
*/
|
||||
} zap_phys_t;
|
||||
|
||||
typedef struct zap_table_phys zap_table_phys_t;
|
||||
|
||||
typedef struct zap {
|
||||
objset_t *zap_objset;
|
||||
uint64_t zap_object;
|
||||
struct dmu_buf *zap_dbuf;
|
||||
krwlock_t zap_rwlock;
|
||||
boolean_t zap_ismicro;
|
||||
int zap_normflags;
|
||||
uint64_t zap_salt;
|
||||
union {
|
||||
struct {
|
||||
zap_phys_t *zap_phys;
|
||||
|
||||
/*
|
||||
* zap_num_entries_mtx protects
|
||||
* zap_num_entries
|
||||
*/
|
||||
kmutex_t zap_num_entries_mtx;
|
||||
int zap_block_shift;
|
||||
} zap_fat;
|
||||
struct {
|
||||
mzap_phys_t *zap_phys;
|
||||
int16_t zap_num_entries;
|
||||
int16_t zap_num_chunks;
|
||||
int16_t zap_alloc_next;
|
||||
avl_tree_t zap_avl;
|
||||
} zap_micro;
|
||||
} zap_u;
|
||||
} zap_t;
|
||||
|
||||
typedef struct zap_name {
|
||||
zap_t *zn_zap;
|
||||
const char *zn_name_orij;
|
||||
uint64_t zn_hash;
|
||||
matchtype_t zn_matchtype;
|
||||
const char *zn_name_norm;
|
||||
char zn_normbuf[ZAP_MAXNAMELEN];
|
||||
} zap_name_t;
|
||||
|
||||
#define zap_f zap_u.zap_fat
|
||||
#define zap_m zap_u.zap_micro
|
||||
|
||||
boolean_t zap_match(zap_name_t *zn, const char *matchname);
|
||||
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
|
||||
void zap_unlockdir(zap_t *zap);
|
||||
void zap_evict(dmu_buf_t *db, void *vmzap);
|
||||
zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
|
||||
void zap_name_free(zap_name_t *zn);
|
||||
|
||||
/*
 * Index made of the n most significant bits of hash (written for a 64-bit
 * hash value).  n == 0 yields 0 directly instead of shifting right by the
 * full 64 bits.
 */
#define ZAP_HASH_IDX(hash, n) (((n) != 0) ? ((hash) >> (64 - (n))) : 0)
|
||||
|
||||
void fzap_byteswap(void *buf, size_t size);
|
||||
int fzap_count(zap_t *zap, uint64_t *count);
|
||||
int fzap_lookup(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
char *realname, int rn_len, boolean_t *normalization_conflictp);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
int fzap_length(zap_name_t *zn,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
|
||||
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
|
||||
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
|
||||
void zap_put_leaf(struct zap_leaf *l);
|
||||
|
||||
int fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, dmu_tx_t *tx);
|
||||
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_IMPL_H */
|
||||
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_LEAF_H
|
||||
#define _SYS_ZAP_LEAF_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct zap;
|
||||
|
||||
#define ZAP_LEAF_MAGIC 0x2AB1EAF
|
||||
|
||||
/* chunk size = 24 bytes */
|
||||
#define ZAP_LEAF_CHUNKSIZE 24
|
||||
|
||||
/*
|
||||
* The amount of space available for chunks is:
|
||||
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
|
||||
* entries - header space (2*chunksize)
|
||||
*/
|
||||
#define ZAP_LEAF_NUMCHUNKS(l) \
|
||||
(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
|
||||
ZAP_LEAF_CHUNKSIZE - 2)
|
||||
|
||||
/*
|
||||
* The amount of space within the chunk available for the array is:
|
||||
* chunk size - space for type (1) - space for next pointer (2)
|
||||
*/
|
||||
#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
|
||||
|
||||
#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
|
||||
(((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)
|
||||
|
||||
/*
|
||||
* Low water mark: when there are only this many chunks free, start
|
||||
* growing the ptrtbl. Ideally, this should be larger than a
|
||||
* "reasonably-sized" entry. 20 chunks is more than enough for the
|
||||
* largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
|
||||
* while still being only around 3% for 16k blocks.
|
||||
*/
|
||||
#define ZAP_LEAF_LOW_WATER (20)
|
||||
|
||||
/*
|
||||
* The leaf hash table has block size / 2^5 (32) number of entries,
|
||||
* which should be more than enough for the maximum number of entries,
|
||||
* which is less than block size / CHUNKSIZE (24) / minimum number of
|
||||
* chunks per entry (3).
|
||||
*/
|
||||
#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
|
||||
#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
|
||||
|
||||
/*
|
||||
* The chunks start immediately after the hash table. The end of the
|
||||
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
|
||||
* chunk_t.
|
||||
*/
|
||||
#define ZAP_LEAF_CHUNK(l, idx) \
|
||||
((zap_leaf_chunk_t *) \
|
||||
((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
|
||||
#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
|
||||
|
||||
typedef enum zap_chunk_type {
|
||||
ZAP_CHUNK_FREE = 253,
|
||||
ZAP_CHUNK_ENTRY = 252,
|
||||
ZAP_CHUNK_ARRAY = 251,
|
||||
ZAP_CHUNK_TYPE_MAX = 250
|
||||
} zap_chunk_type_t;
|
||||
|
||||
#define ZLF_ENTRIES_CDSORTED (1<<0)
|
||||
|
||||
/*
|
||||
* TAKE NOTE:
|
||||
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
|
||||
*/
|
||||
typedef struct zap_leaf_phys {
|
||||
struct zap_leaf_header {
|
||||
uint64_t lh_block_type; /* ZBT_LEAF */
|
||||
uint64_t lh_pad1;
|
||||
uint64_t lh_prefix; /* hash prefix of this leaf */
|
||||
uint32_t lh_magic; /* ZAP_LEAF_MAGIC */
|
||||
uint16_t lh_nfree; /* number free chunks */
|
||||
uint16_t lh_nentries; /* number of entries */
|
||||
uint16_t lh_prefix_len; /* num bits used to id this */
|
||||
|
||||
/* above is accessible to zap, below is zap_leaf private */
|
||||
|
||||
uint16_t lh_freelist; /* chunk head of free list */
|
||||
uint8_t lh_flags; /* ZLF_* flags */
|
||||
uint8_t lh_pad2[11];
|
||||
} l_hdr; /* 2 24-byte chunks */
|
||||
|
||||
/*
|
||||
* The header is followed by a hash table with
|
||||
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
|
||||
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
|
||||
* zap_leaf_chunk structures. These structures are accessed
|
||||
* with the ZAP_LEAF_CHUNK() macro.
|
||||
*/
|
||||
|
||||
uint16_t l_hash[1];
|
||||
} zap_leaf_phys_t;
|
||||
|
||||
typedef union zap_leaf_chunk {
|
||||
struct zap_leaf_entry {
|
||||
uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
|
||||
uint8_t le_int_size; /* size of ints */
|
||||
uint16_t le_next; /* next entry in hash chain */
|
||||
uint16_t le_name_chunk; /* first chunk of the name */
|
||||
uint16_t le_name_length; /* bytes in name, incl null */
|
||||
uint16_t le_value_chunk; /* first chunk of the value */
|
||||
uint16_t le_value_length; /* value length in ints */
|
||||
uint32_t le_cd; /* collision differentiator */
|
||||
uint64_t le_hash; /* hash value of the name */
|
||||
} l_entry;
|
||||
struct zap_leaf_array {
|
||||
uint8_t la_type; /* always ZAP_CHUNK_ARRAY */
|
||||
uint8_t la_array[ZAP_LEAF_ARRAY_BYTES];
|
||||
uint16_t la_next; /* next blk or CHAIN_END */
|
||||
} l_array;
|
||||
struct zap_leaf_free {
|
||||
uint8_t lf_type; /* always ZAP_CHUNK_FREE */
|
||||
uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES];
|
||||
uint16_t lf_next; /* next in free list, or CHAIN_END */
|
||||
} l_free;
|
||||
} zap_leaf_chunk_t;
|
||||
|
||||
typedef struct zap_leaf {
|
||||
krwlock_t l_rwlock;
|
||||
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
|
||||
int l_bs; /* block size shift */
|
||||
dmu_buf_t *l_dbuf;
|
||||
zap_leaf_phys_t *l_phys;
|
||||
} zap_leaf_t;
|
||||
|
||||
|
||||
typedef struct zap_entry_handle {
|
||||
/* below is set by zap_leaf.c and is public to zap.c */
|
||||
uint64_t zeh_num_integers;
|
||||
uint64_t zeh_hash;
|
||||
uint32_t zeh_cd;
|
||||
uint8_t zeh_integer_size;
|
||||
|
||||
/* below is private to zap_leaf.c */
|
||||
uint16_t zeh_fakechunk;
|
||||
uint16_t *zeh_chunkp;
|
||||
zap_leaf_t *zeh_leaf;
|
||||
} zap_entry_handle_t;
|
||||
|
||||
/*
|
||||
* Return a handle to the named entry, or ENOENT if not found. The hash
|
||||
* value must equal zap_hash(name).
|
||||
*/
|
||||
extern int zap_leaf_lookup(zap_leaf_t *l,
|
||||
zap_name_t *zn, zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Return a handle to the entry with this hash+cd, or the entry with the
|
||||
* next closest hash+cd.
|
||||
*/
|
||||
extern int zap_leaf_lookup_closest(zap_leaf_t *l,
|
||||
uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Read the first num_integers in the attribute. Integer size
|
||||
* conversion will be done without sign extension. Return EINVAL if
|
||||
* integer_size is too small. Return EOVERFLOW if there are more than
|
||||
* num_integers in the attribute.
|
||||
*/
|
||||
extern int zap_entry_read(const zap_entry_handle_t *zeh,
|
||||
uint8_t integer_size, uint64_t num_integers, void *buf);
|
||||
|
||||
extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
|
||||
uint16_t buflen, char *buf);
|
||||
|
||||
/*
|
||||
* Replace the value of an existing entry.
|
||||
*
|
||||
* zap_entry_update may fail if it runs out of space (ENOSPC).
|
||||
*/
|
||||
extern int zap_entry_update(zap_entry_handle_t *zeh,
|
||||
uint8_t integer_size, uint64_t num_integers, const void *buf);
|
||||
|
||||
/*
|
||||
* Remove an entry.
|
||||
*/
|
||||
extern void zap_entry_remove(zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Create an entry. An equal entry must not exist, and this entry must
|
||||
* belong in this leaf (according to its hash value). Fills in the
|
||||
* entry handle on success. Returns 0 on success or ENOSPC on failure.
|
||||
*/
|
||||
extern int zap_entry_create(zap_leaf_t *l,
|
||||
const char *name, uint64_t h, uint32_t cd,
|
||||
uint8_t integer_size, uint64_t num_integers, const void *buf,
|
||||
zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Return true if there are additional entries with the same normalized
|
||||
* form.
|
||||
*/
|
||||
extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
|
||||
zap_name_t *zn, const char *name, zap_t *zap);
|
||||
|
||||
/*
|
||||
* Other stuff.
|
||||
*/
|
||||
|
||||
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
|
||||
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
|
||||
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
|
||||
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_LEAF_H */
|
||||
@@ -0,0 +1,214 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_ACL_H
|
||||
#define _SYS_FS_ZFS_ACL_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#endif
|
||||
#include <sys/acl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_fuid.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct znode_phys;
|
||||
|
||||
#define ACE_SLOT_CNT 6
|
||||
#define ZFS_ACL_VERSION_INITIAL 0ULL
|
||||
#define ZFS_ACL_VERSION_FUID 1ULL
|
||||
#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID
|
||||
|
||||
/*
|
||||
* ZFS ACLs are stored in various forms.
|
||||
* Files created with ACL version ZFS_ACL_VERSION_INITIAL
|
||||
* will all be created with fixed length ACEs of type
|
||||
* zfs_oldace_t.
|
||||
*
|
||||
* Files with ACL version ZFS_ACL_VERSION_FUID will be created
|
||||
* with various sized ACEs. The abstraction entries will utilize
|
||||
* zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t
|
||||
* and some specialized CIFS ACEs will use zfs_object_ace_t.
|
||||
*/
|
||||
|
||||
/*
|
||||
* All ACEs have a common hdr. For
|
||||
* owner@, group@, and everyone@ this is all
|
||||
* that's needed.
|
||||
*/
|
||||
typedef struct zfs_ace_hdr {
|
||||
uint16_t z_type;
|
||||
uint16_t z_flags;
|
||||
uint32_t z_access_mask;
|
||||
} zfs_ace_hdr_t;
|
||||
|
||||
typedef zfs_ace_hdr_t zfs_ace_abstract_t;
|
||||
|
||||
/*
|
||||
* Standard ACE
|
||||
*/
|
||||
typedef struct zfs_ace {
|
||||
zfs_ace_hdr_t z_hdr;
|
||||
uint64_t z_fuid;
|
||||
} zfs_ace_t;
|
||||
|
||||
/*
|
||||
* The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE
|
||||
* and will only be set/retrieved in a CIFS context.
|
||||
*/
|
||||
|
||||
typedef struct zfs_object_ace {
|
||||
zfs_ace_t z_ace;
|
||||
uint8_t z_object_type[16]; /* object type */
|
||||
uint8_t z_inherit_type[16]; /* inherited object type */
|
||||
} zfs_object_ace_t;
|
||||
|
||||
typedef struct zfs_oldace {
|
||||
uint32_t z_fuid; /* "who" */
|
||||
uint32_t z_access_mask; /* access mask */
|
||||
uint16_t z_flags; /* flags, i.e inheritance */
|
||||
uint16_t z_type; /* type of entry allow/deny */
|
||||
} zfs_oldace_t;
|
||||
|
||||
typedef struct zfs_acl_phys_v0 {
|
||||
uint64_t z_acl_extern_obj; /* ext acl pieces */
|
||||
uint32_t z_acl_count; /* Number of ACEs */
|
||||
uint16_t z_acl_version; /* acl version */
|
||||
uint16_t z_acl_pad; /* pad */
|
||||
zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
|
||||
} zfs_acl_phys_v0_t;
|
||||
|
||||
/*
 * Number of bytes occupied by ACE_SLOT_CNT fixed-size zfs_oldace_t entries.
 * Used as the size of the embedded z_ace_data[] byte array in zfs_acl_phys_t.
 */
#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
|
||||
|
||||
typedef struct zfs_acl_phys {
|
||||
uint64_t z_acl_extern_obj; /* ext acl pieces */
|
||||
uint32_t z_acl_size; /* Number of bytes in ACL */
|
||||
uint16_t z_acl_version; /* acl version */
|
||||
uint16_t z_acl_count; /* ace count */
|
||||
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
|
||||
} zfs_acl_phys_t;
|
||||
|
||||
|
||||
|
||||
typedef struct acl_ops {
|
||||
uint32_t (*ace_mask_get) (void *acep); /* get access mask */
|
||||
void (*ace_mask_set) (void *acep,
|
||||
uint32_t mask); /* set access mask */
|
||||
uint16_t (*ace_flags_get) (void *acep); /* get flags */
|
||||
void (*ace_flags_set) (void *acep,
|
||||
uint16_t flags); /* set flags */
|
||||
uint16_t (*ace_type_get)(void *acep); /* get type */
|
||||
void (*ace_type_set)(void *acep,
|
||||
uint16_t type); /* set type */
|
||||
uint64_t (*ace_who_get)(void *acep); /* get who/fuid */
|
||||
void (*ace_who_set)(void *acep,
|
||||
uint64_t who); /* set who/fuid */
|
||||
size_t (*ace_size)(void *acep); /* how big is this ace */
|
||||
size_t (*ace_abstract_size)(void); /* sizeof abstract entry */
|
||||
int (*ace_mask_off)(void); /* off of access mask in ace */
|
||||
int (*ace_data)(void *acep, void **datap);
|
||||
/* ptr to data if any */
|
||||
} acl_ops_t;
|
||||
|
||||
/*
|
||||
* A zfs_acl_t structure is composed of a list of zfs_acl_node_t's.
|
||||
* Each node will have one or more ACEs associated with it. You will
|
||||
* only have multiple nodes during a chmod operation. Normally only
|
||||
* one node is required.
|
||||
*/
|
||||
typedef struct zfs_acl_node {
|
||||
list_node_t z_next; /* Next chunk of ACEs */
|
||||
void *z_acldata; /* pointer into actual ACE(s) */
|
||||
void *z_allocdata; /* pointer to kmem allocated memory */
|
||||
size_t z_allocsize; /* Size of blob in bytes */
|
||||
size_t z_size; /* length of ACL data */
|
||||
int z_ace_count; /* number of ACEs in this acl node */
|
||||
int z_ace_idx; /* ace iterator positioned on */
|
||||
} zfs_acl_node_t;
|
||||
|
||||
typedef struct zfs_acl {
|
||||
int z_acl_count; /* Number of ACEs */
|
||||
size_t z_acl_bytes; /* Number of bytes in ACL */
|
||||
uint_t z_version; /* version of ACL */
|
||||
void *z_next_ace; /* pointer to next ACE */
|
||||
int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
|
||||
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
|
||||
list_t z_acl; /* chunks of ACE data */
|
||||
acl_ops_t z_ops; /* ACL operations */
|
||||
boolean_t z_has_fuids; /* FUIDs present in ACL? */
|
||||
} zfs_acl_t;
|
||||
|
||||
#define ACL_DATA_ALLOCED 0x1
|
||||
/* Size in bytes of an array of (aclcnt) ace_t entries. */
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
|
||||
|
||||
/*
|
||||
* Property values for acl_mode and acl_inherit.
|
||||
*
|
||||
* acl_mode can take discard, noallow, groupmask and passthrough.
|
||||
* whereas acl_inherit has secure instead of groupmask.
|
||||
*/
|
||||
|
||||
#define ZFS_ACL_DISCARD 0
|
||||
#define ZFS_ACL_NOALLOW 1
|
||||
#define ZFS_ACL_GROUPMASK 2
|
||||
#define ZFS_ACL_PASSTHROUGH 3
|
||||
#define ZFS_ACL_RESTRICTED 4
|
||||
#define ZFS_ACL_PASSTHROUGH_X 5
|
||||
|
||||
struct znode;
|
||||
struct zfsvfs;
|
||||
struct zfs_fuid_info;
|
||||
|
||||
#ifdef _KERNEL
|
||||
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
|
||||
dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
void zfs_acl_rele(void *);
|
||||
void zfs_oldace_byteswap(ace_t *, int);
|
||||
void zfs_ace_byteswap(void *, size_t, boolean_t);
|
||||
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
|
||||
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
|
||||
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
|
||||
extern int zfs_acl_access(struct znode *, int, cred_t *);
|
||||
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
|
||||
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
|
||||
int zfs_zaccess_rename(struct znode *, struct znode *,
|
||||
struct znode *, struct znode *, cred_t *cr);
|
||||
void zfs_acl_free(zfs_acl_t *);
|
||||
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
|
||||
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
|
||||
struct zfs_fuid_info **, dmu_tx_t *);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* _SYS_FS_ZFS_ACL_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/note.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/t_lock.h>
|
||||
#include <sys/atomic.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/bitmap.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/cpuvar.h>
|
||||
#include <sys/kobj.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/disp.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/dirent.h>
|
||||
#include <sys/time.h>
|
||||
#include <vm/seg_kmem.h>
|
||||
#include <sys/zone.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/zfs_debug.h>
|
||||
#include <sys/sysevent.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
#include <sys/fm/util.h>
|
||||
|
||||
#define CPU_SEQID (CPU->cpu_seqid)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_CONTEXT_H */
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_CTLDIR_H
|
||||
#define _ZFS_CTLDIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZFS_CTLDIR_NAME ".zfs"
|
||||
|
||||
#define zfs_has_ctldir(zdp) \
|
||||
((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \
|
||||
((zdp)->z_zfsvfs->z_ctldir != NULL))
|
||||
#define zfs_show_ctldir(zdp) \
|
||||
(zfs_has_ctldir(zdp) && \
|
||||
((zdp)->z_zfsvfs->z_show_ctldir))
|
||||
|
||||
void zfsctl_create(zfsvfs_t *);
|
||||
void zfsctl_destroy(zfsvfs_t *);
|
||||
vnode_t *zfsctl_root(znode_t *);
|
||||
void zfsctl_init(void);
|
||||
void zfsctl_fini(void);
|
||||
|
||||
int zfsctl_rename_snapshot(const char *from, const char *to);
|
||||
int zfsctl_destroy_snapshot(const char *snapname, int force);
|
||||
int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
|
||||
|
||||
int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
|
||||
int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
|
||||
int *direntflags, pathname_t *realpnp);
|
||||
|
||||
int zfsctl_make_fid(zfsvfs_t *zfsvfsp, uint64_t object, uint32_t gen,
|
||||
fid_t *fidp);
|
||||
int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
|
||||
|
||||
#define ZFSCTL_INO_ROOT 0x1
|
||||
#define ZFSCTL_INO_SNAPDIR 0x2
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFS_CTLDIR_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_DEBUG_H
|
||||
#define _SYS_ZFS_DEBUG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ZFS debugging
|
||||
*/
|
||||
|
||||
#if defined(DEBUG) || !defined(_KERNEL)
|
||||
#define ZFS_DEBUG
|
||||
#endif
|
||||
|
||||
extern int zfs_flags;
|
||||
|
||||
#define ZFS_DEBUG_DPRINTF 0x0001
|
||||
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
|
||||
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
|
||||
#define ZFS_DEBUG_SNAPNAMES 0x0008
|
||||
#define ZFS_DEBUG_MODIFY 0x0010
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
extern void __dprintf(const char *file, const char *func,
|
||||
int line, const char *fmt, ...);
|
||||
/*
 * Emit a debug message through __dprintf() when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags; the call site's file, function and line are passed along.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement.  A bare "if" here would capture the "else" of a caller written
 * as: if (cond) dprintf(...); else ...
 */
#define	dprintf(...) \
	do { \
		if (zfs_flags & ZFS_DEBUG_DPRINTF) \
			__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__); \
	} while (0)
|
||||
#else
|
||||
#define dprintf(...) ((void)0)
|
||||
#endif /* ZFS_DEBUG */
|
||||
|
||||
extern void zfs_panic_recover(const char *fmt, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_DEBUG_H */
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_DIR_H
|
||||
#define _SYS_FS_ZFS_DIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* zfs_dirent_lock() flags */
|
||||
#define ZNEW 0x0001 /* entry should not exist */
|
||||
#define ZEXISTS 0x0002 /* entry should exist */
|
||||
#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
|
||||
#define ZXATTR 0x0008 /* we want the xattr dir */
|
||||
#define ZRENAMING 0x0010 /* znode is being renamed */
|
||||
#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
|
||||
#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
|
||||
|
||||
/* mknode flags */
|
||||
#define IS_ROOT_NODE 0x01 /* create a root node */
|
||||
#define IS_XATTR 0x02 /* create an extended attribute node */
|
||||
#define IS_REPLAY 0x04 /* we are replaying intent log */
|
||||
|
||||
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
|
||||
int, int *, pathname_t *);
|
||||
extern void zfs_dirent_unlock(zfs_dirlock_t *);
|
||||
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
|
||||
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
|
||||
boolean_t *);
|
||||
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
|
||||
pathname_t *);
|
||||
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
|
||||
uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_rmnode(znode_t *);
|
||||
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
|
||||
extern boolean_t zfs_dirempty(znode_t *);
|
||||
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
|
||||
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
|
||||
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
|
||||
extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int);
|
||||
extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_DIR_H */
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_FUID_H
|
||||
#define _SYS_FS_ZFS_FUID_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kidmap.h>
|
||||
#include <sys/sid.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#endif
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
ZFS_OWNER,
|
||||
ZFS_GROUP,
|
||||
ZFS_ACE_USER,
|
||||
ZFS_ACE_GROUP
|
||||
} zfs_fuid_type_t;
|
||||
|
||||
/*
|
||||
* Estimate space needed for one more fuid table entry.
|
||||
* for now assume its current size + 1K
|
||||
*/
|
||||
/*
 * (z) is parenthesized so that any pointer-valued expression (for example
 * &obj or a conditional expression) is dereferenced as a whole.
 */
#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
|
||||
|
||||
/* Upper 32 bits of a FUID (the index part); (x) is parenthesized so compound expressions shift as a whole. */
#define FUID_INDEX(x) ((x) >> 32)
|
||||
/* Lower 32 bits of a FUID (the rid part); (x) is parenthesized so compound expressions are masked as a whole. */
#define FUID_RID(x) ((x) & 0xffffffff)
|
||||
/*
 * Build a FUID with (idx) in the upper 32 bits and (rid) in the lower bits.
 * (idx) is widened to uint64_t before shifting: shifting a 32-bit operand by
 * 32 is undefined behavior.  Both arguments are parenthesized so compound
 * expressions expand safely.
 */
#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
|
||||
/*
|
||||
* FUIDs cause problems for the intent log
|
||||
* we need to replay the creation of the FUID,
|
||||
* but we can't count on the idmapper to be around
|
||||
* and during replay the FUID index may be different than
|
||||
* before. Also, if an ACL has 100 ACEs and 12 different
|
||||
* domains we don't want to log 100 domain strings, but rather
|
||||
* just the unique 12.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The FUIDs in the log will index into
|
||||
* domain string table and the bottom half will be the rid.
|
||||
* Used for mapping ephemeral uid/gid during ACL setting to FUIDs
|
||||
*/
|
||||
typedef struct zfs_fuid {
|
||||
list_node_t z_next;
|
||||
uint64_t z_id; /* uid/gid being converted to fuid */
|
||||
uint64_t z_domidx; /* index in AVL domain table */
|
||||
uint64_t z_logfuid; /* index for domain in log */
|
||||
} zfs_fuid_t;
|
||||
|
||||
/* list of unique domains */
|
||||
typedef struct zfs_fuid_domain {
|
||||
list_node_t z_next;
|
||||
uint64_t z_domidx; /* AVL tree idx */
|
||||
const char *z_domain; /* domain string */
|
||||
} zfs_fuid_domain_t;
|
||||
|
||||
/*
|
||||
* FUID information necessary for logging create, setattr, and setacl.
|
||||
*/
|
||||
typedef struct zfs_fuid_info {
|
||||
list_t z_fuids;
|
||||
list_t z_domains;
|
||||
uint64_t z_fuid_owner;
|
||||
uint64_t z_fuid_group;
|
||||
char **z_domain_table; /* Used during replay */
|
||||
uint32_t z_fuid_cnt; /* How many fuids in z_fuids */
|
||||
uint32_t z_domain_cnt; /* How many domains */
|
||||
size_t z_domain_str_sz; /* len of domain strings z_domain list */
|
||||
} zfs_fuid_info_t;
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct znode;
|
||||
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
|
||||
extern void zfs_fuid_destroy(zfsvfs_t *);
|
||||
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
|
||||
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
|
||||
uid_t *gid);
|
||||
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
|
||||
extern void zfs_fuid_info_free();
|
||||
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
|
||||
#endif
|
||||
|
||||
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
|
||||
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
|
||||
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_FUID_H */
|
||||
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_IOCTL_H
|
||||
#define _SYS_ZFS_IOCTL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/cred.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/nvpair.h>
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Property values for snapdir
|
||||
*/
|
||||
#define ZFS_SNAPDIR_HIDDEN 0
|
||||
#define ZFS_SNAPDIR_VISIBLE 1
|
||||
|
||||
#define DMU_BACKUP_STREAM_VERSION (1ULL)
|
||||
#define DMU_BACKUP_HEADER_VERSION (2ULL)
|
||||
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
|
||||
|
||||
#define DRR_FLAG_CLONE (1<<0)
|
||||
#define DRR_FLAG_CI_DATA (1<<1)
|
||||
|
||||
/*
|
||||
* zfs ioctl command structure
|
||||
*/
|
||||
typedef struct dmu_replay_record {
|
||||
enum {
|
||||
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
|
||||
DRR_WRITE, DRR_FREE, DRR_END,
|
||||
} drr_type;
|
||||
uint32_t drr_payloadlen;
|
||||
union {
|
||||
struct drr_begin {
|
||||
uint64_t drr_magic;
|
||||
uint64_t drr_version;
|
||||
uint64_t drr_creation_time;
|
||||
dmu_objset_type_t drr_type;
|
||||
uint32_t drr_flags;
|
||||
uint64_t drr_toguid;
|
||||
uint64_t drr_fromguid;
|
||||
char drr_toname[MAXNAMELEN];
|
||||
} drr_begin;
|
||||
struct drr_end {
|
||||
zio_cksum_t drr_checksum;
|
||||
} drr_end;
|
||||
struct drr_object {
|
||||
uint64_t drr_object;
|
||||
dmu_object_type_t drr_type;
|
||||
dmu_object_type_t drr_bonustype;
|
||||
uint32_t drr_blksz;
|
||||
uint32_t drr_bonuslen;
|
||||
uint8_t drr_checksum;
|
||||
uint8_t drr_compress;
|
||||
uint8_t drr_pad[6];
|
||||
/* bonus content follows */
|
||||
} drr_object;
|
||||
struct drr_freeobjects {
|
||||
uint64_t drr_firstobj;
|
||||
uint64_t drr_numobjs;
|
||||
} drr_freeobjects;
|
||||
struct drr_write {
|
||||
uint64_t drr_object;
|
||||
dmu_object_type_t drr_type;
|
||||
uint32_t drr_pad;
|
||||
uint64_t drr_offset;
|
||||
uint64_t drr_length;
|
||||
/* content follows */
|
||||
} drr_write;
|
||||
struct drr_free {
|
||||
uint64_t drr_object;
|
||||
uint64_t drr_offset;
|
||||
uint64_t drr_length;
|
||||
} drr_free;
|
||||
} drr_u;
|
||||
} dmu_replay_record_t;
|
||||
|
||||
typedef struct zinject_record {
|
||||
uint64_t zi_objset;
|
||||
uint64_t zi_object;
|
||||
uint64_t zi_start;
|
||||
uint64_t zi_end;
|
||||
uint64_t zi_guid;
|
||||
uint32_t zi_level;
|
||||
uint32_t zi_error;
|
||||
uint64_t zi_type;
|
||||
uint32_t zi_freq;
|
||||
uint32_t zi_pad; /* pad out to 64 bit alignment */
|
||||
} zinject_record_t;
|
||||
|
||||
#define ZINJECT_NULL 0x1
|
||||
#define ZINJECT_FLUSH_ARC 0x2
|
||||
#define ZINJECT_UNLOAD_SPA 0x4
|
||||
|
||||
typedef struct zfs_share {
|
||||
uint64_t z_exportdata;
|
||||
uint64_t z_sharedata;
|
||||
uint64_t z_sharetype; /* 0 = share, 1 = unshare */
|
||||
uint64_t z_sharemax; /* max length of share string */
|
||||
} zfs_share_t;
|
||||
|
||||
/*
|
||||
* ZFS file systems may behave the usual, POSIX-compliant way, where
|
||||
* name lookups are case-sensitive. They may also be set up so that
|
||||
* all the name lookups are case-insensitive, or so that only some
|
||||
* lookups, the ones that set an FIGNORECASE flag, are case-insensitive.
|
||||
*/
|
||||
typedef enum zfs_case {
|
||||
ZFS_CASE_SENSITIVE,
|
||||
ZFS_CASE_INSENSITIVE,
|
||||
ZFS_CASE_MIXED
|
||||
} zfs_case_t;
|
||||
|
||||
typedef struct zfs_cmd {
|
||||
char zc_name[MAXPATHLEN];
|
||||
char zc_value[MAXPATHLEN * 2];
|
||||
char zc_string[MAXNAMELEN];
|
||||
uint64_t zc_guid;
|
||||
uint64_t zc_nvlist_conf; /* really (char *) */
|
||||
uint64_t zc_nvlist_conf_size;
|
||||
uint64_t zc_nvlist_src; /* really (char *) */
|
||||
uint64_t zc_nvlist_src_size;
|
||||
uint64_t zc_nvlist_dst; /* really (char *) */
|
||||
uint64_t zc_nvlist_dst_size;
|
||||
uint64_t zc_cookie;
|
||||
uint64_t zc_objset_type;
|
||||
uint64_t zc_perm_action;
|
||||
uint64_t zc_history; /* really (char *) */
|
||||
uint64_t zc_history_len;
|
||||
uint64_t zc_history_offset;
|
||||
uint64_t zc_obj;
|
||||
zfs_share_t zc_share;
|
||||
dmu_objset_stats_t zc_objset_stats;
|
||||
struct drr_begin zc_begin_record;
|
||||
zinject_record_t zc_inject_record;
|
||||
} zfs_cmd_t;
|
||||
|
||||
#define ZVOL_MAX_MINOR (1 << 16)
|
||||
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
typedef struct zfs_creat {
|
||||
nvlist_t *zct_zplprops;
|
||||
nvlist_t *zct_props;
|
||||
} zfs_creat_t;
|
||||
|
||||
extern dev_info_t *zfs_dip;
|
||||
|
||||
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_secpolicy_rename_perms(const char *from,
|
||||
const char *to, cred_t *cr);
|
||||
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_busy(void);
|
||||
extern int zfs_unmount_snap(char *, void *);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_IOCTL_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_RLOCK_H
|
||||
#define _SYS_FS_ZFS_RLOCK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
typedef enum {
|
||||
RL_READER,
|
||||
RL_WRITER,
|
||||
RL_APPEND
|
||||
} rl_type_t;
|
||||
|
||||
typedef struct rl {
|
||||
znode_t *r_zp; /* znode this lock applies to */
|
||||
avl_node_t r_node; /* avl node link */
|
||||
uint64_t r_off; /* file range offset */
|
||||
uint64_t r_len; /* file range length */
|
||||
uint_t r_cnt; /* range reference count in tree */
|
||||
rl_type_t r_type; /* range type */
|
||||
kcondvar_t r_wr_cv; /* cv for waiting writers */
|
||||
kcondvar_t r_rd_cv; /* cv for waiting readers */
|
||||
uint8_t r_proxy; /* acting for original range */
|
||||
uint8_t r_write_wanted; /* writer wants to lock this range */
|
||||
uint8_t r_read_wanted; /* reader wants to lock this range */
|
||||
} rl_t;
|
||||
|
||||
/*
|
||||
* Lock a range (offset, length) as either shared (READER)
|
||||
* or exclusive (WRITER or APPEND). APPEND is a special type that
|
||||
 * is converted to WRITER and specifies a lock starting at the
|
||||
* end of file. zfs_range_lock() returns the range lock structure.
|
||||
*/
|
||||
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
|
||||
|
||||
/*
|
||||
* Unlock range and destroy range lock structure.
|
||||
*/
|
||||
void zfs_range_unlock(rl_t *rl);
|
||||
|
||||
/*
|
||||
 * Reduce range locked as RL_WRITER from whole file to specified range.
|
||||
* Asserts the whole file was previously locked.
|
||||
*/
|
||||
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
|
||||
|
||||
/*
|
||||
* AVL comparison function used to compare range locks
|
||||
*/
|
||||
int zfs_range_compare(const void *arg1, const void *arg2);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_RLOCK_H */
|
||||
@@ -0,0 +1,140 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_VFSOPS_H
|
||||
#define _SYS_FS_ZFS_VFSOPS_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct zfsvfs zfsvfs_t;
|
||||
|
||||
struct zfsvfs {
|
||||
vfs_t *z_vfs; /* generic fs struct */
|
||||
zfsvfs_t *z_parent; /* parent fs */
|
||||
objset_t *z_os; /* objset reference */
|
||||
uint64_t z_root; /* id of root znode */
|
||||
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
|
||||
uint64_t z_max_blksz; /* maximum block size for files */
|
||||
uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
|
||||
uint64_t z_fuid_obj; /* fuid table object number */
|
||||
uint64_t z_fuid_size; /* fuid table size */
|
||||
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
|
||||
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
|
||||
krwlock_t z_fuid_lock; /* fuid lock */
|
||||
boolean_t z_fuid_loaded; /* fuid tables are loaded */
|
||||
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
|
||||
zilog_t *z_log; /* intent log pointer */
|
||||
uint_t z_acl_mode; /* acl chmod/mode behavior */
|
||||
uint_t z_acl_inherit; /* acl inheritance behavior */
|
||||
zfs_case_t z_case; /* case-sense */
|
||||
boolean_t z_utf8; /* utf8-only */
|
||||
int z_norm; /* normalization flags */
|
||||
boolean_t z_atime; /* enable atimes mount option */
|
||||
boolean_t z_unmounted; /* unmounted */
|
||||
rrwlock_t z_teardown_lock;
|
||||
krwlock_t z_teardown_inactive_lock;
|
||||
list_t z_all_znodes; /* all vnodes in the fs */
|
||||
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
|
||||
vnode_t *z_ctldir; /* .zfs directory pointer */
|
||||
boolean_t z_show_ctldir; /* expose .zfs in the root dir */
|
||||
boolean_t z_issnap; /* true if this is a snapshot */
|
||||
boolean_t z_vscan; /* virus scan on/off */
|
||||
boolean_t z_use_fuids; /* version allows fuids */
|
||||
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
|
||||
uint64_t z_version; /* ZPL version */
|
||||
#define ZFS_OBJ_MTX_SZ 64
|
||||
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
|
||||
};
|
||||
|
||||
/*
|
||||
* Normal filesystems (those not under .zfs/snapshot) have a total
|
||||
* file ID size limited to 12 bytes (including the length field) due to
|
||||
* NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical
|
||||
* reasons, this same limit is being imposed by the Solaris NFSv3 implementation
|
||||
* (although the NFSv3 protocol actually permits a maximum of 64 bytes). It
|
||||
* is not possible to expand beyond 12 bytes without abandoning support
|
||||
* of NFSv2.
|
||||
*
|
||||
* For normal filesystems, we partition up the available space as follows:
|
||||
* 2 bytes fid length (required)
|
||||
* 6 bytes object number (48 bits)
|
||||
* 4 bytes generation number (32 bits)
|
||||
*
|
||||
* We reserve only 48 bits for the object number, as this is the limit
|
||||
* currently defined and imposed by the DMU.
|
||||
*/
|
||||
typedef struct zfid_short {
|
||||
uint16_t zf_len;
|
||||
uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */
|
||||
uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */
|
||||
} zfid_short_t;
|
||||
|
||||
/*
|
||||
* Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
|
||||
* (including the length field). This makes files under .zfs/snapshot
|
||||
* accessible by NFSv3 and NFSv4, but not NFSv2.
|
||||
*
|
||||
* For files under .zfs/snapshot, we partition up the available space
|
||||
* as follows:
|
||||
* 2 bytes fid length (required)
|
||||
* 6 bytes object number (48 bits)
|
||||
* 4 bytes generation number (32 bits)
|
||||
* 6 bytes objset id (48 bits)
|
||||
* 4 bytes currently just zero (32 bits)
|
||||
*
|
||||
* We reserve only 48 bits for the object number and objset id, as these are
|
||||
* the limits currently defined and imposed by the DMU.
|
||||
*/
|
||||
typedef struct zfid_long {
|
||||
zfid_short_t z_fid; /* short fid: length, object number, generation */
|
||||
uint8_t zf_setid[6]; /* objset id: setid[i] = objsetid >> (8 * i) */
|
||||
uint8_t zf_setgen[4]; /* 4 bytes, currently just zero (see layout comment above) */
|
||||
} zfid_long_t;
|
||||
|
||||
#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
|
||||
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
|
||||
|
||||
extern uint_t zfs_fsyncer_key;
|
||||
|
||||
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
|
||||
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_VFSOPS_H */
|
||||
@@ -0,0 +1,356 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_ZNODE_H
|
||||
#define _SYS_FS_ZFS_ZNODE_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#include <sys/attr.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#endif
|
||||
#include <sys/zfs_acl.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Additional file level attributes, that are stored
|
||||
* in the upper half of zp_flags
|
||||
*/
|
||||
#define ZFS_READONLY 0x0000000100000000
|
||||
#define ZFS_HIDDEN 0x0000000200000000
|
||||
#define ZFS_SYSTEM 0x0000000400000000
|
||||
#define ZFS_ARCHIVE 0x0000000800000000
|
||||
#define ZFS_IMMUTABLE 0x0000001000000000
|
||||
#define ZFS_NOUNLINK 0x0000002000000000
|
||||
#define ZFS_APPENDONLY 0x0000004000000000
|
||||
#define ZFS_NODUMP 0x0000008000000000
|
||||
#define ZFS_OPAQUE 0x0000010000000000
|
||||
#define ZFS_AV_QUARANTINED 0x0000020000000000
|
||||
#define ZFS_AV_MODIFIED 0x0000040000000000
|
||||
|
||||
/*
 * Set (value is non-zero) or clear (value is zero) the bits named by
 * attr in the znode's persistent zp_flags word.
 *
 * zp and attr are parenthesized in the expansion so that expression
 * arguments behave as a unit; in particular a compound mask such as
 * (A | B) is complemented as a whole when clearing.  The expansion is
 * still a brace block, so invoke it as a statement of its own.
 */
#define ZFS_ATTR_SET(zp, attr, value) \
{ \
	if (value) \
		(zp)->z_phys->zp_flags |= (attr); \
	else \
		(zp)->z_phys->zp_flags &= ~(attr); \
}
|
||||
|
||||
/*
|
||||
* Define special zfs pflags
|
||||
*/
|
||||
#define ZFS_XATTR 0x1 /* is an extended attribute */
|
||||
#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
|
||||
#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
|
||||
#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */
|
||||
#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */
|
||||
#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
|
||||
#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
|
||||
#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
|
||||
|
||||
/*
|
||||
* Is ID ephemeral?
|
||||
*/
|
||||
/* True when x exceeds MAXUID; x is parenthesized so expression arguments expand safely. */
#define IS_EPHEMERAL(x) ((x) > MAXUID)
|
||||
|
||||
/*
|
||||
* Should we use FUIDs?
|
||||
*/
|
||||
/*
 * True only when the given ZPL version is at least ZPL_VERSION_FUID and
 * the SPA version of the pool backing objset os is at least
 * SPA_VERSION_FUID.  The version argument is parenthesized so that an
 * expression argument is compared as a whole.
 */
#define USE_FUIDS(version, os) ((version) >= ZPL_VERSION_FUID && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
|
||||
|
||||
#define MASTER_NODE_OBJ 1
|
||||
|
||||
/*
|
||||
* Special attributes for master node.
|
||||
*/
|
||||
#define ZFS_FSID "FSID"
|
||||
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
|
||||
#define ZFS_ROOT_OBJ "ROOT"
|
||||
#define ZPL_VERSION_STR "VERSION"
|
||||
#define ZFS_FUID_TABLES "FUID"
|
||||
|
||||
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
|
||||
|
||||
/* Path component length */
|
||||
/*
|
||||
* The generic fs code uses MAXNAMELEN to represent
|
||||
* what the largest component length is. Unfortunately,
|
||||
* this length includes the terminating NULL. ZFS needs
|
||||
* to tell the users via pathconf() and statvfs() what the
|
||||
* true maximum length of a component is, excluding the NULL.
|
||||
*/
|
||||
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
|
||||
|
||||
/*
|
||||
* Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
|
||||
* the directory entries.
|
||||
*/
|
||||
#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
|
||||
|
||||
/*
|
||||
* The directory entry has the type (currently unused on Solaris) in the
|
||||
* top 4 bits, and the object number in the low 48 bits. The "middle"
|
||||
* 12 bits are unused.
|
||||
*/
|
||||
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
|
||||
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
|
||||
|
||||
/*
|
||||
* This is the persistent portion of the znode. It is stored
|
||||
* in the "bonus buffer" of the file. Short symbolic links
|
||||
* are also stored in the bonus buffer.
|
||||
*/
|
||||
typedef struct znode_phys {
|
||||
uint64_t zp_atime[2]; /* 0 - last file access time */
|
||||
uint64_t zp_mtime[2]; /* 16 - last file modification time */
|
||||
uint64_t zp_ctime[2]; /* 32 - last file change time */
|
||||
uint64_t zp_crtime[2]; /* 48 - creation time */
|
||||
uint64_t zp_gen; /* 64 - generation (txg of creation) */
|
||||
uint64_t zp_mode; /* 72 - file mode bits */
|
||||
uint64_t zp_size; /* 80 - size of file */
|
||||
uint64_t zp_parent; /* 88 - directory parent (`..') */
|
||||
uint64_t zp_links; /* 96 - number of links to file */
|
||||
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
|
||||
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
|
||||
uint64_t zp_flags; /* 120 - persistent flags */
|
||||
uint64_t zp_uid; /* 128 - file owner */
|
||||
uint64_t zp_gid; /* 136 - owning group */
|
||||
uint64_t zp_zap; /* 144 - extra attributes */
|
||||
uint64_t zp_pad[3]; /* 152 - future */
|
||||
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
|
||||
/*
|
||||
* Data may pad out any remaining bytes in the znode buffer, eg:
|
||||
*
|
||||
* |<---------------------- dnode_phys (512) ------------------------>|
|
||||
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
|
||||
* |<---- znode (264) ---->|<---- data (56) ---->|
|
||||
*
|
||||
* At present, we use this space for the following:
|
||||
* - symbolic links
|
||||
* - 32-byte anti-virus scanstamp (regular files only)
|
||||
*/
|
||||
} znode_phys_t;
|
||||
|
||||
/*
|
||||
* Directory entry locks control access to directory entries.
|
||||
* They are used to protect creates, deletes, and renames.
|
||||
* Each directory znode has a mutex and a list of locked names.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
typedef struct zfs_dirlock {
|
||||
char *dl_name; /* directory entry being locked */
|
||||
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
|
||||
uint16_t dl_namesize; /* set if dl_name was allocated */
|
||||
kcondvar_t dl_cv; /* wait for entry to be unlocked */
|
||||
struct znode *dl_dzp; /* directory znode */
|
||||
struct zfs_dirlock *dl_next; /* next in z_dirlocks list */
|
||||
} zfs_dirlock_t;
|
||||
|
||||
typedef struct znode {
|
||||
struct zfsvfs *z_zfsvfs;
|
||||
vnode_t *z_vnode;
|
||||
uint64_t z_id; /* object ID for this znode */
|
||||
kmutex_t z_lock; /* znode modification lock */
|
||||
krwlock_t z_map_lock; /* page map lock */
|
||||
krwlock_t z_parent_lock; /* parent lock for directories */
|
||||
krwlock_t z_name_lock; /* "master" lock for dirent locks */
|
||||
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
|
||||
kmutex_t z_range_lock; /* protects changes to z_range_avl */
|
||||
avl_tree_t z_range_avl; /* avl tree of file range locks */
|
||||
uint8_t z_unlinked; /* file has been unlinked */
|
||||
uint8_t z_atime_dirty; /* atime needs to be synced */
|
||||
uint8_t z_zn_prefetch; /* Prefetch znodes? */
|
||||
uint_t z_blksz; /* block size in bytes */
|
||||
uint_t z_seq; /* modification sequence number */
|
||||
uint64_t z_mapcnt; /* number of pages mapped to file */
|
||||
uint64_t z_last_itx; /* last ZIL itx on this znode */
|
||||
uint64_t z_gen; /* generation (same as zp_gen) */
|
||||
uint32_t z_sync_cnt; /* synchronous open count */
|
||||
kmutex_t z_acl_lock; /* acl data lock */
|
||||
list_node_t z_link_node; /* all znodes in fs link */
|
||||
/*
|
||||
* These are dmu managed fields.
|
||||
*/
|
||||
znode_phys_t *z_phys; /* pointer to persistent znode */
|
||||
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
|
||||
} znode_t;
|
||||
|
||||
|
||||
/*
|
||||
* Range locking rules
|
||||
* --------------------
|
||||
* 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
|
||||
* file range needs to be locked as RL_WRITER. Only then can the pages be
|
||||
* freed etc and zp_size reset. zp_size must be set within range lock.
|
||||
* 2. For writes and punching holes (zfs_write & zfs_space) just the range
|
||||
* being written or freed needs to be locked as RL_WRITER.
|
||||
* Multiple writes at the end of the file must coordinate zp_size updates
|
||||
* to ensure data isn't lost. A compare and swap loop is currently used
|
||||
* to ensure the file size is at least the offset last written.
|
||||
* 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
|
||||
* read needs to be locked as RL_READER. A check against zp_size can then
|
||||
* be made for reading beyond end of file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert between znode pointers and vnode pointers
|
||||
*/
|
||||
#define ZTOV(ZP) ((ZP)->z_vnode)
|
||||
#define VTOZ(VP) ((znode_t *)(VP)->v_data)
|
||||
|
||||
/*
|
||||
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
|
||||
 * ZFS_EXIT() must be called before exiting the vop.
|
||||
* ZFS_VERIFY_ZP() verifies the znode is valid.
|
||||
*/
|
||||
#define ZFS_ENTER(zfsvfs) \
|
||||
{ \
|
||||
rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
|
||||
if ((zfsvfs)->z_unmounted) { \
|
||||
ZFS_EXIT(zfsvfs); \
|
||||
return (EIO); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
|
||||
|
||||
/*
 * If the znode has no dbuf attached (z_dbuf == NULL), release the
 * filesystem's teardown lock through ZFS_EXIT() and make the calling
 * function return EIO.  The expansion contains a return statement, so it
 * can only be used inside functions that return an int error code.
 *
 * Deliberately kept as a bare `if` so existing call sites expand exactly
 * as before; invoke it as a statement of its own.  The macro body ends at
 * the closing brace (no trailing line continuation), so the line that
 * follows the definition can never be absorbed into it.
 */
#define ZFS_VERIFY_ZP(zp) \
	if ((zp)->z_dbuf == NULL) { \
		ZFS_EXIT((zp)->z_zfsvfs); \
		return (EIO); \
	}
|
||||
|
||||
/*
|
||||
* Macros for dealing with dmu_buf_hold
|
||||
*/
|
||||
#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
|
||||
#define ZFS_OBJ_MUTEX(zfsvfs, obj_num) \
|
||||
(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
|
||||
#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
|
||||
mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
|
||||
#define ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
|
||||
mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
|
||||
#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
|
||||
mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
|
||||
|
||||
/*
|
||||
* Macros to encode/decode ZFS stored time values from/to struct timespec
|
||||
*/
|
||||
#define ZFS_TIME_ENCODE(tp, stmp) \
|
||||
{ \
|
||||
(stmp)[0] = (uint64_t)(tp)->tv_sec; \
|
||||
(stmp)[1] = (uint64_t)(tp)->tv_nsec; \
|
||||
}
|
||||
|
||||
#define ZFS_TIME_DECODE(tp, stmp) \
|
||||
{ \
|
||||
(tp)->tv_sec = (time_t)(stmp)[0]; \
|
||||
(tp)->tv_nsec = (long)(stmp)[1]; \
|
||||
}
|
||||
|
||||
/*
|
||||
* Timestamp defines
|
||||
*/
|
||||
#define ACCESSED (AT_ATIME)
|
||||
#define STATE_CHANGED (AT_CTIME)
|
||||
#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
|
||||
|
||||
#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
|
||||
if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
|
||||
zfs_time_stamper(zp, ACCESSED, NULL)
|
||||
|
||||
extern int zfs_init_fs(zfsvfs_t *, znode_t **);
|
||||
extern void zfs_set_dataprop(objset_t *);
|
||||
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
|
||||
dmu_tx_t *tx);
|
||||
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
|
||||
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
|
||||
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
|
||||
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
|
||||
extern void zfs_znode_init(void);
|
||||
extern void zfs_znode_fini(void);
|
||||
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
|
||||
extern int zfs_rezget(znode_t *);
|
||||
extern void zfs_zinactive(znode_t *);
|
||||
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
|
||||
extern void zfs_znode_free(znode_t *);
|
||||
extern void zfs_remove_op_tables(void);	/* (void): real prototype, takes no arguments */
|
||||
extern int zfs_create_op_tables(void);	/* (void): real prototype, takes no arguments */
|
||||
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
|
||||
extern dev_t zfs_cmpldev(uint64_t);
|
||||
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
|
||||
extern int zfs_set_version(const char *name, uint64_t newvers);
|
||||
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern void zfs_znode_dmu_fini(znode_t *);
|
||||
|
||||
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
|
||||
vattr_t *vap);
|
||||
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
|
||||
vattr_t *vap);
|
||||
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, char *name);
|
||||
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name);
|
||||
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name, char *link);
|
||||
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
|
||||
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, offset_t off, ssize_t len, int ioflag);
|
||||
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, uint64_t off, uint64_t len);
|
||||
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
|
||||
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
|
||||
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
|
||||
|
||||
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
|
||||
extern void zfs_unmap_page(page_t *, caddr_t);
|
||||
|
||||
extern zil_get_data_t zfs_get_data;
|
||||
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
|
||||
extern int zfsfstype;
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_ZNODE_H */
|
||||
@@ -0,0 +1,382 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIL_H
|
||||
#define _SYS_ZIL_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Intent log format:
|
||||
*
|
||||
* Each objset has its own intent log. The log header (zil_header_t)
|
||||
* for objset N's intent log is kept in the Nth object of the SPA's
|
||||
* intent_log objset. The log header points to a chain of log blocks,
|
||||
* each of which contains log records (i.e., transactions) followed by
|
||||
* a log block trailer (zil_trailer_t). The format of a log record
|
||||
* depends on the record (or transaction) type, but all records begin
|
||||
* with a common structure that defines the type, length, and txg.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Intent log header - this on disk structure holds fields to manage
|
||||
* the log. All fields are 64 bit to easily handle cross architectures.
|
||||
*/
|
||||
typedef struct zil_header {
|
||||
uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
|
||||
uint64_t zh_replay_seq; /* highest replayed sequence number */
|
||||
blkptr_t zh_log; /* log chain */
|
||||
uint64_t zh_claim_seq; /* highest claimed sequence number */
|
||||
uint64_t zh_pad[5];
|
||||
} zil_header_t;
|
||||
|
||||
/*
|
||||
* Log block trailer - structure at the end of the header and each log block
|
||||
*
|
||||
* The zit_bt contains a zbt_cksum which for the intent log is
|
||||
* the sequence number of this log block. A seq of 0 is invalid.
|
||||
* The zbt_cksum is checked by the SPA against the sequence
|
||||
* number passed in the blk_cksum field of the blkptr_t
|
||||
*/
|
||||
typedef struct zil_trailer {
|
||||
uint64_t zit_pad;
|
||||
blkptr_t zit_next_blk; /* next block in chain */
|
||||
uint64_t zit_nused; /* bytes in log block used */
|
||||
zio_block_tail_t zit_bt; /* block trailer */
|
||||
} zil_trailer_t;
|
||||
|
||||
#define ZIL_MIN_BLKSZ 4096ULL
|
||||
#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
|
||||
#define ZIL_BLK_DATA_SZ(lwb) ((lwb)->lwb_sz - sizeof (zil_trailer_t))
|
||||
|
||||
/*
|
||||
* The words of a log block checksum.
|
||||
*/
|
||||
#define ZIL_ZC_GUID_0 0
|
||||
#define ZIL_ZC_GUID_1 1
|
||||
#define ZIL_ZC_OBJSET 2
|
||||
#define ZIL_ZC_SEQ 3
|
||||
|
||||
/*
 * Kind of object a create-type log record refers to.  Values are
 * assigned implicitly starting at 0, in declaration order.
 *
 * There is intentionally no comma after the last enumerator: a trailing
 * comma is rejected by C89 and by C++98/03, and this header is wrapped in
 * extern "C" for C++ consumers.
 */
typedef enum zil_create {
	Z_FILE,
	Z_DIR,
	Z_XATTRDIR
} zil_create_t;
|
||||
|
||||
/*
|
||||
* size of xvattr log section.
|
||||
 * it's composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps
|
||||
* for create time and a single 64 bit integer for all of the attributes,
|
||||
* and 4 64 bit integers (32 bytes) for the scanstamp.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * Byte size of the xvattr section of a log record for a bitmap of
 * mapsize 32-bit words: the lr_attr_t header (which already holds the
 * first bitmap word), the remaining (mapsize - 1) bitmap words, and
 * seven 64-bit integers.
 *
 * Both the argument and the whole replacement list are parenthesized so
 * the macro can be used inside larger expressions (e.g. multiplied or
 * subtracted) and with expression arguments.
 */
#define ZIL_XVAT_SIZE(mapsize) \
	(sizeof (lr_attr_t) + (sizeof (uint32_t) * ((mapsize) - 1)) + \
	(sizeof (uint64_t) * 7))
|
||||
|
||||
/*
|
||||
* Size of ACL in log. The ACE data is padded out to properly align
|
||||
* on 8 byte boundary.
|
||||
*/
|
||||
|
||||
#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t)))
|
||||
|
||||
/*
|
||||
* Intent log transaction types and record structures
|
||||
*/
|
||||
#define TX_CREATE 1 /* Create file */
|
||||
#define TX_MKDIR 2 /* Make directory */
|
||||
#define TX_MKXATTR 3 /* Make XATTR directory */
|
||||
#define TX_SYMLINK 4 /* Create symbolic link to a file */
|
||||
#define TX_REMOVE 5 /* Remove file */
|
||||
#define TX_RMDIR 6 /* Remove directory */
|
||||
#define TX_LINK 7 /* Create hard link to a file */
|
||||
#define TX_RENAME 8 /* Rename a file */
|
||||
#define TX_WRITE 9 /* File write */
|
||||
#define TX_TRUNCATE 10 /* Truncate a file */
|
||||
#define TX_SETATTR 11 /* Set file attributes */
|
||||
#define TX_ACL_V0 12 /* Set old formatted ACL */
|
||||
#define TX_ACL 13 /* Set ACL */
|
||||
#define TX_CREATE_ACL 14 /* create with ACL */
|
||||
#define TX_CREATE_ATTR 15 /* create + attrs */
|
||||
#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */
|
||||
#define TX_MKDIR_ACL 17 /* mkdir with ACL */
|
||||
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
|
||||
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
|
||||
#define TX_MAX_TYPE 20 /* Max transaction type */
|
||||
|
||||
/*
 * Case-insensitive flag.  It occupies the top bit of the 64 bit
 * transaction type and may be set on the mkdir, symlink, remove, rmdir,
 * link, and rename transactions to record that the original request
 * asked for case-insensitive handling of names.
 */
#define	TX_CI	((uint64_t)1 << 63)
|
||||
|
||||
/*
 * Log record format.
 *
 * Every log record begins with the common header below.  The fields
 * are defined carefully so that they are aligned and sized identically
 * on sparc & intel architectures.
 *
 * lrc_seq does double duty.  While the record is in memory it holds
 * the transaction sequence number.  In the on-disk log record it holds
 * the sequence number of the record among all log records, which is
 * what guards against replaying the same record twice.  The two
 * numbers differ because transactions can be pushed out of order.
 */
typedef struct {			/* common log record header */
	uint64_t	lrc_txtype;	/* intent log transaction type */
	uint64_t	lrc_reclen;	/* transaction record length */
	uint64_t	lrc_txg;	/* dmu transaction group number */
	uint64_t	lrc_seq;	/* see comment above */
} lr_t;
|
||||
|
||||
/*
 * Header for the optional extended vattr attributes of a record.
 *
 * Whenever new attributes are added the version number will need to
 * be updated, as will the code in zfs_log.c and zfs_replay.c.
 */
typedef struct {
	uint32_t	lr_attr_masksize; /* number of elements in array */
	uint32_t	lr_attr_bitmap;	/* First entry of array */
	/* remainder of array and any additional fields */
} lr_attr_t;
|
||||
|
||||
/*
|
||||
* log record for creates without optional ACL.
|
||||
* This log record does support optional xvattr_t attributes.
|
||||
*/
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* object id of directory */
|
||||
uint64_t lr_foid; /* object id of created file object */
|
||||
uint64_t lr_mode; /* mode of object */
|
||||
uint64_t lr_uid; /* uid of object */
|
||||
uint64_t lr_gid; /* gid of object */
|
||||
uint64_t lr_gen; /* generation (txg of creation) */
|
||||
uint64_t lr_crtime[2]; /* creation time */
|
||||
uint64_t lr_rdev; /* rdev of object to create */
|
||||
/* name of object to create follows this */
|
||||
/* for symlinks, link content follows name */
|
||||
/* for creates with xvattr data, the name follows the xvattr info */
|
||||
} lr_create_t;
|
||||
|
||||
/*
|
||||
* FUID ACL record will be an array of ACEs from the original ACL.
|
||||
* If this array includes ephemeral IDs, the record will also include
|
||||
* an array of log-specific FUIDs to replace the ephemeral IDs.
|
||||
* Only one copy of each unique domain will be present, so the log-specific
|
||||
* FUIDs will use an index into a compressed domain table. On replay this
|
||||
* information will be used to construct real FUIDs (and bypass idmap,
|
||||
* since it may not be available).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Log record for creates with optional ACL
|
||||
* This log record is also used for recording any FUID
|
||||
* information needed for replaying the create. If the
|
||||
* file doesn't have any actual ACEs then the lr_aclcnt
|
||||
* would be zero.
|
||||
*/
|
||||
typedef struct {
|
||||
lr_create_t lr_create; /* common create portion */
|
||||
uint64_t lr_aclcnt; /* number of ACEs in ACL */
|
||||
uint64_t lr_domcnt; /* number of unique domains */
|
||||
uint64_t lr_fuidcnt; /* number of real fuids */
|
||||
uint64_t lr_acl_bytes; /* number of bytes in ACL */
|
||||
uint64_t lr_acl_flags; /* ACL flags */
|
||||
/* lr_acl_bytes number of variable sized ace's follows */
|
||||
/* if create is also setting xvattr's, then acl data follows xvattr */
|
||||
/* if ACE FUIDs are needed then they will follow the xvattr_t */
|
||||
/* Following the FUIDs will be the domain table information. */
|
||||
/* The FUIDs for the owner and group will be in the lr_create */
|
||||
/* portion of the record. */
|
||||
/* name follows ACL data */
|
||||
} lr_acl_create_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* obj id of directory */
|
||||
/* name of object to remove follows this */
|
||||
} lr_remove_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* obj id of directory */
|
||||
uint64_t lr_link_obj; /* obj id of link */
|
||||
/* name of object to link follows this */
|
||||
} lr_link_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_sdoid; /* obj id of source directory */
|
||||
uint64_t lr_tdoid; /* obj id of target directory */
|
||||
/* 2 strings: names of source and destination follow this */
|
||||
} lr_rename_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* file object to write */
|
||||
uint64_t lr_offset; /* offset to write to */
|
||||
uint64_t lr_length; /* user data length to write */
|
||||
uint64_t lr_blkoff; /* offset represented by lr_blkptr */
|
||||
blkptr_t lr_blkptr; /* spa block pointer for replay */
|
||||
/* write data will follow for small writes */
|
||||
} lr_write_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* object id of file to truncate */
|
||||
uint64_t lr_offset; /* offset to truncate from */
|
||||
uint64_t lr_length; /* length to truncate */
|
||||
} lr_truncate_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* file object to change attributes */
|
||||
uint64_t lr_mask; /* mask of attributes to set */
|
||||
uint64_t lr_mode; /* mode to set */
|
||||
uint64_t lr_uid; /* uid to set */
|
||||
uint64_t lr_gid; /* gid to set */
|
||||
uint64_t lr_size; /* size to set */
|
||||
uint64_t lr_atime[2]; /* access time */
|
||||
uint64_t lr_mtime[2]; /* modification time */
|
||||
/* optional attribute lr_attr_t may be here */
|
||||
} lr_setattr_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* obj id of file */
|
||||
uint64_t lr_aclcnt; /* number of acl entries */
|
||||
/* lr_aclcnt number of ace_t entries follow this */
|
||||
} lr_acl_v0_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* obj id of file */
|
||||
uint64_t lr_aclcnt; /* number of ACEs in ACL */
|
||||
uint64_t lr_domcnt; /* number of unique domains */
|
||||
uint64_t lr_fuidcnt; /* number of real fuids */
|
||||
uint64_t lr_acl_bytes; /* number of bytes in ACL */
|
||||
uint64_t lr_acl_flags; /* ACL flags */
|
||||
/* lr_acl_bytes number of variable sized ace's follows */
|
||||
} lr_acl_t;
|
||||
|
||||
/*
|
||||
* ZIL structure definitions, interface function prototype and globals.
|
||||
*/
|
||||
|
||||
/*
|
||||
* ZFS intent log transaction structure
|
||||
*/
|
||||
/*
 * How the data of a write transaction is carried by its itx.
 */
typedef enum {
	/*
	 * indirect - a large write: dmu_sync() the data and put the
	 * blkptr in the log, rather than the actual data
	 */
	WR_INDIRECT,
	/* immediate - data is copied into lr_write_t */
	WR_COPIED,
	/* immediate - data needs to be copied if pushed */
	WR_NEED_COPY
} itx_wr_state_t;
|
||||
|
||||
typedef struct itx {
|
||||
list_node_t itx_node; /* linkage on zl_itx_list */
|
||||
void *itx_private; /* type-specific opaque data */
|
||||
itx_wr_state_t itx_wr_state; /* write state */
|
||||
uint8_t itx_sync; /* synchronous transaction */
|
||||
uint64_t itx_sod; /* record size on disk */
|
||||
lr_t itx_lr; /* common part of log record */
|
||||
/* followed by type-specific part of lr_xx_t and its immediate data */
|
||||
} itx_t;
|
||||
|
||||
|
||||
/*
|
||||
* zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
|
||||
* to handle the cleanup of the dmu_sync() buffer write
|
||||
*/
|
||||
typedef struct {
|
||||
zilog_t *zgd_zilog; /* zilog */
|
||||
blkptr_t *zgd_bp; /* block pointer */
|
||||
struct rl *zgd_rl; /* range lock */
|
||||
} zgd_t;
|
||||
|
||||
|
||||
typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
|
||||
uint64_t txg);
|
||||
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
|
||||
uint64_t txg);
|
||||
typedef int zil_replay_func_t();
|
||||
typedef void zil_replay_cleaner_t();
|
||||
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
|
||||
|
||||
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
||||
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
|
||||
|
||||
extern void zil_init(void);
|
||||
extern void zil_fini(void);
|
||||
|
||||
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
|
||||
extern void zil_free(zilog_t *zilog);
|
||||
|
||||
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
|
||||
extern void zil_close(zilog_t *zilog);
|
||||
|
||||
extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
|
||||
zil_replay_func_t *replay_func[TX_MAX_TYPE],
|
||||
zil_replay_cleaner_t *replay_cleaner);
|
||||
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
|
||||
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
|
||||
|
||||
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
|
||||
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
|
||||
|
||||
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
|
||||
|
||||
extern int zil_claim(char *osname, void *txarg);
|
||||
extern int zil_check_log_chain(char *osname, void *txarg);
|
||||
extern int zil_clear_log_chain(char *osname, void *txarg);
|
||||
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
|
||||
extern void zil_clean(zilog_t *zilog);
|
||||
extern int zil_is_committed(zilog_t *zilog);
|
||||
|
||||
extern int zil_suspend(zilog_t *zilog);
|
||||
extern void zil_resume(zilog_t *zilog);
|
||||
|
||||
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
|
||||
|
||||
extern int zil_disable;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIL_H */
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIL_IMPL_H
|
||||
#define _SYS_ZIL_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zil.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Log write buffer.
|
||||
*/
|
||||
typedef struct lwb {
|
||||
zilog_t *lwb_zilog; /* back pointer to log struct */
|
||||
blkptr_t lwb_blk; /* on disk address of this log blk */
|
||||
int lwb_nused; /* # used bytes in buffer */
|
||||
int lwb_sz; /* size of block and buffer */
|
||||
char *lwb_buf; /* log write buffer */
|
||||
zio_t *lwb_zio; /* zio for this buffer */
|
||||
uint64_t lwb_max_txg; /* highest txg in this lwb */
|
||||
txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
|
||||
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
|
||||
} lwb_t;
|
||||
|
||||
/*
|
||||
* Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
|
||||
* we've touched so we know which ones need a write cache flush at the end.
|
||||
*/
|
||||
typedef struct zil_vdev_node {
|
||||
uint64_t zv_vdev; /* vdev to be flushed */
|
||||
avl_node_t zv_node; /* AVL tree linkage */
|
||||
} zil_vdev_node_t;
|
||||
|
||||
/*
|
||||
* Stable storage intent log management structure. One per dataset.
|
||||
*/
|
||||
struct zilog {
|
||||
kmutex_t zl_lock; /* protects most zilog_t fields */
|
||||
struct dsl_pool *zl_dmu_pool; /* DSL pool */
|
||||
spa_t *zl_spa; /* handle for read/write log */
|
||||
const zil_header_t *zl_header; /* log header buffer */
|
||||
objset_t *zl_os; /* object set we're logging */
|
||||
zil_get_data_t *zl_get_data; /* callback to get object content */
|
||||
zio_t *zl_root_zio; /* log writer root zio */
|
||||
uint64_t zl_itx_seq; /* next itx sequence number */
|
||||
uint64_t zl_commit_seq; /* committed upto this number */
|
||||
uint64_t zl_lr_seq; /* log record sequence number */
|
||||
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
|
||||
uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
|
||||
uint32_t zl_suspend; /* log suspend count */
|
||||
kcondvar_t zl_cv_writer; /* log writer thread completion */
|
||||
kcondvar_t zl_cv_suspend; /* log suspend completion */
|
||||
uint8_t zl_suspending; /* log is currently suspending */
|
||||
uint8_t zl_keep_first; /* keep first log block in destroy */
|
||||
uint8_t zl_stop_replay; /* don't replay any further */
|
||||
uint8_t zl_stop_sync; /* for debugging */
|
||||
uint8_t zl_writer; /* boolean: write setup in progress */
|
||||
uint8_t zl_log_error; /* boolean: log write error */
|
||||
list_t zl_itx_list; /* in-memory itx list */
|
||||
uint64_t zl_itx_list_sz; /* total size of records on list */
|
||||
uint64_t zl_cur_used; /* current commit log size used */
|
||||
uint64_t zl_prev_used; /* previous commit log size used */
|
||||
list_t zl_lwb_list; /* in-flight log write list */
|
||||
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
|
||||
avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
|
||||
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
|
||||
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
|
||||
clock_t zl_replay_time; /* lbolt of when replay started */
|
||||
uint64_t zl_replay_blks; /* number of log blocks replayed */
|
||||
};
|
||||
|
||||
typedef struct zil_dva_node {
|
||||
dva_t zn_dva;
|
||||
avl_node_t zn_node;
|
||||
} zil_dva_node_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIL_IMPL_H */
|
||||
@@ -0,0 +1,424 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_H
|
||||
#define _ZIO_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
|
||||
|
||||
typedef struct zio_block_tail {
|
||||
uint64_t zbt_magic; /* for validation, endianness */
|
||||
zio_cksum_t zbt_cksum; /* 256-bit checksum */
|
||||
} zio_block_tail_t;
|
||||
|
||||
/*
|
||||
* Gang block headers are self-checksumming and contain an array
|
||||
* of block pointers.
|
||||
*/
|
||||
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
|
||||
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
|
||||
sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
|
||||
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
|
||||
sizeof (zio_block_tail_t) - \
|
||||
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
|
||||
sizeof (uint64_t))
|
||||
|
||||
typedef struct zio_gbh {
|
||||
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
|
||||
uint64_t zg_filler[SPA_GBH_FILLER];
|
||||
zio_block_tail_t zg_tail;
|
||||
} zio_gbh_phys_t;
|
||||
|
||||
/*
 * Checksum selectors, numbered consecutively from zero.
 * ZIO_CHECKSUM_FUNCTIONS is not a selector; it is the number of
 * entries that precede it.
 */
enum zio_checksum {
	ZIO_CHECKSUM_INHERIT = 0,
	ZIO_CHECKSUM_ON,
	ZIO_CHECKSUM_OFF,
	ZIO_CHECKSUM_LABEL,
	ZIO_CHECKSUM_GANG_HEADER,
	ZIO_CHECKSUM_ZILOG,
	ZIO_CHECKSUM_FLETCHER_2,
	ZIO_CHECKSUM_FLETCHER_4,
	ZIO_CHECKSUM_SHA256,
	ZIO_CHECKSUM_FUNCTIONS
};

/* the checksum that ZIO_CHECKSUM_ON stands for */
#define	ZIO_CHECKSUM_ON_VALUE	ZIO_CHECKSUM_FLETCHER_2
/* the default checksum setting */
#define	ZIO_CHECKSUM_DEFAULT	ZIO_CHECKSUM_ON
|
||||
|
||||
/*
 * Compression selectors, numbered consecutively from zero.
 * ZIO_COMPRESS_FUNCTIONS is not a selector; it is the number of
 * entries that precede it.
 */
enum zio_compress {
	ZIO_COMPRESS_INHERIT = 0,
	ZIO_COMPRESS_ON,
	ZIO_COMPRESS_OFF,
	ZIO_COMPRESS_LZJB,
	ZIO_COMPRESS_EMPTY,
	ZIO_COMPRESS_GZIP_1,
	ZIO_COMPRESS_GZIP_2,
	ZIO_COMPRESS_GZIP_3,
	ZIO_COMPRESS_GZIP_4,
	ZIO_COMPRESS_GZIP_5,
	ZIO_COMPRESS_GZIP_6,
	ZIO_COMPRESS_GZIP_7,
	ZIO_COMPRESS_GZIP_8,
	ZIO_COMPRESS_GZIP_9,
	ZIO_COMPRESS_FUNCTIONS
};

/* the compression that ZIO_COMPRESS_ON stands for */
#define	ZIO_COMPRESS_ON_VALUE	ZIO_COMPRESS_LZJB
/* the default compression setting */
#define	ZIO_COMPRESS_DEFAULT	ZIO_COMPRESS_OFF
|
||||
|
||||
/* Failure mode values: wait, continue, or panic. */
#define	ZIO_FAILURE_MODE_WAIT		0
#define	ZIO_FAILURE_MODE_CONTINUE	1
#define	ZIO_FAILURE_MODE_PANIC		2
|
||||
|
||||
/*
 * I/O priorities.  Each name is a lookup into zio_priority_table[],
 * so the priority value itself is whatever that table holds at the
 * given index.  ZIO_PRIORITY_TABLE_SIZE is the number of slots.
 */
#define	ZIO_PRIORITY_NOW		(zio_priority_table[0])
#define	ZIO_PRIORITY_SYNC_READ		(zio_priority_table[1])
#define	ZIO_PRIORITY_SYNC_WRITE		(zio_priority_table[2])
#define	ZIO_PRIORITY_ASYNC_READ		(zio_priority_table[3])
#define	ZIO_PRIORITY_ASYNC_WRITE	(zio_priority_table[4])
#define	ZIO_PRIORITY_FREE		(zio_priority_table[5])
#define	ZIO_PRIORITY_CACHE_FILL		(zio_priority_table[6])
#define	ZIO_PRIORITY_LOG_WRITE		(zio_priority_table[7])
#define	ZIO_PRIORITY_RESILVER		(zio_priority_table[8])
#define	ZIO_PRIORITY_SCRUB		(zio_priority_table[9])
#define	ZIO_PRIORITY_TABLE_SIZE		10
|
||||
|
||||
/*
 * zio flag bits.  ZIO_FLAG_MUSTSUCCEED is zero, i.e. it is the
 * absence of any flag rather than a bit of its own.
 */
#define	ZIO_FLAG_MUSTSUCCEED		0x00000
#define	ZIO_FLAG_CANFAIL		0x00001
#define	ZIO_FLAG_SPECULATIVE		0x00002
#define	ZIO_FLAG_CONFIG_WRITER		0x00004
#define	ZIO_FLAG_DONT_RETRY		0x00008

#define	ZIO_FLAG_DONT_CACHE		0x00010
#define	ZIO_FLAG_DONT_QUEUE		0x00020
#define	ZIO_FLAG_DONT_AGGREGATE		0x00040
#define	ZIO_FLAG_DONT_PROPAGATE		0x00080

#define	ZIO_FLAG_IO_BYPASS		0x00100
#define	ZIO_FLAG_IO_REPAIR		0x00200
#define	ZIO_FLAG_IO_RETRY		0x00400
#define	ZIO_FLAG_IO_REWRITE		0x00800

#define	ZIO_FLAG_PROBE			0x01000
#define	ZIO_FLAG_RESILVER		0x02000
#define	ZIO_FLAG_SCRUB			0x04000
#define	ZIO_FLAG_SCRUB_THREAD		0x08000

#define	ZIO_FLAG_GANG_CHILD		0x10000

/* Mask of the flags carried over under the "gang inherit" rule. */
#define	ZIO_FLAG_GANG_INHERIT		\
	(ZIO_FLAG_CANFAIL |		\
	ZIO_FLAG_SPECULATIVE |		\
	ZIO_FLAG_CONFIG_WRITER |	\
	ZIO_FLAG_DONT_RETRY |		\
	ZIO_FLAG_DONT_CACHE |		\
	ZIO_FLAG_DONT_AGGREGATE |	\
	ZIO_FLAG_RESILVER |		\
	ZIO_FLAG_SCRUB |		\
	ZIO_FLAG_SCRUB_THREAD)

/* The gang mask plus the repair, retry and probe bits. */
#define	ZIO_FLAG_VDEV_INHERIT		\
	(ZIO_FLAG_GANG_INHERIT |	\
	ZIO_FLAG_IO_REPAIR |		\
	ZIO_FLAG_IO_RETRY |		\
	ZIO_FLAG_PROBE)
|
||||
|
||||
/* Pipeline continue / stop codes. */
#define	ZIO_PIPELINE_CONTINUE	0x100
#define	ZIO_PIPELINE_STOP	0x101
|
||||
|
||||
/*
 * Flags for a gang child of zio: the subset of the zio's own flags
 * selected by ZIO_FLAG_GANG_INHERIT, with ZIO_FLAG_GANG_CHILD and
 * ZIO_FLAG_CANFAIL always set.
 */
#define	ZIO_GANG_CHILD_FLAGS(zio)	\
	(ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL |	\
	((zio)->io_flags & ZIO_FLAG_GANG_INHERIT))
|
||||
|
||||
/*
 * Kinds of child zio.  ZIO_CHILD_TYPES is the number of kinds and is
 * used to size per-kind arrays.
 */
enum zio_child {
	ZIO_CHILD_VDEV = 0,
	ZIO_CHILD_GANG,
	ZIO_CHILD_LOGICAL,
	ZIO_CHILD_TYPES
};
|
||||
|
||||
/*
 * Kinds of wait.  ZIO_WAIT_TYPES is the number of kinds and is used
 * to size per-kind arrays.
 */
enum zio_wait_type {
	ZIO_WAIT_READY = 0,
	ZIO_WAIT_DONE,
	ZIO_WAIT_TYPES
};
|
||||
|
||||
/*
 * Checksum errors and fragmentation are reported with two otherwise
 * unused errnos, 'EBADE' and 'EBADR' (from the Convergent graveyard).
 */
#define	ECKSUM	EBADE
#define	EFRAGS	EBADR
|
||||
|
||||
typedef struct zio zio_t;
|
||||
typedef void zio_done_func_t(zio_t *zio);
|
||||
|
||||
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
|
||||
extern char *zio_type_name[ZIO_TYPES];
|
||||
|
||||
/*
 * A bookmark is the four-tuple <objset, object, level, blkid>, which
 * uniquely identifies any block in the pool.  The conventions are:
 *
 *	mos is objset 0
 *	meta-dnode is object 0
 *	root block is <objset, 0, -1, 0>
 *	intent log is <objset, 0, -1, ZIL sequence number>
 *
 * That is: the meta-objset (MOS) is objset 0, the meta-dnode is
 * object 0, the root block (osphys_t) is level -1 of the meta-dnode,
 * and intent log blocks (which are chained off the root block) have
 * blkid == sequence number.
 *
 * Note: the name comes from the structure's first purpose, which was
 * to remember where to resume a pool-wide traverse.  The absolute
 * ordering for block visitation during traversal is defined in
 * compare_bookmark().
 *
 * Note: this structure is passed between userland and the kernel, so
 * its size and alignment must be the same for 32 and 64 bit
 * compilation options.
 */
typedef struct zbookmark {
	uint64_t	zb_objset;
	uint64_t	zb_object;
	int64_t		zb_level;	/* signed: -1 is a valid level */
	uint64_t	zb_blkid;
} zbookmark_t;
|
||||
|
||||
typedef struct zio_prop {
|
||||
enum zio_checksum zp_checksum;
|
||||
enum zio_compress zp_compress;
|
||||
dmu_object_type_t zp_type;
|
||||
uint8_t zp_level;
|
||||
uint8_t zp_ndvas;
|
||||
} zio_prop_t;
|
||||
|
||||
typedef struct zio_gang_node {
|
||||
zio_gbh_phys_t *gn_gbh;
|
||||
struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
|
||||
} zio_gang_node_t;
|
||||
|
||||
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
|
||||
zio_gang_node_t *gn, void *data);
|
||||
|
||||
typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
|
||||
|
||||
typedef struct zio_transform {
|
||||
void *zt_orig_data;
|
||||
uint64_t zt_orig_size;
|
||||
uint64_t zt_bufsize;
|
||||
zio_transform_func_t *zt_transform;
|
||||
struct zio_transform *zt_next;
|
||||
} zio_transform_t;
|
||||
|
||||
typedef int zio_pipe_stage_t(zio_t *zio);
|
||||
|
||||
/*
 * The io_reexecute flags are kept apart from io_flags because a child
 * must be able to propagate them to its parent.  The normal io_flags
 * are local to the zio, not protected by any lock, and not modifiable
 * by children.  The reexecute flags are protected by io_lock, are
 * modifiable by children, and are always propagated -- even when
 * ZIO_FLAG_DONT_PROPAGATE is set.
 */
#define	ZIO_REEXECUTE_NOW	0x01
#define	ZIO_REEXECUTE_SUSPEND	0x02
|
||||
|
||||
struct zio {
|
||||
/* Core information about this I/O */
|
||||
zbookmark_t io_bookmark;
|
||||
zio_prop_t io_prop;
|
||||
zio_type_t io_type;
|
||||
enum zio_child io_child_type;
|
||||
int io_cmd;
|
||||
uint8_t io_priority;
|
||||
uint8_t io_reexecute;
|
||||
uint8_t io_async_root;
|
||||
uint64_t io_txg;
|
||||
spa_t *io_spa;
|
||||
blkptr_t *io_bp;
|
||||
blkptr_t io_bp_copy;
|
||||
zio_t *io_parent;
|
||||
zio_t *io_child;
|
||||
zio_t *io_sibling_prev;
|
||||
zio_t *io_sibling_next;
|
||||
zio_t *io_logical;
|
||||
zio_transform_t *io_transform_stack;
|
||||
|
||||
/* Callback info */
|
||||
zio_done_func_t *io_ready;
|
||||
zio_done_func_t *io_done;
|
||||
void *io_private;
|
||||
blkptr_t io_bp_orig;
|
||||
|
||||
/* Data represented by this I/O */
|
||||
void *io_data;
|
||||
uint64_t io_size;
|
||||
|
||||
/* Stuff for the vdev stack */
|
||||
vdev_t *io_vd;
|
||||
void *io_vsd;
|
||||
zio_done_func_t *io_vsd_free;
|
||||
uint64_t io_offset;
|
||||
uint64_t io_deadline;
|
||||
avl_node_t io_offset_node;
|
||||
avl_node_t io_deadline_node;
|
||||
avl_tree_t *io_vdev_tree;
|
||||
zio_t *io_delegate_list;
|
||||
zio_t *io_delegate_next;
|
||||
|
||||
/* Internal pipeline state */
|
||||
int io_flags;
|
||||
zio_stage_t io_stage;
|
||||
uint32_t io_pipeline;
|
||||
int io_orig_flags;
|
||||
zio_stage_t io_orig_stage;
|
||||
uint32_t io_orig_pipeline;
|
||||
int io_error;
|
||||
int io_child_error[ZIO_CHILD_TYPES];
|
||||
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
|
||||
uint64_t *io_stall;
|
||||
zio_gang_node_t *io_gang_tree;
|
||||
void *io_executor;
|
||||
void *io_waiter;
|
||||
kmutex_t io_lock;
|
||||
kcondvar_t io_cv;
|
||||
|
||||
/* FMA state */
|
||||
uint64_t io_ena;
|
||||
};
|
||||
|
||||
extern zio_t *zio_null(zio_t *pio, spa_t *spa,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_root(spa_t *spa,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
|
||||
uint64_t size, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, const zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
void *data, uint64_t size, zio_prop_t *zp,
|
||||
zio_done_func_t *ready, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, const zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
void *data, uint64_t size, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, zbookmark_t *zb);
|
||||
|
||||
extern void zio_skip_write(zio_t *zio);
|
||||
|
||||
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
|
||||
zio_done_func_t *done, void *private, int priority, int flags);
|
||||
|
||||
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *data, int checksum,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
boolean_t labels);
|
||||
|
||||
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *data, int checksum,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
boolean_t labels);
|
||||
|
||||
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
|
||||
blkptr_t *old_bp, uint64_t txg);
|
||||
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
|
||||
extern void zio_flush(zio_t *zio, vdev_t *vd);
|
||||
|
||||
extern int zio_wait(zio_t *zio);
|
||||
extern void zio_nowait(zio_t *zio);
|
||||
extern void zio_execute(zio_t *zio);
|
||||
extern void zio_interrupt(zio_t *zio);
|
||||
|
||||
extern void *zio_buf_alloc(size_t size);
|
||||
extern void zio_buf_free(void *buf, size_t size);
|
||||
extern void *zio_data_buf_alloc(size_t size);
|
||||
extern void zio_data_buf_free(void *buf, size_t size);
|
||||
|
||||
extern void zio_resubmit_stage_async(void *);
|
||||
|
||||
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
|
||||
uint64_t offset, void *data, uint64_t size, int type, int priority,
|
||||
int flags, zio_done_func_t *done, void *private);
|
||||
|
||||
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
|
||||
void *data, uint64_t size, int type, int priority,
|
||||
int flags, zio_done_func_t *done, void *private);
|
||||
|
||||
extern void zio_vdev_io_bypass(zio_t *zio);
|
||||
extern void zio_vdev_io_reissue(zio_t *zio);
|
||||
extern void zio_vdev_io_redone(zio_t *zio);
|
||||
|
||||
extern void zio_checksum_verified(zio_t *zio);
|
||||
extern int zio_worst_error(int e1, int e2);
|
||||
|
||||
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
|
||||
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
|
||||
|
||||
extern void zio_suspend(spa_t *spa, zio_t *zio);
|
||||
extern void zio_resume(spa_t *spa);
|
||||
extern void zio_resume_wait(spa_t *spa);
|
||||
|
||||
/*
|
||||
* Initial setup and teardown.
|
||||
*/
|
||||
extern void zio_init(void);
|
||||
extern void zio_fini(void);
|
||||
|
||||
/*
|
||||
* Fault injection
|
||||
*/
|
||||
struct zinject_record;
|
||||
extern uint32_t zio_injection_enabled;
|
||||
extern int zio_inject_fault(char *name, int flags, int *id,
|
||||
struct zinject_record *record);
|
||||
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
|
||||
struct zinject_record *record);
|
||||
extern int zio_clear_fault(int id);
|
||||
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, int error);
|
||||
extern int zio_handle_label_injection(zio_t *zio, int error);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZIO_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_CHECKSUM_H
|
||||
#define _SYS_ZIO_CHECKSUM_H
|
||||
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Signature for checksum functions.
|
||||
*/
|
||||
typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
|
||||
|
||||
/*
|
||||
* Information about each checksum function.
|
||||
*/
|
||||
typedef struct zio_checksum_info {
|
||||
zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
|
||||
int ci_correctable; /* number of correctable bits */
|
||||
int ci_zbt; /* uses zio block tail? */
|
||||
char *ci_name; /* descriptive name */
|
||||
} zio_checksum_info_t;
|
||||
|
||||
extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
|
||||
|
||||
/*
|
||||
* Checksum routines.
|
||||
*/
|
||||
extern zio_checksum_t fletcher_2_native;
|
||||
extern zio_checksum_t fletcher_4_native;
|
||||
extern zio_checksum_t fletcher_4_incremental_native;
|
||||
|
||||
extern zio_checksum_t fletcher_2_byteswap;
|
||||
extern zio_checksum_t fletcher_4_byteswap;
|
||||
extern zio_checksum_t fletcher_4_incremental_byteswap;
|
||||
|
||||
extern zio_checksum_t zio_checksum_SHA256;
|
||||
|
||||
/*
|
||||
* Takes a zio, a checksum selector (enum zio_checksum), a data buffer and
|
||||
* a 64-bit size; returns nothing.
|
||||
* NOTE(review): presumably computes the selected checksum over the buffer
|
||||
* on behalf of the zio -- where the result is stored is not visible
|
||||
* here; confirm in the implementation.
|
||||
*/
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||
void *data, uint64_t size);
|
||||
/*
|
||||
* Takes a zio and returns an int.
|
||||
* NOTE(review): presumably 0 when the zio's data matches its checksum and
|
||||
* a non-zero error code otherwise -- confirm in the implementation.
|
||||
*/
extern int zio_checksum_error(zio_t *zio);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIO_CHECKSUM_H */
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_COMPRESS_H
|
||||
#define _SYS_ZIO_COMPRESS_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Common signature for all zio compress/decompress functions.
|
||||
*
|
||||
* Both take a source buffer, a destination buffer, their lengths (s_len,
|
||||
* d_len) and a trailing int.  The lzjb/gzip prototypes in this header
|
||||
* name that trailing int "level".
|
||||
* The compress flavour returns a size_t and the decompress flavour
|
||||
* returns an int.  NOTE(review): presumably the compressed length and a
|
||||
* 0-on-success status respectively -- confirm against the callers.
|
||||
*/
|
||||
typedef size_t zio_compress_func_t(void *src, void *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
typedef int zio_decompress_func_t(void *src, void *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
|
||||
/*
|
||||
* Information about each compression function.
|
||||
*
|
||||
* This is the element type of zio_compress_table[].  It pairs a compress
|
||||
* and a decompress routine with a level and a name.
|
||||
* NOTE(review): ci_level is presumably the value handed to the routines
|
||||
* as their trailing int argument -- confirm in the table's users.
|
||||
*/
|
||||
typedef struct zio_compress_info {
|
||||
zio_compress_func_t *ci_compress; /* compression function */
|
||||
zio_decompress_func_t *ci_decompress; /* decompression function */
|
||||
int ci_level; /* level parameter */
|
||||
char *ci_name; /* algorithm name */
|
||||
} zio_compress_info_t;
|
||||
|
||||
/*
|
||||
* Table of compression descriptors, ZIO_COMPRESS_FUNCTIONS entries long;
|
||||
* defined outside this header.  Presumably indexed by the cpfunc value
|
||||
* passed to zio_compress_data()/zio_decompress_data() -- confirm.
|
||||
*/
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
|
||||
|
||||
/*
|
||||
* Compression routines.
|
||||
*
|
||||
* The *_compress prototypes match zio_compress_func_t and the
|
||||
* *_decompress prototypes match zio_decompress_func_t, so they can be
|
||||
* stored in the ci_compress/ci_decompress members of zio_compress_info_t.
|
||||
*/
|
||||
extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
|
||||
/*
|
||||
* Compress and decompress data if necessary.
|
||||
*
|
||||
* Both take an int selector (cpfunc) plus a source buffer and its size.
|
||||
* zio_compress_data() additionally takes three pointer arguments (destp,
|
||||
* destsizep, destbufsizep); zio_decompress_data() takes a destination
|
||||
* buffer and its size.  Both return an int.
|
||||
* NOTE(review): the pointer arguments are presumably outputs describing
|
||||
* the compressed buffer, and the meaning of the int result is not
|
||||
* visible here -- confirm in the implementation.
|
||||
*/
|
||||
extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
|
||||
void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
|
||||
extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
|
||||
void *dest, uint64_t destsize);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIO_COMPRESS_H */
|
||||
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_IMPL_H
|
||||
#define _ZIO_IMPL_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* I/O Groups: pipeline stage definitions.
|
||||
*
|
||||
* The enumerators are used as bit positions: the masks below in this
|
||||
* header are built as (1U << stage).  Their numeric values therefore
|
||||
* matter, and ZIO_STAGES (16 here) must stay within the width of an
|
||||
* unsigned int.
|
||||
*
|
||||
* The five-character tag on each stage appears to mark the I/O types
|
||||
* that use it -- R(ead), W(rite), F(ree), C(laim), I(octl) -- with '-'
|
||||
* meaning "not used".
|
||||
* NOTE(review): the tags are comments only and do not always agree with
|
||||
* the masks; e.g. ZIO_STAGE_VDEV_IO_DONE is tagged 'I' but is not part
|
||||
* of ZIO_IOCTL_PIPELINE.
|
||||
*/
|
||||
typedef enum zio_stage {
|
||||
ZIO_STAGE_OPEN = 0, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_ISSUE_ASYNC, /* -W--- */
|
||||
|
||||
ZIO_STAGE_READ_BP_INIT, /* R---- */
|
||||
ZIO_STAGE_WRITE_BP_INIT, /* -W--- */
|
||||
|
||||
ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
|
||||
|
||||
ZIO_STAGE_GANG_ASSEMBLE, /* RWFC- */
|
||||
ZIO_STAGE_GANG_ISSUE, /* RWFC- */
|
||||
|
||||
ZIO_STAGE_DVA_ALLOCATE, /* -W--- */
|
||||
ZIO_STAGE_DVA_FREE, /* --F-- */
|
||||
ZIO_STAGE_DVA_CLAIM, /* ---C- */
|
||||
|
||||
ZIO_STAGE_READY, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_VDEV_IO_START, /* RW--I */
|
||||
ZIO_STAGE_VDEV_IO_DONE, /* RW--I */
|
||||
ZIO_STAGE_VDEV_IO_ASSESS, /* RW--I */
|
||||
|
||||
ZIO_STAGE_CHECKSUM_VERIFY, /* R---- */
|
||||
|
||||
ZIO_STAGE_DONE, /* RWFCI */
|
||||
ZIO_STAGES /* number of stages; not a stage itself */
|
||||
} zio_stage_t;
|
||||
|
||||
/*
|
||||
* Bitmask of the READY and DONE stages.  It is included in the
|
||||
* read/write common masks and in the free, claim and ioctl pipelines.
|
||||
*/
#define ZIO_INTERLOCK_STAGES \
|
||||
((1U << ZIO_STAGE_READY) | \
|
||||
(1U << ZIO_STAGE_DONE))
|
||||
|
||||
/*
|
||||
* Pipeline made of the interlock stages only (READY and DONE).
|
||||
*/
#define ZIO_INTERLOCK_PIPELINE \
|
||||
ZIO_INTERLOCK_STAGES
|
||||
|
||||
/*
|
||||
* Bitmask of the three vdev I/O stages: START, DONE and ASSESS.
|
||||
*/
#define ZIO_VDEV_IO_STAGES \
|
||||
((1U << ZIO_STAGE_VDEV_IO_START) | \
|
||||
(1U << ZIO_STAGE_VDEV_IO_DONE) | \
|
||||
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
|
||||
|
||||
/*
|
||||
* The vdev I/O stages plus DONE.  Unlike the interlock-based pipelines
|
||||
* this mask does not include READY.
|
||||
*/
#define ZIO_VDEV_CHILD_PIPELINE \
|
||||
(ZIO_VDEV_IO_STAGES | \
|
||||
(1U << ZIO_STAGE_DONE))
|
||||
|
||||
/*
|
||||
* Stages shared by both read pipelines: the interlock stages, the vdev
|
||||
* I/O stages and CHECKSUM_VERIFY.
|
||||
*/
#define ZIO_READ_COMMON_STAGES \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_VDEV_IO_STAGES | \
|
||||
(1U << ZIO_STAGE_CHECKSUM_VERIFY))
|
||||
|
||||
/*
|
||||
* Physical read pipeline: exactly the common read stages, i.e. without
|
||||
* READ_BP_INIT.
|
||||
*/
#define ZIO_READ_PHYS_PIPELINE \
|
||||
ZIO_READ_COMMON_STAGES
|
||||
|
||||
/*
|
||||
* Read pipeline: the common read stages plus READ_BP_INIT.
|
||||
*/
#define ZIO_READ_PIPELINE \
|
||||
(ZIO_READ_COMMON_STAGES | \
|
||||
(1U << ZIO_STAGE_READ_BP_INIT))
|
||||
|
||||
/*
|
||||
* Stages shared by all write pipelines: the interlock stages, the vdev
|
||||
* I/O stages, ISSUE_ASYNC and CHECKSUM_GENERATE.
|
||||
*/
#define ZIO_WRITE_COMMON_STAGES \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_VDEV_IO_STAGES | \
|
||||
(1U << ZIO_STAGE_ISSUE_ASYNC) | \
|
||||
(1U << ZIO_STAGE_CHECKSUM_GENERATE))
|
||||
|
||||
/*
|
||||
* Physical write pipeline: exactly the common write stages, i.e. without
|
||||
* WRITE_BP_INIT and DVA_ALLOCATE.
|
||||
*/
#define ZIO_WRITE_PHYS_PIPELINE \
|
||||
ZIO_WRITE_COMMON_STAGES
|
||||
|
||||
/*
|
||||
* Rewrite pipeline: the common write stages plus WRITE_BP_INIT.  It
|
||||
* differs from ZIO_WRITE_PIPELINE only by omitting DVA_ALLOCATE.
|
||||
* NOTE(review): presumably because a rewrite reuses the existing
|
||||
* allocation -- confirm against the callers.
|
||||
*/
#define ZIO_REWRITE_PIPELINE \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
(1U << ZIO_STAGE_WRITE_BP_INIT))
|
||||
|
||||
/*
|
||||
* Write pipeline: the common write stages plus WRITE_BP_INIT and
|
||||
* DVA_ALLOCATE.
|
||||
*/
#define ZIO_WRITE_PIPELINE \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
(1U << ZIO_STAGE_WRITE_BP_INIT) | \
|
||||
(1U << ZIO_STAGE_DVA_ALLOCATE))
|
||||
|
||||
/*
|
||||
* Bitmask of the two gang stages, GANG_ASSEMBLE and GANG_ISSUE.  No
|
||||
* pipeline mask defined in this header includes it.
|
||||
* NOTE(review): presumably OR-ed into a zio's pipeline by the code that
|
||||
* handles gang blocks -- confirm against the users of this macro.
|
||||
*/
#define ZIO_GANG_STAGES \
|
||||
((1U << ZIO_STAGE_GANG_ASSEMBLE) | \
|
||||
(1U << ZIO_STAGE_GANG_ISSUE))
|
||||
|
||||
/*
|
||||
* Free pipeline: the interlock stages plus DVA_FREE.
|
||||
*/
#define ZIO_FREE_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
(1U << ZIO_STAGE_DVA_FREE))
|
||||
|
||||
/*
|
||||
* Claim pipeline: the interlock stages plus DVA_CLAIM.
|
||||
*/
#define ZIO_CLAIM_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
(1U << ZIO_STAGE_DVA_CLAIM))
|
||||
|
||||
/*
|
||||
* Ioctl pipeline: the interlock stages plus VDEV_IO_START and
|
||||
* VDEV_IO_ASSESS.  VDEV_IO_DONE is not part of this mask, so
|
||||
* ZIO_VDEV_IO_STAGES is deliberately not used here.
|
||||
*/
#define ZIO_IOCTL_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
(1U << ZIO_STAGE_VDEV_IO_START) | \
|
||||
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
|
||||
|
||||
/*
|
||||
* Bitmask of VDEV_IO_START, DVA_ALLOCATE and DVA_CLAIM.
|
||||
* NOTE(review): going by the name these are presumably the stages that
|
||||
* may block on the pool configuration lock -- confirm against the users
|
||||
* of this macro.
|
||||
*/
#define ZIO_CONFIG_LOCK_BLOCKING_STAGES \
|
||||
((1U << ZIO_STAGE_VDEV_IO_START) | \
|
||||
(1U << ZIO_STAGE_DVA_ALLOCATE) | \
|
||||
(1U << ZIO_STAGE_DVA_CLAIM))
|
||||
|
||||
/*
|
||||
* Argument-less init/fini pair for zio injection; both return nothing.
|
||||
* NOTE(review): presumably called once at module load and unload to set
|
||||
* up and tear down the injection state -- confirm against the callers.
|
||||
*/
extern void zio_inject_init(void);
|
||||
extern void zio_inject_fini(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZIO_IMPL_H */
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZVOL_H
|
||||
#define _SYS_ZVOL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fixed 64-bit constants 1 and 2.
|
||||
* NOTE(review): presumably the object numbers of a volume's data object
|
||||
* and of its ZAP (property) object inside the volume's objset -- confirm
|
||||
* against the zvol implementation.
|
||||
*/
#define ZVOL_OBJ 1ULL
|
||||
#define ZVOL_ZAP_OBJ 2ULL
|
||||
|
||||
/*
|
||||
* Kernel-only zvol interfaces: everything up to the matching #endif is
|
||||
* declared only when _KERNEL is defined.
|
||||
*
|
||||
* The first group takes volume sizes, objsets or names.  Every function
|
||||
* in it returns an int except zvol_create_cb(), which returns nothing.
|
||||
* NOTE(review): the int results are presumably 0 on success and an errno
|
||||
* value on failure -- confirm in the implementation.
|
||||
*/
#ifdef _KERNEL
|
||||
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
|
||||
extern int zvol_check_volblocksize(uint64_t volblocksize);
|
||||
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
|
||||
extern int zvol_create_minor(const char *, major_t);
|
||||
extern int zvol_remove_minor(const char *);
|
||||
extern int zvol_set_volsize(const char *, major_t, uint64_t);
|
||||
extern int zvol_set_volblocksize(const char *, uint64_t);
|
||||
|
||||
/*
|
||||
* Entry points operating on a dev_t, buf_t, uio_t or aio_req, followed by
|
||||
* zvol_busy() and the zvol_init()/zvol_fini() pair.
|
||||
* NOTE(review): the open/close/dump/strategy/read/write/aread/awrite/
|
||||
* ioctl signatures look like Solaris device-driver entry points --
|
||||
* confirm against the driver's cb_ops table.
|
||||
*/
extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
|
||||
extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
|
||||
extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
|
||||
extern int zvol_strategy(buf_t *bp);
|
||||
extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr);
|
||||
extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr);
|
||||
extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr);
|
||||
extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr);
|
||||
extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
|
||||
int *rvalp);
|
||||
extern int zvol_busy(void);
|
||||
extern void zvol_init(void);
|
||||
extern void zvol_fini(void);
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZVOL_H */
|
||||
Reference in New Issue
Block a user