mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Rebase to OpenSolaris b103, in the process we are removing any code which did not originate from the OpenSolaris source. These changes will be reintroduced in topic branches for easier tracking.
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ARC_H
|
||||
#define _SYS_ARC_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||
typedef struct arc_buf arc_buf_t;
|
||||
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
|
||||
typedef int arc_evict_func_t(void *private);
|
||||
|
||||
/* generic arc_done_func_t's which you can use */
|
||||
arc_done_func_t arc_bcopy_func;
|
||||
arc_done_func_t arc_getbuf_func;
|
||||
|
||||
struct arc_buf {
|
||||
arc_buf_hdr_t *b_hdr;
|
||||
arc_buf_t *b_next;
|
||||
krwlock_t b_lock;
|
||||
void *b_data;
|
||||
arc_evict_func_t *b_efunc;
|
||||
void *b_private;
|
||||
};
|
||||
|
||||
/*
 * Classifies what an ARC buffer holds.  Enumerators take the default
 * values 0, 1, 2, so ARC_BUFC_NUMTYPES, being last, equals the number of
 * real content types that precede it.
 */
typedef enum arc_buf_contents {
	ARC_BUFC_DATA,				/* buffer contains data */
	ARC_BUFC_METADATA,			/* buffer contains metadata */
	ARC_BUFC_NUMTYPES
} arc_buf_contents_t;
|
||||
/*
 * These are the flags we pass into calls to the arc.
 *
 * Each flag occupies its own bit so flags can be OR'd together.  Note
 * that the numbering starts at bit 1; bit 0 is not assigned here.
 */
#define	ARC_WAIT	(1 << 1)	/* perform I/O synchronously */
#define	ARC_NOWAIT	(1 << 2)	/* perform I/O asynchronously */
#define	ARC_PREFETCH	(1 << 3)	/* I/O is a prefetch */
#define	ARC_CACHED	(1 << 4)	/* I/O was already in cache */
#define	ARC_L2CACHE	(1 << 5)	/* cache in L2ARC */
|
||||
|
||||
/*
 * ARC space accounting and raw data-buffer allocation entry points.
 * All sizes are passed as uint64_t.  arc_data_buf_free() takes the
 * buffer together with a size; presumably that must be the size given to
 * arc_data_buf_alloc() -- confirm against the implementation.
 */
void arc_space_consume(uint64_t space);
void arc_space_return(uint64_t space);
void *arc_data_buf_alloc(uint64_t space);
void arc_data_buf_free(void *buf, uint64_t space);
|
||||
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
|
||||
arc_buf_contents_t type);
|
||||
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_size(arc_buf_t *buf);
|
||||
void arc_release(arc_buf_t *buf, void *tag);
|
||||
int arc_released(arc_buf_t *buf);
|
||||
int arc_has_callback(arc_buf_t *buf);
|
||||
void arc_buf_freeze(arc_buf_t *buf);
|
||||
void arc_buf_thaw(arc_buf_t *buf);
|
||||
#ifdef ZFS_DEBUG
|
||||
int arc_referenced(arc_buf_t *buf);
|
||||
#endif
|
||||
|
||||
typedef struct writeprops {
|
||||
dmu_object_type_t wp_type;
|
||||
uint8_t wp_level;
|
||||
uint8_t wp_copies;
|
||||
uint8_t wp_dncompress, wp_oscompress;
|
||||
uint8_t wp_dnchecksum, wp_oschecksum;
|
||||
} writeprops_t;
|
||||
|
||||
void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp);
|
||||
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
|
||||
arc_done_func_t *done, void *private, int priority, int zio_flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
|
||||
arc_done_func_t *done, void *private, int priority, int flags,
|
||||
uint32_t *arc_flags, const zbookmark_t *zb);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
|
||||
boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
|
||||
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
|
||||
int zio_flags, const zbookmark_t *zb);
|
||||
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
|
||||
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||
int arc_buf_evict(arc_buf_t *buf);
|
||||
|
||||
void arc_flush(spa_t *spa);
|
||||
void arc_tempreserve_clear(uint64_t reserve);
|
||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||
|
||||
void arc_init(void);
|
||||
void arc_fini(void);
|
||||
|
||||
/*
|
||||
* Level 2 ARC
|
||||
*/
|
||||
|
||||
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
|
||||
void l2arc_remove_vdev(vdev_t *vd);
|
||||
boolean_t l2arc_vdev_present(vdev_t *vd);
|
||||
void l2arc_init(void);
|
||||
void l2arc_fini(void);
|
||||
void l2arc_start(void);
|
||||
void l2arc_stop(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ARC_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BPLIST_H
|
||||
#define _SYS_BPLIST_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct bplist_phys {
	/*
	 * This is the bonus buffer for the dead lists.  The object's
	 * contents is an array of bpl_entries blkptr_t's, representing
	 * a total of bpl_bytes physical space.
	 *
	 * All four members are uint64_t, so the structure is exactly
	 * 4 * sizeof (uint64_t) bytes with no padding.
	 */
	uint64_t	bpl_entries;
	uint64_t	bpl_bytes;
	uint64_t	bpl_comp;
	uint64_t	bpl_uncomp;
} bplist_phys_t;

/*
 * Size of the first two members only (bpl_entries and bpl_bytes), i.e.
 * the offset of bpl_comp.  Judging by the name, this is the size of the
 * original ("V0") layout before bpl_comp/bpl_uncomp were added --
 * confirm against bplist.c.
 */
#define	BPLIST_SIZE_V0	(2 * sizeof (uint64_t))
|
||||
|
||||
typedef struct bplist_q {
|
||||
blkptr_t bpq_blk;
|
||||
void *bpq_next;
|
||||
} bplist_q_t;
|
||||
|
||||
typedef struct bplist {
|
||||
kmutex_t bpl_lock;
|
||||
objset_t *bpl_mos;
|
||||
uint64_t bpl_object;
|
||||
uint8_t bpl_blockshift;
|
||||
uint8_t bpl_bpshift;
|
||||
uint8_t bpl_havecomp;
|
||||
bplist_q_t *bpl_queue;
|
||||
bplist_phys_t *bpl_phys;
|
||||
dmu_buf_t *bpl_dbuf;
|
||||
dmu_buf_t *bpl_cached_dbuf;
|
||||
} bplist_t;
|
||||
|
||||
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
|
||||
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
|
||||
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
|
||||
extern void bplist_close(bplist_t *bpl);
|
||||
extern boolean_t bplist_empty(bplist_t *bpl);
|
||||
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
|
||||
extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
|
||||
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern int bplist_space(bplist_t *bpl,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
extern int bplist_space_birthrange(bplist_t *bpl,
|
||||
uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_BPLIST_H */
|
||||
@@ -0,0 +1,347 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DBUF_H
|
||||
#define _SYS_DBUF_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * DB_BONUS_BLKID is the all-ones unsigned long long value; by its name
 * it is the block id used for a bonus buffer.
 */
#define	DB_BONUS_BLKID (-1ULL)
#define	IN_DMU_SYNC 2
|
||||
|
||||
/*
 * define flags for dbuf_read
 *
 * Each flag is a distinct bit, so several may be OR'd into the flags
 * argument of dbuf_read().
 */

#define	DB_RF_MUST_SUCCEED	(1 << 0)
#define	DB_RF_CANFAIL		(1 << 1)
#define	DB_RF_HAVESTRUCT	(1 << 2)
#define	DB_RF_NOPREFETCH	(1 << 3)
#define	DB_RF_NEVERWAIT		(1 << 4)
#define	DB_RF_CACHED		(1 << 5)
|
||||
|
||||
/*
 * The simplified state transition diagram for dbufs looks like:
 *
 *              +----> READ ----+
 *              |               |
 *              |               V
 *  (alloc)-->UNCACHED       CACHED-->EVICTING-->(free)
 *              |               ^        ^
 *              |               |        |
 *              +----> FILL ----+        |
 *              |                        |
 *              |                        |
 *              +--------> NOFILL -------+
 *
 * Note that the enumerators below are declared in a different order
 * from the diagram; their numeric values follow declaration order.
 */
typedef enum dbuf_states {
	DB_UNCACHED,
	DB_FILL,
	DB_NOFILL,
	DB_READ,
	DB_CACHED,
	DB_EVICTING
} dbuf_states_t;
|
||||
|
||||
struct objset_impl;
|
||||
struct dnode;
|
||||
struct dmu_tx;
|
||||
|
||||
/*
|
||||
* level = 0 means the user data
|
||||
* level = 1 means the single indirect block
|
||||
* etc.
|
||||
*/
|
||||
|
||||
/*
 * True when both list_next and list_prev of the node that link points
 * at are NULL.  link is a pointer and is evaluated up to twice, so it
 * must not have side effects.
 */
#define	LIST_LINK_INACTIVE(link) \
	((link)->list_next == NULL && (link)->list_prev == NULL)
|
||||
|
||||
struct dmu_buf_impl;
|
||||
|
||||
/*
 * Values stored in dirty_leaf.dr_override_state.  The enumerators take
 * the default values 0, 1, 2; note that DR_IN_DMU_SYNC (1) is not the
 * same number as the IN_DMU_SYNC macro (2) defined earlier in this file.
 */
typedef enum override_states {
	DR_NOT_OVERRIDDEN,
	DR_IN_DMU_SYNC,
	DR_OVERRIDDEN
} override_states_t;
|
||||
|
||||
typedef struct dbuf_dirty_record {
|
||||
/* link on our parents dirty list */
|
||||
list_node_t dr_dirty_node;
|
||||
|
||||
/* transaction group this data will sync in */
|
||||
uint64_t dr_txg;
|
||||
|
||||
/* zio of outstanding write IO */
|
||||
zio_t *dr_zio;
|
||||
|
||||
/* pointer back to our dbuf */
|
||||
struct dmu_buf_impl *dr_dbuf;
|
||||
|
||||
/* pointer to next dirty record */
|
||||
struct dbuf_dirty_record *dr_next;
|
||||
|
||||
/* pointer to parent dirty record */
|
||||
struct dbuf_dirty_record *dr_parent;
|
||||
|
||||
union dirty_types {
|
||||
struct dirty_indirect {
|
||||
|
||||
/* protect access to list */
|
||||
kmutex_t dr_mtx;
|
||||
|
||||
/* Our list of dirty children */
|
||||
list_t dr_children;
|
||||
} di;
|
||||
struct dirty_leaf {
|
||||
|
||||
/*
|
||||
* dr_data is set when we dirty the buffer
|
||||
* so that we can retain the pointer even if it
|
||||
* gets COW'd in a subsequent transaction group.
|
||||
*/
|
||||
arc_buf_t *dr_data;
|
||||
blkptr_t dr_overridden_by;
|
||||
override_states_t dr_override_state;
|
||||
} dl;
|
||||
} dt;
|
||||
} dbuf_dirty_record_t;
|
||||
|
||||
typedef struct dmu_buf_impl {
|
||||
/*
|
||||
* The following members are immutable, with the exception of
|
||||
* db.db_data, which is protected by db_mtx.
|
||||
*/
|
||||
|
||||
/* the publicly visible structure */
|
||||
dmu_buf_t db;
|
||||
|
||||
/* the objset we belong to */
|
||||
struct objset_impl *db_objset;
|
||||
|
||||
/*
|
||||
* the dnode we belong to (NULL when evicted)
|
||||
*/
|
||||
struct dnode *db_dnode;
|
||||
|
||||
/*
|
||||
* our parent buffer; if the dnode points to us directly,
|
||||
* db_parent == db_dnode->dn_dbuf
|
||||
* only accessed by sync thread ???
|
||||
* (NULL when evicted)
|
||||
*/
|
||||
struct dmu_buf_impl *db_parent;
|
||||
|
||||
/*
|
||||
* link for hash table of all dmu_buf_impl_t's
|
||||
*/
|
||||
struct dmu_buf_impl *db_hash_next;
|
||||
|
||||
/* our block number */
|
||||
uint64_t db_blkid;
|
||||
|
||||
/*
|
||||
* Pointer to the blkptr_t which points to us. May be NULL if we
|
||||
* don't have one yet. (NULL when evicted)
|
||||
*/
|
||||
blkptr_t *db_blkptr;
|
||||
|
||||
/*
|
||||
* Our indirection level. Data buffers have db_level==0.
|
||||
* Indirect buffers which point to data buffers have
|
||||
* db_level==1. etc. Buffers which contain dnodes have
|
||||
* db_level==0, since the dnodes are stored in a file.
|
||||
*/
|
||||
uint8_t db_level;
|
||||
|
||||
/* db_mtx protects the members below */
|
||||
kmutex_t db_mtx;
|
||||
|
||||
/*
|
||||
* Current state of the buffer
|
||||
*/
|
||||
dbuf_states_t db_state;
|
||||
|
||||
/*
|
||||
* Refcount accessed by dmu_buf_{hold,rele}.
|
||||
* If nonzero, the buffer can't be destroyed.
|
||||
* Protected by db_mtx.
|
||||
*/
|
||||
refcount_t db_holds;
|
||||
|
||||
/* buffer holding our data */
|
||||
arc_buf_t *db_buf;
|
||||
|
||||
kcondvar_t db_changed;
|
||||
dbuf_dirty_record_t *db_data_pending;
|
||||
|
||||
/* pointer to most recent dirty record for this buffer */
|
||||
dbuf_dirty_record_t *db_last_dirty;
|
||||
|
||||
/*
|
||||
* Our link on the owner dnodes's dn_dbufs list.
|
||||
* Protected by its dn_dbufs_mtx.
|
||||
*/
|
||||
list_node_t db_link;
|
||||
|
||||
/* Data which is unique to data (leaf) blocks: */
|
||||
|
||||
/* stuff we store for the user (see dmu_buf_set_user) */
|
||||
void *db_user_ptr;
|
||||
void **db_user_data_ptr_ptr;
|
||||
dmu_buf_evict_func_t *db_evict_func;
|
||||
|
||||
uint8_t db_immediate_evict;
|
||||
uint8_t db_freed_in_flight;
|
||||
|
||||
uint8_t db_dirtycnt;
|
||||
} dmu_buf_impl_t;
|
||||
|
||||
/* Note: the dbuf hash table is exposed only for the mdb module */
|
||||
/*
 * DBUF_HASH_MUTEX(h, idx) yields a pointer to one of h's hash_mutexes,
 * selected by masking idx with (DBUF_MUTEXES - 1).  The mask only
 * covers every slot because DBUF_MUTEXES is a power of two; keep it so
 * if the value is ever changed.
 */
#define	DBUF_MUTEXES 256
#define	DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
|
||||
typedef struct dbuf_hash_table {
|
||||
uint64_t hash_table_mask;
|
||||
dmu_buf_impl_t **hash_table;
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES];
|
||||
} dbuf_hash_table_t;
|
||||
|
||||
|
||||
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
|
||||
|
||||
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
|
||||
void dbuf_create_bonus(struct dnode *dn);
|
||||
|
||||
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
|
||||
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
|
||||
void *tag);
|
||||
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
|
||||
void *tag, dmu_buf_impl_t **dbp);
|
||||
|
||||
void dbuf_prefetch(struct dnode *dn, uint64_t blkid);
|
||||
|
||||
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
|
||||
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
|
||||
|
||||
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
|
||||
|
||||
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
|
||||
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
void dbuf_evict(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
|
||||
struct dmu_tx *);
|
||||
|
||||
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_init(void);
|
||||
void dbuf_fini(void);
|
||||
|
||||
/*
 * Classification helpers for a dbuf pointer.
 *
 * All of these evaluate db more than once, so db must be free of side
 * effects.  DBUF_IS_METADATA() dereferences db->db_dnode whenever
 * db_level is 0; callers must therefore not use it on a level-0 dbuf
 * whose db_dnode is NULL.
 */

/* Non-zero level, or an object type whose dmu_ot[] entry is metadata. */
#define	DBUF_IS_METADATA(db)	\
	((db)->db_level > 0 || dmu_ot[(db)->db_dnode->dn_type].ot_metadata)

/* ARC buffer content type matching DBUF_IS_METADATA(). */
#define	DBUF_GET_BUFC_TYPE(db)	\
	(DBUF_IS_METADATA(db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)

/* Objset's os_primary_cache is "all", or "metadata" and db is metadata. */
#define	DBUF_IS_CACHEABLE(db)						\
	((db)->db_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
	(DBUF_IS_METADATA(db) &&					\
	((db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))

/* Same test as DBUF_IS_CACHEABLE(), but against os_secondary_cache. */
#define	DBUF_IS_L2CACHEABLE(db)						\
	((db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
	(DBUF_IS_METADATA(db) &&					\
	((db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
|
||||
|
||||
/*
 * Debug helpers.
 *
 * With ZFS_DEBUG defined, dprintf_dbuf() prefixes the caller's message
 * with the dbuf's object ("mdn" for DMU_META_DNODE_OBJECT), level and
 * block id, and dprintf_dbuf_bp() additionally appends a formatted
 * block pointer.  Both only do work when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags.  Because __VA_ARGS__ follows a comma in the expansion, at
 * least one argument must be supplied after fmt.
 *
 * Without ZFS_DEBUG all three macros expand to nothing, so their
 * arguments are never evaluated; do not rely on side effects in them.
 */
#ifdef ZFS_DEBUG

/*
 * There should be a ## between the string literal and fmt, to make it
 * clear that we're joining two strings together, but gcc does not
 * support that preprocessor token.
 */
#define	dprintf_dbuf(dbuf, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	char __db_buf[32]; \
	uint64_t __db_obj = (dbuf)->db.db_object; \
	if (__db_obj == DMU_META_DNODE_OBJECT) \
		(void) strcpy(__db_buf, "mdn"); \
	else \
		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
		    (u_longlong_t)__db_obj); \
	dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
	    "obj=%s lvl=%u blkid=%lld " fmt, \
	    __db_buf, (dbuf)->db_level, \
	    (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)

#define	dprintf_dbuf_bp(db, bp, fmt, ...) do {			\
	if (zfs_flags & ZFS_DEBUG_DPRINTF) {			\
	char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP);	\
	sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp);		\
	dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf);	\
	kmem_free(__blkbuf, BP_SPRINTF_LEN);			\
	} \
_NOTE(CONSTCOND) } while (0)

#define	DBUF_VERIFY(db)	dbuf_verify(db)

#else

#define	dprintf_dbuf(db, fmt, ...)
#define	dprintf_dbuf_bp(db, bp, fmt, ...)
#define	DBUF_VERIFY(db)

#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DBUF_H */
|
||||
@@ -0,0 +1,638 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_H
|
||||
#define _SYS_DMU_H
|
||||
|
||||
/*
|
||||
* This file describes the interface that the DMU provides for its
|
||||
* consumers.
|
||||
*
|
||||
* The DMU also interacts with the SPA. That interface is described in
|
||||
* dmu_spa.h.
|
||||
*/
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/cred.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Forward declarations so the prototypes below can take pointers to
 * these structures without pulling in their defining headers.
 * (A second, redundant "struct spa;" declaration was dropped.)
 */
struct uio;
struct page;
struct vnode;
struct spa;
struct zilog;
struct zio;
struct blkptr;
struct zap_cursor;
struct dsl_dataset;
struct dsl_pool;
struct dnode;
struct drr_begin;
struct drr_end;
struct zbookmark;
struct nvlist;
struct objset_impl;
|
||||
|
||||
/*
 * Opaque handle types: only the struct tags are named here, so code
 * including just this header can hold pointers to these types but
 * cannot look inside them.
 */
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
|
||||
|
||||
/*
 * DMU object types.  Values are assigned implicitly in declaration
 * order starting from 0, so DMU_OT_NUMTYPES equals the number of
 * entries before it.  Because the numbers are positional, new types
 * must only be added immediately before DMU_OT_NUMTYPES (as the "new
 * object types" group does); inserting or reordering would renumber
 * every later type.
 */
typedef enum dmu_object_type {
	DMU_OT_NONE,
	/* general: */
	DMU_OT_OBJECT_DIRECTORY,	/* ZAP */
	DMU_OT_OBJECT_ARRAY,		/* UINT64 */
	DMU_OT_PACKED_NVLIST,		/* UINT8 (XDR by nvlist_pack/unpack) */
	DMU_OT_PACKED_NVLIST_SIZE,	/* UINT64 */
	DMU_OT_BPLIST,			/* UINT64 */
	DMU_OT_BPLIST_HDR,		/* UINT64 */
	/* spa: */
	DMU_OT_SPACE_MAP_HEADER,	/* UINT64 */
	DMU_OT_SPACE_MAP,		/* UINT64 */
	/* zil: */
	DMU_OT_INTENT_LOG,		/* UINT64 */
	/* dmu: */
	DMU_OT_DNODE,			/* DNODE */
	DMU_OT_OBJSET,			/* OBJSET */
	/* dsl: */
	DMU_OT_DSL_DIR,			/* UINT64 */
	DMU_OT_DSL_DIR_CHILD_MAP,	/* ZAP */
	DMU_OT_DSL_DS_SNAP_MAP,		/* ZAP */
	DMU_OT_DSL_PROPS,		/* ZAP */
	DMU_OT_DSL_DATASET,		/* UINT64 */
	/* zpl: */
	DMU_OT_ZNODE,			/* ZNODE */
	DMU_OT_OLDACL,			/* Old ACL */
	DMU_OT_PLAIN_FILE_CONTENTS,	/* UINT8 */
	DMU_OT_DIRECTORY_CONTENTS,	/* ZAP */
	DMU_OT_MASTER_NODE,		/* ZAP */
	DMU_OT_UNLINKED_SET,		/* ZAP */
	/* zvol: */
	DMU_OT_ZVOL,			/* UINT8 */
	DMU_OT_ZVOL_PROP,		/* ZAP */
	/* other; for testing only! */
	DMU_OT_PLAIN_OTHER,		/* UINT8 */
	DMU_OT_UINT64_OTHER,		/* UINT64 */
	DMU_OT_ZAP_OTHER,		/* ZAP */
	/* new object types: */
	DMU_OT_ERROR_LOG,		/* ZAP */
	DMU_OT_SPA_HISTORY,		/* UINT8 */
	DMU_OT_SPA_HISTORY_OFFSETS,	/* spa_his_phys_t */
	DMU_OT_POOL_PROPS,		/* ZAP */
	DMU_OT_DSL_PERMS,		/* ZAP */
	DMU_OT_ACL,			/* ACL */
	DMU_OT_SYSACL,			/* SYSACL */
	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
	DMU_OT_NEXT_CLONES,		/* ZAP */
	DMU_OT_SCRUB_QUEUE,		/* ZAP */
	DMU_OT_NUMTYPES
} dmu_object_type_t;
|
||||
|
||||
/*
 * Objset types.  Values are positional (0 upward); DMU_OST_NUMTYPES is
 * the count of the entries that precede it.
 */
typedef enum dmu_objset_type {
	DMU_OST_NONE,
	DMU_OST_META,
	DMU_OST_ZFS,
	DMU_OST_ZVOL,
	DMU_OST_OTHER,			/* For testing only! */
	DMU_OST_ANY,			/* Be careful! */
	DMU_OST_NUMTYPES
} dmu_objset_type_t;
|
||||
|
||||
/*
 * Byteswap routines.  Every one has the same shape: a buffer pointer
 * and a size_t size, returning nothing.  The buffer is not
 * const-qualified, which is consistent with swapping in place --
 * confirm against the implementations.
 */
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
void byteswap_uint8_array(void *buf, size_t size);
void zap_byteswap(void *buf, size_t size);
void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
|
||||
|
||||
/*
 * Dataset open modes.  The low two bits (DS_MODE_TYPE_MASK) carry the
 * access type; DS_MODE_READONLY and DS_MODE_INCONSISTENT are separate
 * bits OR'd on top.
 *
 * DS_MODE_IS_READONLY() and DS_MODE_IS_INCONSISTENT() return the masked
 * bit itself (0x8 / 0x10 or 0), not 0/1 -- test them for truth rather
 * than comparing against 1.
 */
#define	DS_MODE_NOHOLD		0	/* internal use only */
#define	DS_MODE_USER		1	/* simple access, no special needs */
#define	DS_MODE_OWNER		2	/* the "main" access, e.g. a mount */
#define	DS_MODE_TYPE_MASK	0x3
#define	DS_MODE_TYPE(x)		((x) & DS_MODE_TYPE_MASK)
#define	DS_MODE_READONLY	0x8
#define	DS_MODE_IS_READONLY(x)	((x) & DS_MODE_READONLY)
#define	DS_MODE_INCONSISTENT	0x10
#define	DS_MODE_IS_INCONSISTENT(x)	((x) & DS_MODE_INCONSISTENT)

/* Distinct bits; may be combined. */
#define	DS_FIND_SNAPSHOTS	(1<<0)
#define	DS_FIND_CHILDREN	(1<<1)
|
||||
|
||||
/*
 * The maximum number of bytes that can be accessed as part of one
 * operation, including metadata.
 *
 * DMU_MAX_ACCESS is a byte count (10 << 20 == 10485760);
 * DMU_MAX_DELETEBLKCNT is a block count, not a byte count.
 */
#define	DMU_MAX_ACCESS (10<<20) /* 10MB */
#define	DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
|
||||
|
||||
/*
|
||||
* Public routines to create, destroy, open, and close objsets.
|
||||
*/
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
int dmu_objset_rename(const char *name, const char *newname,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
|
||||
/*
 * The publicly visible description of a DMU buffer: which object it
 * belongs to, where in that object it sits, how large it is, and a
 * pointer to its contents.
 */
typedef struct dmu_buf {
	uint64_t db_object;		/* object that this buffer is part of */
	uint64_t db_offset;		/* byte offset in this object */
	uint64_t db_size;		/* size of buffer in bytes */
	void *db_data;			/* data in buffer */
} dmu_buf_t;

/*
 * Callback type invoked with the buffer and a caller-supplied pointer.
 * By its name this is an eviction callback.
 */
typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
|
||||
/*
 * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
 *
 * DMU_POOL_DIRECTORY_OBJECT is the object number of that directory; the
 * rest are entry-name strings.  The strings are looked up by name, so
 * their spelling must never change.
 */
#define	DMU_POOL_DIRECTORY_OBJECT	1
#define	DMU_POOL_CONFIG			"config"
#define	DMU_POOL_ROOT_DATASET		"root_dataset"
#define	DMU_POOL_SYNC_BPLIST		"sync_bplist"
#define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
#define	DMU_POOL_ERRLOG_LAST		"errlog_last"
#define	DMU_POOL_SPARES			"spares"
#define	DMU_POOL_DEFLATE		"deflate"
#define	DMU_POOL_HISTORY		"history"
#define	DMU_POOL_PROPS			"pool_props"
#define	DMU_POOL_L2CACHE		"l2cache"

/* 4x8 zbookmark_t */
#define	DMU_POOL_SCRUB_BOOKMARK		"scrub_bookmark"
/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
#define	DMU_POOL_SCRUB_QUEUE		"scrub_queue"
/* 1x8 txg */
#define	DMU_POOL_SCRUB_MIN_TXG		"scrub_min_txg"
/* 1x8 txg */
#define	DMU_POOL_SCRUB_MAX_TXG		"scrub_max_txg"
/* 1x4 enum scrub_func */
#define	DMU_POOL_SCRUB_FUNC		"scrub_func"
/* 1x8 count */
#define	DMU_POOL_SCRUB_ERRORS		"scrub_errors"
|
||||
|
||||
/*
|
||||
* Allocate an object from this objset. The range of object numbers
|
||||
* available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode.
|
||||
*
|
||||
* The transaction must be assigned to a txg. The newly allocated
|
||||
* object will be "held" in the transaction (ie. you can modify the
|
||||
* newly allocated object in this transaction).
|
||||
*
|
||||
* dmu_object_alloc() chooses an object and returns its object number.
|
||||
*
|
||||
* dmu_object_claim() allocates a specific object number. If that
|
||||
* number is already allocated, it fails and returns EEXIST.
|
||||
*
|
||||
* Return 0 on success, or ENOSPC or EEXIST as specified above.
|
||||
*/
|
||||
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free an object from this objset.
|
||||
*
|
||||
* The object's data will be freed as well (ie. you don't need to call
|
||||
* dmu_free(object, 0, -1, tx)).
|
||||
*
|
||||
* The object need not be held in the transaction.
|
||||
*
|
||||
* If there are any holds on this object's buffers (via dmu_buf_hold()),
|
||||
* or tx holds on the object (via dmu_tx_hold_object()), you can not
|
||||
* free it; it fails and returns EBUSY.
|
||||
*
|
||||
* If the object is not allocated, it fails and returns ENOENT.
|
||||
*
|
||||
* Return 0 on success, or EBUSY or ENOENT as specified above.
|
||||
*/
|
||||
int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Find the next allocated or free object.
|
||||
*
|
||||
* The objectp parameter is in-out. It will be updated to be the next
|
||||
* object which is allocated. Ignore objects which have not been
|
||||
* modified since txg.
|
||||
*
|
||||
* XXX Can only be called on a objset with no dirty data.
|
||||
*
|
||||
* Returns 0 on success, or ENOENT if there are no more objects.
|
||||
*/
|
||||
int dmu_object_next(objset_t *os, uint64_t *objectp,
|
||||
boolean_t hole, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Set the data blocksize for an object.
|
||||
*
|
||||
* The object cannot have any blocks allocated beyond the first. If
|
||||
* the first block is allocated already, the new size must be greater
|
||||
* than the current block size. If these conditions are not met,
|
||||
* ENOTSUP will be returned.
|
||||
*
|
||||
* Returns 0 on success, or EBUSY if there are any holds on the object
|
||||
* contents, or ENOTSUP as described above.
|
||||
*/
|
||||
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
|
||||
int ibs, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the checksum property on a dnode. The new checksum algorithm will
|
||||
* apply to all newly written blocks; existing blocks will not be affected.
|
||||
*/
|
||||
void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the compress property on a dnode. The new compression algorithm will
|
||||
* apply to all newly written blocks; existing blocks will not be affected.
|
||||
*/
|
||||
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Decide how many copies of a given block we should make. Can be from
|
||||
* 1 to SPA_DVAS_PER_BP.
|
||||
*/
|
||||
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
|
||||
dmu_object_type_t ot);
|
||||
/*
|
||||
* The bonus data is accessed more or less like a regular buffer.
|
||||
* You must dmu_bonus_hold() to get the buffer, which will give you a
|
||||
* dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
|
||||
* data. As with any normal buffer, you must call dmu_buf_read() to
|
||||
* read db_data, dmu_buf_will_dirty() before modifying it, and the
|
||||
* object must be held in an assigned transaction before calling
|
||||
* dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
|
||||
* buffer as well. You must release your hold with dmu_buf_rele().
|
||||
*/
|
||||
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
|
||||
int dmu_bonus_max(void);
|
||||
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
||||
|
||||
/*
|
||||
* Obtain the DMU buffer from the specified object which contains the
|
||||
* specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
|
||||
* that it will remain in memory. You must release the hold with
|
||||
* dmu_buf_rele(). You mustn't access the dmu_buf_t after releasing your
|
||||
* hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
|
||||
*
|
||||
* You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
|
||||
* on the returned buffer before reading or writing the buffer's
|
||||
* db_data. The comments for those routines describe what particular
|
||||
* operations are valid after calling them.
|
||||
*
|
||||
* The object number must be a valid, allocated object number.
|
||||
*/
|
||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **);
|
||||
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
|
||||
void dmu_buf_rele(dmu_buf_t *db, void *tag);
|
||||
uint64_t dmu_buf_refcount(dmu_buf_t *db);
|
||||
|
||||
/*
|
||||
* dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
|
||||
* range of an object. A pointer to an array of dmu_buf_t*'s is
|
||||
* returned (in *dbpp).
|
||||
*
|
||||
* dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
|
||||
* frees the array. The hold on the array of buffers MUST be released
|
||||
* with dmu_buf_rele_array. You can NOT release the hold on each buffer
|
||||
* individually with dmu_buf_rele.
|
||||
*/
|
||||
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
|
||||
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
|
||||
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
|
||||
|
||||
/*
|
||||
* Returns NULL on success, or the existing user ptr if it's already
|
||||
* been set.
|
||||
*
|
||||
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
|
||||
*
|
||||
* user_data_ptr_ptr should be NULL, or a pointer to a pointer which
|
||||
* will be set to db->db_data when you are allowed to access it. Note
|
||||
* that db->db_data (the pointer) can change when you do dmu_buf_read(),
|
||||
* dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
|
||||
* *user_data_ptr_ptr will be set to the new value when it changes.
|
||||
*
|
||||
* If non-NULL, pageout func will be called when this buffer is being
|
||||
* excised from the cache, so that you can clean up the data structure
|
||||
* pointed to by user_ptr.
|
||||
*
|
||||
* dmu_evict_user() will call the pageout func for all buffers in an
|
||||
* objset with a given pageout func.
|
||||
*/
|
||||
void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
|
||||
dmu_buf_evict_func_t *pageout_func);
|
||||
/*
|
||||
* set_user_ie is the same as set_user, but requests immediate eviction
|
||||
* when hold count goes to zero.
|
||||
*/
|
||||
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
|
||||
void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
|
||||
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
|
||||
void *user_ptr, void *user_data_ptr_ptr,
|
||||
dmu_buf_evict_func_t *pageout_func);
|
||||
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
|
||||
|
||||
/*
|
||||
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
|
||||
*/
|
||||
void *dmu_buf_get_user(dmu_buf_t *db);
|
||||
|
||||
/*
|
||||
* Indicate that you are going to modify the buffer's data (db_data).
|
||||
*
|
||||
* The transaction (tx) must be assigned to a txg (ie. you've called
|
||||
* dmu_tx_assign()). The buffer's object must be held in the tx
|
||||
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
||||
*/
|
||||
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* You must create a transaction, then hold the objects which you will
|
||||
* (or might) modify as part of this transaction. Then you must assign
|
||||
* the transaction to a transaction group. Once the transaction has
|
||||
* been assigned, you can modify buffers which belong to held objects as
|
||||
* part of this transaction. You can't modify buffers before the
|
||||
* transaction has been assigned; you can't modify buffers which don't
|
||||
* belong to objects which this transaction holds; you can't hold
|
||||
* objects once the transaction has been assigned. You may hold an
|
||||
* object which you are going to free (with dmu_object_free()), but you
|
||||
* don't have to.
|
||||
*
|
||||
* You can abort the transaction before it has been assigned.
|
||||
*
|
||||
* Note that you may hold buffers (with dmu_buf_hold) at any time,
|
||||
* regardless of transaction state.
|
||||
*/
|
||||
|
||||
/*
 * Sentinel values.  Both expand to (-1ULL), i.e. the largest
 * unsigned long long value, so the two are numerically identical.
 * NOTE(review): presumably DMU_NEW_OBJECT is used in place of an object
 * number and DMU_OBJECT_END in place of an offset/length -- confirm
 * against the callers.
 */
#define DMU_NEW_OBJECT (-1ULL)
|
||||
#define DMU_OBJECT_END (-1ULL)
|
||||
|
||||
dmu_tx_t *dmu_tx_create(objset_t *os);
|
||||
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
|
||||
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
|
||||
uint64_t len);
|
||||
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
|
||||
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free up the data blocks for a defined range of a file. If size is
|
||||
* -1 (DMU_OBJECT_END), the range from offset to end-of-file is freed.
|
||||
*/
|
||||
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, dmu_tx_t *tx);
|
||||
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size);
|
||||
int dmu_free_object(objset_t *os, uint64_t object);
|
||||
|
||||
/*
|
||||
* Convenience functions.
|
||||
*
|
||||
* Canfail routines will return 0 on success, or an errno if there is a
|
||||
* nonrecoverable I/O error.
|
||||
*/
|
||||
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
void *buf);
|
||||
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
const void *buf, dmu_tx_t *tx);
|
||||
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
|
||||
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, struct page *pp, dmu_tx_t *tx);
|
||||
|
||||
extern int zfs_prefetch_disable;
|
||||
|
||||
/*
|
||||
* Asynchronously try to read in the data.
|
||||
*/
|
||||
void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t len);
|
||||
|
||||
/*
 * Description of a single DMU object.  This is the type of the `doi`
 * output argument of dmu_object_info(), dmu_object_info_from_dnode() and
 * dmu_object_info_from_db().
 */
typedef struct dmu_object_info {
|
||||
/* All sizes are in bytes. */
|
||||
uint32_t doi_data_block_size;
|
||||
uint32_t doi_metadata_block_size;
|
||||
uint64_t doi_bonus_size;
|
||||
dmu_object_type_t doi_type;
|
||||
dmu_object_type_t doi_bonus_type;
|
||||
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
|
||||
uint8_t doi_checksum; /* presumably as set by dmu_object_set_checksum() -- confirm */
|
||||
uint8_t doi_compress; /* presumably as set by dmu_object_set_compress() -- confirm */
|
||||
uint8_t doi_pad[5];
|
||||
/* Values below are number of 512-byte blocks. */
|
||||
uint64_t doi_physical_blks; /* data + metadata */
|
||||
uint64_t doi_max_block_offset;
|
||||
} dmu_object_info_t;
|
||||
|
||||
typedef void arc_byteswap_func_t(void *buf, size_t size);
|
||||
|
||||
/*
 * Per-object-type descriptor.  The dmu_ot[] table declared after this
 * struct holds DMU_OT_NUMTYPES entries of this type.
 */
typedef struct dmu_object_type_info {
|
||||
arc_byteswap_func_t *ot_byteswap; /* callback taking (void *buf, size_t size) */
|
||||
boolean_t ot_metadata; /* NOTE(review): presumably true for metadata types -- confirm */
|
||||
char *ot_name; /* NOTE(review): presumably a printable type name -- confirm */
|
||||
} dmu_object_type_info_t;
|
||||
|
||||
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
|
||||
|
||||
/*
|
||||
* Get information on a DMU object.
|
||||
*
|
||||
* Return 0 on success or ENOENT if object is not allocated.
|
||||
*
|
||||
* If doi is NULL, just indicates whether the object exists.
|
||||
*/
|
||||
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
|
||||
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
|
||||
u_longlong_t *nblk512);
|
||||
|
||||
/*
 * Dataset statistics; this is the type of the `stat` output argument of
 * dmu_objset_fast_stat().
 */
typedef struct dmu_objset_stats {
|
||||
uint64_t dds_num_clones; /* number of clones of this dataset */
|
||||
uint64_t dds_creation_txg;
|
||||
uint64_t dds_guid;
|
||||
dmu_objset_type_t dds_type;
|
||||
uint8_t dds_is_snapshot; /* NOTE(review): presumably a 0/1 flag -- confirm */
|
||||
uint8_t dds_inconsistent; /* NOTE(review): presumably a 0/1 flag -- confirm */
|
||||
char dds_origin[MAXNAMELEN]; /* fixed-size buffer of MAXNAMELEN chars */
|
||||
} dmu_objset_stats_t;
|
||||
|
||||
/*
|
||||
* Get stats on a dataset.
|
||||
*/
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
|
||||
/*
|
||||
* Add entries to the nvlist for all the objset's properties. See
|
||||
* zfs_prop_table[] and zfs(1m) for details on the properties.
|
||||
*/
|
||||
void dmu_objset_stats(objset_t *os, struct nvlist *nv);
|
||||
|
||||
/*
|
||||
* Get the space usage statistics for statvfs().
|
||||
*
|
||||
* refdbytes is the amount of space "referenced" by this objset.
|
||||
* availbytes is the amount of space available to this objset, taking
|
||||
* into account quotas & reservations, assuming that no other objsets
|
||||
* use the space first. These values correspond to the 'referenced' and
|
||||
* 'available' properties, described in the zfs(1m) manpage.
|
||||
*
|
||||
* usedobjs and availobjs are the number of objects currently allocated,
|
||||
* and available.
|
||||
*/
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
|
||||
/*
|
||||
* The fsid_guid is a 56-bit ID that can change to avoid collisions.
|
||||
* (Contrast with the ds_guid which is a 64-bit ID that will never
|
||||
* change, so there is a small probability that it will collide.)
|
||||
*/
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
|
||||
int dmu_objset_is_snapshot(objset_t *os);
|
||||
|
||||
extern struct spa *dmu_objset_spa(objset_t *os);
|
||||
extern struct zilog *dmu_objset_zil(objset_t *os);
|
||||
extern struct dsl_pool *dmu_objset_pool(objset_t *os);
|
||||
extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
|
||||
extern void dmu_objset_name(objset_t *os, char *buf);
|
||||
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
|
||||
extern uint64_t dmu_objset_id(objset_t *os);
|
||||
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
|
||||
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
||||
int maxlen, boolean_t *conflict);
|
||||
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *idp, uint64_t *offp);
|
||||
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
|
||||
extern void *dmu_objset_get_user(objset_t *os);
|
||||
|
||||
/*
|
||||
* Return the txg number for the given assigned transaction.
|
||||
*/
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Synchronous write.
|
||||
* If a parent zio is provided this function initiates a write on the
|
||||
* provided buffer as a child of the parent zio.
|
||||
* In the absence of a parent zio, the write is completed synchronously.
|
||||
* At write completion, blk is filled with the bp of the written block.
|
||||
* Note that while the data covered by this function will be on stable
|
||||
* storage when the write completes this new data does not become a
|
||||
* permanent part of the file until the associated transaction commits.
|
||||
*/
|
||||
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
|
||||
int dmu_sync(struct zio *zio, dmu_buf_t *db,
|
||||
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
|
||||
|
||||
/*
|
||||
* Find the next hole or data block in file starting at *off.
|
||||
* Return found offset in *off. Return ESRCH for end of file.
|
||||
*/
|
||||
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
|
||||
uint64_t *off);
|
||||
|
||||
/*
|
||||
* Initial setup and final teardown.
|
||||
*/
|
||||
extern void dmu_init(void);
|
||||
extern void dmu_fini(void);
|
||||
|
||||
typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
|
||||
uint64_t object, uint64_t offset, int len);
|
||||
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
|
||||
dmu_traverse_cb_t cb, void *arg);
|
||||
|
||||
int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
struct vnode *vp, offset_t *off);
|
||||
|
||||
typedef struct dmu_recv_cookie {
|
||||
/*
|
||||
* This structure is opaque!
|
||||
|
||||
*
|
||||
* If logical and real are different, we are receiving the stream
|
||||
* into the "real" temporary clone, and then switching it with
|
||||
* the "logical" target.
|
||||
*/
|
||||
struct dsl_dataset *drc_logical_ds; /* the "logical" target */
|
||||
struct dsl_dataset *drc_real_ds; /* the "real" dataset the stream is received into */
|
||||
struct drr_begin *drc_drrb; /* NOTE(review): presumably the drr_begin given to dmu_recv_begin() -- confirm */
|
||||
char *drc_tosnap; /* NOTE(review): presumably the tosnap argument of dmu_recv_begin() -- confirm */
|
||||
boolean_t drc_newfs;
|
||||
boolean_t drc_force; /* NOTE(review): presumably the force argument of dmu_recv_begin() -- confirm */
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
|
||||
boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
|
||||
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc);
|
||||
void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
|
||||
|
||||
/* CRC64 table */
|
||||
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
|
||||
extern uint64_t zfs_crc64_table[256];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_H */
|
||||
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_IMPL_H
|
||||
#define _SYS_DMU_IMPL_H
|
||||
|
||||
#include <sys/txg_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the locking strategy for the DMU. Numbers in parentheses are
|
||||
* cases that use that lock order, referenced below:
|
||||
*
|
||||
* ARC is self-contained
|
||||
* bplist is self-contained
|
||||
* refcount is self-contained
|
||||
* txg is self-contained (hopefully!)
|
||||
* zst_lock
|
||||
* zf_rwlock
|
||||
*
|
||||
* XXX try to improve evicting path?
|
||||
*
|
||||
* dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
|
||||
* dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leaves
|
||||
*
|
||||
* dp_config_rwlock
|
||||
* must be held before: everything
|
||||
* protects dd namespace changes
|
||||
* protects property changes globally
|
||||
* held from:
|
||||
* dsl_dir_open/r:
|
||||
* dsl_dir_create_sync/w:
|
||||
* dsl_dir_sync_destroy/w:
|
||||
* dsl_dir_rename_sync/w:
|
||||
* dsl_prop_changed_notify/r:
|
||||
*
|
||||
* os_obj_lock
|
||||
* must be held before:
|
||||
* everything except dp_config_rwlock
|
||||
* protects os_obj_next
|
||||
* held from:
|
||||
* dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
|
||||
*
|
||||
* dn_struct_rwlock
|
||||
* must be held before:
|
||||
* everything except dp_config_rwlock and os_obj_lock
|
||||
* protects structure of dnode (eg. nlevels)
|
||||
* db_blkptr can change when syncing out change to nlevels
|
||||
* dn_maxblkid
|
||||
* dn_nlevels
|
||||
* dn_*blksz*
|
||||
* phys nlevels, maxblkid, physical blkptr_t's (?)
|
||||
* held from:
|
||||
* callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
|
||||
* dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
|
||||
* dmu_tx_count_free:
|
||||
* dbuf_read_impl: db_mtx, dmu_zfetch()
|
||||
* dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
|
||||
* dbuf_new_size: db_mtx
|
||||
* dbuf_dirty: db_mtx
|
||||
* dbuf_findbp: (callers, phys? - the real need)
|
||||
* dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
|
||||
* dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
|
||||
* dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
|
||||
* dnode_sync/w (increase_indirection): db_mtx (phys)
|
||||
* dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
|
||||
* dnode_new_blkid/w: (dn_maxblkid)
|
||||
* dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
|
||||
* dnode_next_offset: (phys)
|
||||
*
|
||||
* dn_dbufs_mtx
|
||||
* must be held before:
|
||||
* db_mtx, hash_mutexes
|
||||
* protects:
|
||||
* dn_dbufs
|
||||
* dn_evicted
|
||||
* held from:
|
||||
* dmu_evict_user: db_mtx (dn_dbufs)
|
||||
* dbuf_free_range: db_mtx (dn_dbufs)
|
||||
* dbuf_remove_ref: db_mtx, callees:
|
||||
* dbuf_hash_remove: hash_mutexes, db_mtx
|
||||
* dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
|
||||
* dnode_set_blksz: (dn_dbufs)
|
||||
*
|
||||
* hash_mutexes (global)
|
||||
* must be held before:
|
||||
* db_mtx
|
||||
* protects dbuf_hash_table (global) and db_hash_next
|
||||
* held from:
|
||||
* dbuf_find: db_mtx
|
||||
* dbuf_hash_insert: db_mtx
|
||||
* dbuf_hash_remove: db_mtx
|
||||
*
|
||||
* db_mtx (meta-leaf)
|
||||
* must be held before:
|
||||
* dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
|
||||
* protects:
|
||||
* db_state
|
||||
* db_holds
|
||||
* db_buf
|
||||
* db_changed
|
||||
* db_data_pending
|
||||
* db_dirtied
|
||||
* db_link
|
||||
* db_dirty_node (??)
|
||||
* db_dirtycnt
|
||||
* db_d.*
|
||||
* db.*
|
||||
* held from:
|
||||
* dbuf_dirty: dn_mtx, dn_dirty_mtx
|
||||
* dbuf_dirty->dsl_dir_willuse_space: dd_lock
|
||||
* dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
|
||||
* dbuf_undirty: dn_dirty_mtx (db_d)
|
||||
* dbuf_write_done: dn_dirty_mtx (db_state)
|
||||
* dbuf_*
|
||||
* dmu_buf_update_user: none (db_d)
|
||||
* dmu_evict_user: none (db_d) (maybe can eliminate)
|
||||
* dbuf_find: none (db_holds)
|
||||
* dbuf_hash_insert: none (db_holds)
|
||||
* dmu_buf_read_array_impl: none (db_state, db_changed)
|
||||
* dmu_sync: none (db_dirty_node, db_d)
|
||||
* dnode_reallocate: none (db)
|
||||
*
|
||||
* dn_mtx (leaf)
|
||||
* protects:
|
||||
* dn_dirty_dbufs
|
||||
* dn_ranges
|
||||
* phys accounting
|
||||
* dn_allocated_txg
|
||||
* dn_free_txg
|
||||
* dn_assigned_txg
|
||||
* dd_assigned_tx
|
||||
* dn_notxholds
|
||||
* dn_dirtyctx
|
||||
* dn_dirtyctx_firstset
|
||||
* (dn_phys copy fields?)
|
||||
* (dn_phys contents?)
|
||||
* held from:
|
||||
* dnode_*
|
||||
* dbuf_dirty: none
|
||||
* dbuf_sync: none (phys accounting)
|
||||
* dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
|
||||
* dbuf_write_done: none (phys accounting)
|
||||
* dmu_object_info_from_dnode: none (accounting)
|
||||
* dmu_tx_commit: none
|
||||
* dmu_tx_hold_object_impl: none
|
||||
* dmu_tx_try_assign: dn_notxholds(cv)
|
||||
* dmu_tx_unassign: none
|
||||
*
|
||||
* dd_lock
|
||||
* must be held before:
|
||||
* ds_lock
|
||||
* ancestors' dd_lock
|
||||
* protects:
|
||||
* dd_prop_cbs
|
||||
* dd_sync_*
|
||||
* dd_used_bytes
|
||||
* dd_tempreserved
|
||||
* dd_space_towrite
|
||||
* dd_myname
|
||||
* dd_phys accounting?
|
||||
* held from:
|
||||
* dsl_dir_*
|
||||
* dsl_prop_changed_notify: none (dd_prop_cbs)
|
||||
* dsl_prop_register: none (dd_prop_cbs)
|
||||
* dsl_prop_unregister: none (dd_prop_cbs)
|
||||
* dsl_dataset_block_freeable: none (dd_sync_*)
|
||||
*
|
||||
* os_lock (leaf)
|
||||
* protects:
|
||||
* os_dirty_dnodes
|
||||
* os_free_dnodes
|
||||
* os_dnodes
|
||||
* os_downgraded_dbufs
|
||||
* dn_dirtyblksz
|
||||
* dn_dirty_link
|
||||
* held from:
|
||||
* dnode_create: none (os_dnodes)
|
||||
* dnode_destroy: none (os_dnodes)
|
||||
* dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
|
||||
* dnode_free: none (dn_dirtyblksz, os_*_dnodes)
|
||||
*
|
||||
* ds_lock
|
||||
* protects:
|
||||
* ds_user_ptr
|
||||
* ds_user_evict_func
|
||||
* ds_open_refcount
|
||||
* ds_snapname
|
||||
* ds_phys accounting
|
||||
* ds_reserved
|
||||
* held from:
|
||||
* dsl_dataset_*
|
||||
*
|
||||
* dr_mtx (leaf)
|
||||
* protects:
|
||||
* dr_children
|
||||
* held from:
|
||||
* dbuf_dirty
|
||||
* dbuf_undirty
|
||||
* dbuf_sync_indirect
|
||||
* dnode_new_blkid
|
||||
*/
|
||||
|
||||
struct objset;
|
||||
struct dmu_pool;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_IMPL_H */
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_OBJSET_H
|
||||
#define _SYS_DMU_OBJSET_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dmu_tx;
|
||||
struct objset_impl;
|
||||
|
||||
/*
 * Fixed-size objset record.  os_pad is sized so that the four members
 * add up to exactly 1024 bytes (assuming the compiler inserts no padding
 * between them).
 * NOTE(review): the "phys" suffix suggests this is the on-disk layout --
 * confirm before changing any member.
 */
typedef struct objset_phys {
|
||||
dnode_phys_t os_meta_dnode;
|
||||
zil_header_t os_zil_header;
|
||||
uint64_t os_type;
|
||||
/* filler: 1024 minus the sizes of the three members above */
char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
|
||||
sizeof (uint64_t)];
|
||||
} objset_phys_t;
|
||||
|
||||
/*
 * Objset handle: a pointer to the implementation structure plus a mode.
 * NOTE(review): objset_t is presumably the typedef for this struct, and
 * os_mode presumably holds the `mode` given to dmu_objset_open() --
 * confirm both.
 */
struct objset {
|
||||
struct objset_impl *os; /* implementation state (objset_impl_t) */
|
||||
int os_mode;
|
||||
};
|
||||
|
||||
/*
 * Implementation state of an object set.  struct objset points at one of
 * these through its `os` member, and this structure in turn embeds an
 * objset_t (also named `os`).  Members are grouped below by the locking
 * rule that applies to them.
 */
typedef struct objset_impl {
|
||||
/* Immutable: */
|
||||
struct dsl_dataset *os_dsl_dataset;
|
||||
spa_t *os_spa;
|
||||
arc_buf_t *os_phys_buf;
|
||||
objset_phys_t *os_phys;
|
||||
dnode_t *os_meta_dnode;
|
||||
zilog_t *os_zil;
|
||||
objset_t os;
|
||||
uint8_t os_checksum; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_compress; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_copies; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
|
||||
/* os_secondary_cache is the field tested by DMU_OS_IS_L2CACHEABLE() */
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
blkptr_t *os_rootbp;
|
||||
zil_header_t os_zil_header;
|
||||
|
||||
/* Protected by os_obj_lock */
|
||||
kmutex_t os_obj_lock;
|
||||
uint64_t os_obj_next;
|
||||
|
||||
/* Protected by os_lock */
|
||||
kmutex_t os_lock;
|
||||
list_t os_dirty_dnodes[TXG_SIZE]; /* TXG_SIZE separate lists */
|
||||
list_t os_free_dnodes[TXG_SIZE]; /* TXG_SIZE separate lists */
|
||||
list_t os_dnodes;
|
||||
list_t os_downgraded_dbufs;
|
||||
|
||||
/* stuff we store for the user */
|
||||
kmutex_t os_user_ptr_lock;
|
||||
void *os_user_ptr; /* NOTE(review): presumably the pointer handled by dmu_objset_set_user()/dmu_objset_get_user() -- confirm */
|
||||
} objset_impl_t;
|
||||
|
||||
#define DMU_META_DNODE_OBJECT 0
|
||||
|
||||
/*
 * Evaluates to nonzero when (os)->os_secondary_cache is ZFS_CACHE_ALL or
 * ZFS_CACHE_METADATA.  `os` must be a pointer to a structure that has an
 * os_secondary_cache member (objset_impl_t has one); the argument may be
 * evaluated twice, so it should be free of side effects.
 */
#define DMU_OS_IS_L2CACHEABLE(os) \
|
||||
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
|
||||
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
|
||||
|
||||
/* called from zpl */
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int flags);
|
||||
int dmu_objset_find_spa(spa_t *spa, const char *name,
|
||||
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
|
||||
/* called from dsl */
|
||||
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
||||
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
|
||||
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
|
||||
objset_impl_t **osip);
|
||||
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_OBJSET_H */
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TRAVERSE_H
|
||||
#define _SYS_DMU_TRAVERSE_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dnode_phys;
|
||||
struct dsl_dataset;
|
||||
|
||||
/*
 * Callback type accepted as the `func` argument of traverse_dataset() and
 * traverse_pool().
 * NOTE(review): `arg` is presumably the caller's opaque pointer passed to
 * those routines, and the meaning of the int return value is not visible
 * in this header -- confirm both in the implementation.
 */
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
|
||||
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
|
||||
|
||||
/*
 * Traversal bit flags.  Each of the first four occupies its own bit;
 * TRAVERSE_PREFETCH is the OR of the two prefetch bits.
 * NOTE(review): presumably these are the values accepted in the `flags`
 * argument of traverse_dataset() -- confirm against the implementation.
 */
#define TRAVERSE_PRE (1<<0)
|
||||
#define TRAVERSE_POST (1<<1)
|
||||
#define TRAVERSE_PREFETCH_METADATA (1<<2)
|
||||
#define TRAVERSE_PREFETCH_DATA (1<<3)
|
||||
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
|
||||
|
||||
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
|
||||
int flags, blkptr_cb_t func, void *arg);
|
||||
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_TRAVERSE_H */
|
||||
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TX_H
|
||||
#define _SYS_DMU_TX_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dmu_buf_impl;
|
||||
struct dmu_tx_hold;
|
||||
struct dnode_link;
|
||||
struct dsl_pool;
|
||||
struct dnode;
|
||||
struct dsl_dir;
|
||||
|
||||
/*
 * State of one DMU transaction.  The tx_space_* members at the end exist
 * only when ZFS_DEBUG is defined, so the structure's size depends on that
 * macro.
 */
struct dmu_tx {
|
||||
/*
|
||||
* No synchronization is needed because a tx can only be handled
|
||||
* by one thread.
|
||||
*/
|
||||
list_t tx_holds; /* list of dmu_tx_hold_t */
|
||||
objset_t *tx_objset;
|
||||
struct dsl_dir *tx_dir;
|
||||
struct dsl_pool *tx_pool;
|
||||
uint64_t tx_txg; /* NOTE(review): presumably the value returned by dmu_tx_get_txg() -- confirm */
|
||||
uint64_t tx_lastsnap_txg;
|
||||
uint64_t tx_lasttried_txg;
|
||||
txg_handle_t tx_txgh;
|
||||
void *tx_tempreserve_cookie;
|
||||
struct dmu_tx_hold *tx_needassign_txh;
|
||||
uint8_t tx_anyobj;
|
||||
int tx_err;
|
||||
#ifdef ZFS_DEBUG /* debug-only accounting members follow */
|
||||
uint64_t tx_space_towrite;
|
||||
uint64_t tx_space_tofree;
|
||||
uint64_t tx_space_tooverwrite;
|
||||
uint64_t tx_space_tounref;
|
||||
refcount_t tx_space_written;
|
||||
refcount_t tx_space_freed;
|
||||
#endif /* ZFS_DEBUG */
|
||||
};
|
||||
|
||||
/*
 * Kind of a transaction hold.  In this header the only user is the
 * ZFS_DEBUG-only txh_type member of dmu_tx_hold_t.
 * NOTE(review): the enumerators appear to mirror the dmu_tx_hold_*()
 * entry points (write, bonus, free, zap, space) -- confirm.
 */
enum dmu_tx_hold_type {
|
||||
THT_NEWOBJECT,
|
||||
THT_WRITE,
|
||||
THT_BONUS,
|
||||
THT_FREE,
|
||||
THT_ZAP,
|
||||
THT_SPACE,
|
||||
THT_NUMTYPES /* last enumerator: equals the number of hold types above */
|
||||
};
|
||||
|
||||
/*
 * One hold belonging to a transaction; struct dmu_tx keeps these on its
 * tx_holds list.  The txh_type/txh_arg* members exist only when ZFS_DEBUG
 * is defined.
 */
typedef struct dmu_tx_hold {
|
||||
dmu_tx_t *txh_tx; /* NOTE(review): presumably the owning transaction -- confirm */
|
||||
list_node_t txh_node; /* NOTE(review): presumably the linkage for dmu_tx.tx_holds -- confirm */
|
||||
struct dnode *txh_dnode;
|
||||
uint64_t txh_space_towrite;
|
||||
uint64_t txh_space_tofree;
|
||||
uint64_t txh_space_tooverwrite;
|
||||
uint64_t txh_space_tounref;
|
||||
uint64_t txh_memory_tohold;
|
||||
uint64_t txh_fudge;
|
||||
#ifdef ZFS_DEBUG /* debug-only members follow */
|
||||
enum dmu_tx_hold_type txh_type;
|
||||
uint64_t txh_arg1;
|
||||
uint64_t txh_arg2;
|
||||
#endif /* ZFS_DEBUG */
|
||||
} dmu_tx_hold_t;
|
||||
|
||||
|
||||
/*
|
||||
* These routines are also declared in dmu.h, and are called by the user.
|
||||
*/
|
||||
dmu_tx_t *dmu_tx_create(objset_t *os); /* parameter is an objset; named `os` as in the dmu.h prototype */
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||||
*/
|
||||
extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* These routines are only called by the DMU.
|
||||
*/
|
||||
dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
|
||||
int dmu_tx_is_syncing(dmu_tx_t *tx);
|
||||
int dmu_tx_private_ok(dmu_tx_t *tx);
|
||||
void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
|
||||
void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
|
||||
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
|
||||
int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
|
||||
|
||||
/*
 * DMU_TX_DIRTY_BUF(tx, db) forwards to dmu_tx_dirty_buf() in ZFS_DEBUG
 * builds and expands to nothing otherwise.  In non-debug builds the
 * arguments are therefore never evaluated, so they must not carry side
 * effects the caller depends on.
 */
#ifdef ZFS_DEBUG
|
||||
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
|
||||
#else
|
||||
#define DMU_TX_DIRTY_BUF(tx, db)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_TX_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _DFETCH_H
|
||||
#define _DFETCH_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern uint64_t zfetch_array_rd_sz;
|
||||
|
||||
struct dnode; /* so we can reference dnode */
|
||||
|
||||
/*
 * Direction of a prefetch stream (see zstream_t's zst_direction).  The
 * enumerators are deliberately +1 and -1 rather than consecutive values.
 */
typedef enum zfetch_dirn {
|
||||
ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
|
||||
ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
|
||||
} zfetch_dirn_t;
|
||||
|
||||
/*
 * One prefetch stream.  Offsets, lengths, stride and cap are all expressed
 * in blocks, as noted on each member.
 *
 * NOTE(review): zst_node is declared as an avl_node_t, while zfetch_t's
 * zf_stream container is declared as a list_t.  Confirm which container
 * actually links streams before relying on either comment.
 */
typedef struct zstream {
|
||||
uint64_t zst_offset; /* offset of starting block in range */
|
||||
uint64_t zst_len; /* length of range, in blocks */
|
||||
zfetch_dirn_t zst_direction; /* direction of prefetch */
|
||||
uint64_t zst_stride; /* length of stride, in blocks */
|
||||
uint64_t zst_ph_offset; /* prefetch offset, in blocks */
|
||||
uint64_t zst_cap; /* prefetch limit (cap), in blocks */
|
||||
kmutex_t zst_lock; /* protects stream */
|
||||
clock_t zst_last; /* lbolt of last prefetch */
|
||||
avl_node_t zst_node; /* embed avl node here */
|
||||
} zstream_t;
|
||||
|
||||
/*
 * Per-dnode prefetch state: the set of active streams plus counters.
 * dnode_t embeds one of these as dn_zfetch.
 */
typedef struct zfetch {
|
||||
krwlock_t zf_rwlock; /* protects zfetch structure */
|
||||
list_t zf_stream; /* list of zstream_t's (a list_t, not an AVL tree) */
|
||||
struct dnode *zf_dnode; /* dnode that owns this zfetch */
|
||||
uint32_t zf_stream_cnt; /* # of active streams */
|
||||
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
|
||||
} zfetch_t;
|
||||
|
||||
void dmu_zfetch_init(zfetch_t *, struct dnode *);
|
||||
void dmu_zfetch_rele(zfetch_t *);
|
||||
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _DFETCH_H */
|
||||
@@ -0,0 +1,275 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DNODE_H
|
||||
#define _SYS_DNODE_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/dmu_zfetch.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* dnode_hold() flags.
|
||||
*/
|
||||
#define DNODE_MUST_BE_ALLOCATED 1
|
||||
#define DNODE_MUST_BE_FREE 2
|
||||
|
||||
/*
|
||||
* dnode_next_offset() flags.
|
||||
*/
|
||||
#define DNODE_FIND_HOLE 1
|
||||
#define DNODE_FIND_BACKWARDS 2
|
||||
#define DNODE_FIND_HAVELOCK 4
|
||||
|
||||
/*
|
||||
* Fixed constants.
|
||||
*/
|
||||
#define DNODE_SHIFT 9 /* 512 bytes */
|
||||
#define DN_MIN_INDBLKSHIFT 10 /* 1k */
|
||||
#define DN_MAX_INDBLKSHIFT 14 /* 16k */
|
||||
#define DNODE_BLOCK_SHIFT 14 /* 16k */
|
||||
#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
|
||||
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
|
||||
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
|
||||
|
||||
/*
|
||||
* Derived constants.
|
||||
*/
|
||||
#define DNODE_SIZE (1 << DNODE_SHIFT)
|
||||
#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
|
||||
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
|
||||
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
|
||||
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
|
||||
|
||||
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
|
||||
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
|
||||
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
|
||||
|
||||
/* The +2 here is a cheesy way to round up */
|
||||
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
|
||||
(DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
|
||||
|
||||
/*
 * DN_BONUS(dnp): address of the bonus data inside a dnode_phys_t.
 *
 * dn_blkptr is declared with a single element, so block pointers beyond
 * the first occupy the start of dn_bonus.  The bonus data therefore begins
 * (dn_nblkptr - 1) * sizeof (blkptr_t) bytes into dn_bonus.  dnp is
 * evaluated twice.
 */
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
|
||||
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
|
||||
|
||||
/*
 * DN_USED_BYTES(dnp): space used by the dnode, in bytes.
 *
 * When DNODE_FLAG_USED_BYTES is set in dn_flags, dn_used is already a byte
 * count and is returned as is; otherwise it is shifted left by
 * SPA_MINBLOCKSHIFT to convert it to bytes.  dnp is evaluated more than
 * once.
 */
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
|
||||
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
|
||||
|
||||
/*
 * EPB(blkshift, typeshift): number of 2^typeshift-sized entries that fit
 * in a 2^blkshift-sized block, i.e. 1 << (blkshift - typeshift).
 *
 * Both parameters are parenthesized so that compound arguments such as
 * "a + b" keep their intended precedence.  The result has type int, so
 * callers must keep blkshift - typeshift non-negative and below the width
 * of int.
 */
#define	EPB(blkshift, typeshift)	(1 << ((blkshift) - (typeshift)))
|
||||
|
||||
struct dmu_buf_impl;
|
||||
struct objset_impl;
|
||||
struct zio;
|
||||
|
||||
/*
 * Values for dnode_t's dn_dirtyctx member.  Presumably records whether the
 * dnode has not been dirtied, or was dirtied in open vs. syncing context
 * -- TODO confirm against the code that sets dn_dirtyctx.
 */
enum dnode_dirtycontext {
|
||||
DN_UNDIRTIED,
|
||||
DN_DIRTY_OPEN,
|
||||
DN_DIRTY_SYNC
|
||||
};
|
||||
|
||||
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
|
||||
#define DNODE_FLAG_USED_BYTES (1<<0)
|
||||
|
||||
/*
 * Physical (stored) form of a dnode.
 *
 * The members before dn_blkptr add up to 64 bytes, matching
 * DNODE_CORE_SIZE.  They are followed by one blkptr_t and DN_MAX_BONUSLEN
 * bytes of bonus space, which totals DNODE_SIZE provided that
 * sizeof (blkptr_t) == (1 << SPA_BLKPTRSHIFT) -- TODO confirm.
 *
 * dn_blkptr is declared with one element; when dn_nblkptr > 1 the extra
 * block pointers overlay the start of dn_bonus (see DN_BONUS()).
 */
typedef struct dnode_phys {
|
||||
uint8_t dn_type; /* dmu_object_type_t */
|
||||
uint8_t dn_indblkshift; /* ln2(indirect block size) */
|
||||
uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */
|
||||
uint8_t dn_nblkptr; /* length of dn_blkptr */
|
||||
uint8_t dn_bonustype; /* type of data in bonus buffer */
|
||||
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
|
||||
uint8_t dn_compress; /* ZIO_COMPRESS type */
|
||||
uint8_t dn_flags; /* DNODE_FLAG_* */
|
||||
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
|
||||
uint16_t dn_bonuslen; /* length of dn_bonus */
|
||||
uint8_t dn_pad2[4];
|
||||
|
||||
/*
 * accounting is protected by dn_dirty_mtx
 * NOTE(review): no dn_dirty_mtx is declared in this header; dnode_t's
 * mutex is named dn_mtx.  Confirm which lock is meant.
 */
|
||||
uint64_t dn_maxblkid; /* largest allocated block ID */
|
||||
uint64_t dn_used; /* bytes (or sectors) of disk space */
|
||||
|
||||
uint64_t dn_pad3[4];
|
||||
|
||||
blkptr_t dn_blkptr[1];
|
||||
uint8_t dn_bonus[DN_MAX_BONUSLEN];
|
||||
} dnode_phys_t;
|
||||
|
||||
/*
 * In-core dnode.
 *
 * dn_phys points at the stored dnode_phys_t; several of its fields are
 * mirrored here.  Members are grouped below by the lock that protects
 * them.  The dn_next_*, dn_dirty_link, dn_dirty_records and dn_ranges
 * arrays each have TXG_SIZE elements, i.e. one slot per in-flight txg.
 */
typedef struct dnode {
|
||||
/*
|
||||
* dn_struct_rwlock protects the structure of the dnode,
|
||||
* including the number of levels of indirection (dn_nlevels),
|
||||
* dn_maxblkid, and dn_next_*
|
||||
*/
|
||||
krwlock_t dn_struct_rwlock;
|
||||
|
||||
/*
|
||||
* Our link on dataset's dd_dnodes list.
|
||||
* Protected by dd_accounting_mtx.
|
||||
*/
|
||||
list_node_t dn_link;
|
||||
|
||||
/* immutable: */
|
||||
struct objset_impl *dn_objset;
|
||||
uint64_t dn_object;
|
||||
struct dmu_buf_impl *dn_dbuf;
|
||||
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
|
||||
|
||||
/*
|
||||
* Copies of stuff in dn_phys. They're valid in the open
|
||||
* context (e.g., even before the dnode is first synced).
|
||||
* Where necessary, these are protected by dn_struct_rwlock.
|
||||
*/
|
||||
dmu_object_type_t dn_type; /* object type */
|
||||
uint16_t dn_bonuslen; /* bonus length */
|
||||
uint8_t dn_bonustype; /* bonus type */
|
||||
uint8_t dn_nblkptr; /* number of blkptrs (immutable) */
|
||||
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
|
||||
uint8_t dn_compress; /* ZIO_COMPRESS type */
|
||||
uint8_t dn_nlevels;
|
||||
uint8_t dn_indblkshift;
|
||||
uint8_t dn_datablkshift; /* zero if blksz not power of 2! */
|
||||
uint16_t dn_datablkszsec; /* in 512b sectors */
|
||||
uint32_t dn_datablksz; /* in bytes */
|
||||
uint64_t dn_maxblkid;
|
||||
uint8_t dn_next_nlevels[TXG_SIZE];
|
||||
uint8_t dn_next_indblkshift[TXG_SIZE];
|
||||
uint16_t dn_next_bonuslen[TXG_SIZE];
|
||||
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
|
||||
|
||||
/* protected by os_lock: */
|
||||
list_node_t dn_dirty_link[TXG_SIZE]; /* link on dataset's dirty list */
|
||||
|
||||
/* protected by dn_mtx: */
|
||||
kmutex_t dn_mtx;
|
||||
list_t dn_dirty_records[TXG_SIZE];
|
||||
avl_tree_t dn_ranges[TXG_SIZE];
|
||||
uint64_t dn_allocated_txg;
|
||||
uint64_t dn_free_txg;
|
||||
uint64_t dn_assigned_txg;
|
||||
kcondvar_t dn_notxholds;
|
||||
enum dnode_dirtycontext dn_dirtyctx;
|
||||
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
|
||||
|
||||
/*
 * protected by own devices
 * (presumably each refcount_t's own internal locking -- TODO confirm)
 */
|
||||
refcount_t dn_tx_holds;
|
||||
refcount_t dn_holds;
|
||||
|
||||
kmutex_t dn_dbufs_mtx;
|
||||
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
|
||||
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
|
||||
|
||||
/* parent IO for current sync write */
|
||||
zio_t *dn_zio;
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
} dnode_t;
|
||||
|
||||
/*
 * A run of fr_nblks blocks starting at block ID fr_blkid, linked into an
 * AVL tree through fr_node.  Presumably an element of dnode_t's per-txg
 * dn_ranges trees -- TODO confirm.
 */
typedef struct free_range {
|
||||
avl_node_t fr_node;
|
||||
uint64_t fr_blkid;
|
||||
uint64_t fr_nblks;
|
||||
} free_range_t;
|
||||
|
||||
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
|
||||
uint64_t object);
|
||||
void dnode_special_close(dnode_t *dn);
|
||||
|
||||
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
|
||||
int dnode_hold(struct objset_impl *dd, uint64_t object,
|
||||
void *ref, dnode_t **dnp);
|
||||
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
|
||||
void *ref, dnode_t **dnp);
|
||||
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
|
||||
void dnode_rele(dnode_t *dn, void *ref);
|
||||
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_byteswap(dnode_phys_t *dnp);
|
||||
void dnode_buf_byteswap(void *buf, size_t size);
|
||||
void dnode_verify(dnode_t *dn);
|
||||
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
|
||||
uint64_t dnode_current_max_length(dnode_t *dn);
|
||||
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
|
||||
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
|
||||
uint64_t nblks, dmu_tx_t *tx);
|
||||
void dnode_diduse_space(dnode_t *dn, int64_t space);
|
||||
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
|
||||
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
|
||||
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
|
||||
void dnode_init(void);
|
||||
void dnode_fini(void);
|
||||
int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
|
||||
int minlvl, uint64_t blkfill, uint64_t txg);
|
||||
void dnode_evict_dbufs(dnode_t *dn);
|
||||
|
||||
#ifdef ZFS_DEBUG

/*
 * dprintf_dnode(dn, fmt, ...): debug print for a dnode.
 *
 * When ZFS_DEBUG_DPRINTF is set in zfs_flags, formats the dnode's object
 * number ("mdn" for DMU_META_DNODE_OBJECT) and passes it, together with
 * the caller's format and arguments, to dprintf_ds() for the dnode's
 * dataset.
 *
 * The "obj=%s " prefix is joined to fmt by ordinary adjacent string-literal
 * concatenation, so fmt must be a string literal.  At least one variadic
 * argument must follow fmt, because __VA_ARGS__ is preceded by a comma.
 * dn is evaluated twice.
 *
 * The object number is cast to u_longlong_t and printed with %llu so that
 * the conversion matches the unsigned argument.
 */
#define	dprintf_dnode(dn, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
	char __db_buf[32]; \
	uint64_t __db_obj = (dn)->dn_object; \
	if (__db_obj == DMU_META_DNODE_OBJECT) \
		(void) strcpy(__db_buf, "mdn"); \
	else \
		(void) snprintf(__db_buf, sizeof (__db_buf), "%llu", \
		    (u_longlong_t)__db_obj);\
	dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
	    __db_buf, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)

/* Debug-only consistency checks; both expand to nothing without ZFS_DEBUG. */
#define	DNODE_VERIFY(dn)		dnode_verify(dn)
#define	FREE_VERIFY(db, start, end, tx)	free_verify(db, start, end, tx)

#else

/* Non-debug builds: the macros vanish and their arguments are not evaluated. */
#define	dprintf_dnode(dn, fmt, ...)
#define	DNODE_VERIFY(dn)
#define	FREE_VERIFY(db, start, end, tx)

#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DNODE_H */
|
||||
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DATASET_H
|
||||
#define _SYS_DSL_DATASET_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dsl_dir;
|
||||
struct dsl_pool;
|
||||
|
||||
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
|
||||
|
||||
#define DS_FLAG_INCONSISTENT (1ULL<<0)
|
||||
#define DS_IS_INCONSISTENT(ds) \
|
||||
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
|
||||
/*
|
||||
* NB: nopromote can not yet be set, but we want support for it in this
|
||||
* on-disk version, so that we don't need to upgrade for it later. It
|
||||
* will be needed when we implement 'zfs split' (where the split off
|
||||
* clone should not be promoted).
|
||||
*/
|
||||
#define DS_FLAG_NOPROMOTE (1ULL<<1)
|
||||
|
||||
/*
|
||||
* DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
|
||||
* calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
|
||||
* refquota/refreservations).
|
||||
*/
|
||||
#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
|
||||
|
||||
/*
|
||||
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
|
||||
* name lookups should be performed case-insensitively.
|
||||
*/
|
||||
#define DS_FLAG_CI_DATASET (1ULL<<16)
|
||||
|
||||
/*
 * Stored (on-disk) form of a dataset.
 *
 * The structure holds 24 uint64_t members (192 bytes) plus one blkptr_t.
 * The 320-byte total quoted on ds_pad therefore holds if blkptr_t is
 * 128 bytes -- TODO confirm.
 */
typedef struct dsl_dataset_phys {
|
||||
uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
|
||||
uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
|
||||
uint64_t ds_prev_snap_txg;
|
||||
uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */
|
||||
uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */
|
||||
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
|
||||
uint64_t ds_creation_time; /* seconds since 1970 */
|
||||
uint64_t ds_creation_txg;
|
||||
uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
|
||||
uint64_t ds_used_bytes;
|
||||
uint64_t ds_compressed_bytes;
|
||||
uint64_t ds_uncompressed_bytes;
|
||||
/*
 * NOTE(review): the comment on ds_unique_bytes below says "only relevant
 * to snapshots", but the DS_FLAG_UNIQUE_ACCURATE description says the
 * value is also calculated for head datasets.  Confirm and reconcile.
 */
uint64_t ds_unique_bytes; /* only relevant to snapshots */
|
||||
/*
|
||||
* The ds_fsid_guid is a 56-bit ID that can change to avoid
|
||||
* collisions. The ds_guid is a 64-bit ID that will never
|
||||
* change, so there is a small probability that it will collide.
|
||||
*/
|
||||
uint64_t ds_fsid_guid;
|
||||
uint64_t ds_guid;
|
||||
uint64_t ds_flags; /* DS_FLAG_* */
|
||||
blkptr_t ds_bp;
|
||||
uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */
|
||||
uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */
|
||||
uint64_t ds_pad[6]; /* pad out to 320 bytes for good measure */
|
||||
} dsl_dataset_phys_t;
|
||||
|
||||
/*
 * In-core state for a dataset.
 *
 * ds_phys points at the dataset's dsl_dataset_phys_t.  The members are
 * grouped below by the lock (if any) that protects them; ds_snapname is
 * deliberately kept last.
 */
typedef struct dsl_dataset {
|
||||
/* Immutable: */
|
||||
struct dsl_dir *ds_dir;
|
||||
dsl_dataset_phys_t *ds_phys;
|
||||
dmu_buf_t *ds_dbuf;
|
||||
uint64_t ds_object;
|
||||
uint64_t ds_fsid_guid;
|
||||
|
||||
/* only used in syncing context, only valid for non-snapshots: */
|
||||
struct dsl_dataset *ds_prev;
|
||||
uint64_t ds_origin_txg;
|
||||
|
||||
/* has internal locking: */
|
||||
bplist_t ds_deadlist;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_datasets list */
|
||||
txg_node_t ds_dirty_link;
|
||||
list_node_t ds_synced_link;
|
||||
|
||||
/*
|
||||
* ds_phys->ds_<accounting> is also protected by ds_lock.
|
||||
* Protected by ds_lock:
|
||||
*/
|
||||
kmutex_t ds_lock;
|
||||
void *ds_user_ptr;
|
||||
dsl_dataset_evict_func_t *ds_user_evict_func;
|
||||
|
||||
/*
|
||||
* ds_owner is protected by the ds_rwlock and the ds_lock
|
||||
*/
|
||||
krwlock_t ds_rwlock;
|
||||
kcondvar_t ds_exclusive_cv;
|
||||
void *ds_owner;
|
||||
|
||||
/* no locking; only for making guesses */
|
||||
uint64_t ds_trysnap_txg;
|
||||
|
||||
/* for objset_open() */
|
||||
kmutex_t ds_opening_lock;
|
||||
|
||||
uint64_t ds_reserved; /* cached refreservation */
|
||||
uint64_t ds_quota; /* cached refquota */
|
||||
|
||||
/* Protected by ds_lock; keep at end of struct for better locality */
|
||||
char ds_snapname[MAXNAMELEN];
|
||||
} dsl_dataset_t;
|
||||
|
||||
/*
 * Non-zero when the dataset's stored ds_num_children is non-zero.  The
 * dsl_dataset_phys_t member comment states that the count is 0 for a head
 * dataset, so a non-zero count identifies a snapshot.
 */
#define dsl_dataset_is_snapshot(ds) \
|
||||
((ds)->ds_phys->ds_num_children != 0)
|
||||
|
||||
/* 1 when DS_FLAG_UNIQUE_ACCURATE is set in the stored ds_flags, else 0. */
#define DS_UNIQUE_IS_ACCURATE(ds) \
|
||||
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
|
||||
|
||||
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
|
||||
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
void *tag, dsl_dataset_t **);
|
||||
int dsl_dataset_own(const char *name, int flags, void *owner,
|
||||
dsl_dataset_t **dsp);
|
||||
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
int flags, void *owner, dsl_dataset_t **);
|
||||
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
|
||||
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
|
||||
void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
|
||||
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
|
||||
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
|
||||
void *owner);
|
||||
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
|
||||
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
|
||||
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
|
||||
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
uint64_t flags, dmu_tx_t *tx);
|
||||
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
|
||||
int dsl_snapshots_destroy(char *fsname, char *snapname);
|
||||
dsl_checkfunc_t dsl_dataset_destroy_check;
|
||||
dsl_syncfunc_t dsl_dataset_destroy_sync;
|
||||
dsl_checkfunc_t dsl_dataset_snapshot_check;
|
||||
dsl_syncfunc_t dsl_dataset_snapshot_sync;
|
||||
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
|
||||
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
|
||||
int dsl_dataset_promote(const char *name);
|
||||
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
|
||||
boolean_t force);
|
||||
|
||||
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
|
||||
void *p, dsl_dataset_evict_func_t func);
|
||||
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
|
||||
|
||||
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
|
||||
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
|
||||
|
||||
boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
|
||||
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
|
||||
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
|
||||
void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
|
||||
void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
|
||||
void dsl_dataset_space(dsl_dataset_t *ds,
|
||||
uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
|
||||
|
||||
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
|
||||
|
||||
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
|
||||
uint64_t asize, uint64_t inflight, uint64_t *used,
|
||||
uint64_t *ref_rsrv);
|
||||
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
|
||||
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
|
||||
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
|
||||
int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
 * dprintf_ds(ds, fmt, ...): debug print prefixed with the dataset's name.
 *
 * When ZFS_DEBUG_DPRINTF is set in zfs_flags, allocates a MAXNAMELEN-byte
 * buffer with KM_SLEEP, fills it via dsl_dataset_name(), prints through
 * dprintf() and frees the buffer again.  fmt must be a string literal (it
 * is joined to the "ds=%s " prefix by literal concatenation) and at least
 * one variadic argument is required.  Without ZFS_DEBUG the macro expands
 * to nothing and its arguments are not evaluated.
 */
#ifdef ZFS_DEBUG
|
||||
#define dprintf_ds(ds, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
|
||||
dsl_dataset_name(ds, __ds_name); \
|
||||
dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
|
||||
kmem_free(__ds_name, MAXNAMELEN); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_ds(dd, fmt, ...)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DATASET_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DELEG_H
|
||||
#define _SYS_DSL_DELEG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZFS_DELEG_PERM_NONE ""
|
||||
#define ZFS_DELEG_PERM_CREATE "create"
|
||||
#define ZFS_DELEG_PERM_DESTROY "destroy"
|
||||
#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
|
||||
#define ZFS_DELEG_PERM_ROLLBACK "rollback"
|
||||
#define ZFS_DELEG_PERM_CLONE "clone"
|
||||
#define ZFS_DELEG_PERM_PROMOTE "promote"
|
||||
#define ZFS_DELEG_PERM_RENAME "rename"
|
||||
#define ZFS_DELEG_PERM_MOUNT "mount"
|
||||
#define ZFS_DELEG_PERM_SHARE "share"
|
||||
#define ZFS_DELEG_PERM_SEND "send"
|
||||
#define ZFS_DELEG_PERM_RECEIVE "receive"
|
||||
#define ZFS_DELEG_PERM_ALLOW "allow"
|
||||
#define ZFS_DELEG_PERM_USERPROP "userprop"
|
||||
#define ZFS_DELEG_PERM_VSCAN "vscan"
|
||||
|
||||
/*
|
||||
* Note: the names of properties that are marked delegatable are also
|
||||
* valid delegated permissions
|
||||
*/
|
||||
|
||||
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
|
||||
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
|
||||
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
|
||||
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
|
||||
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
|
||||
boolean_t dsl_delegation_on(objset_t *os);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DELEG_H */
|
||||
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DIR_H
|
||||
#define _SYS_DSL_DIR_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
|
||||
/*
 * Categories of space usage.  dsl_dir_phys_t's dd_used_breakdown[] is
 * declared with DD_USED_NUM elements, so the enumerators before
 * DD_USED_NUM serve as its indices; DD_USED_NUM is the count, not a
 * category.
 */
typedef enum dd_used {
|
||||
DD_USED_HEAD,
|
||||
DD_USED_SNAP,
|
||||
DD_USED_CHILD,
|
||||
DD_USED_CHILD_RSRV,
|
||||
DD_USED_REFRSRV,
|
||||
DD_USED_NUM
|
||||
} dd_used_t;
|
||||
|
||||
#define DD_FLAG_USED_BREAKDOWN (1<<0)
|
||||
|
||||
/*
 * Stored (on-disk) form of a DSL directory.
 *
 * The structure is 13 scalar uint64_t members, DD_USED_NUM (5) breakdown
 * slots and 14 pad words: 32 uint64_t in total, i.e. the 256 bytes quoted
 * on dd_pad.  Adding a member or a dd_used_t category requires shrinking
 * dd_pad to keep that size.
 *
 * dd_used_breakdown is presumably meaningful only when
 * DD_FLAG_USED_BREAKDOWN is set in dd_flags -- TODO confirm.
 */
typedef struct dsl_dir_phys {
|
||||
uint64_t dd_creation_time; /* not actually used */
|
||||
uint64_t dd_head_dataset_obj;
|
||||
uint64_t dd_parent_obj;
|
||||
uint64_t dd_origin_obj;
|
||||
uint64_t dd_child_dir_zapobj;
|
||||
/*
|
||||
* how much space our children are accounting for; for leaf
|
||||
* datasets, == physical space used by fs + snaps
|
||||
*/
|
||||
uint64_t dd_used_bytes;
|
||||
uint64_t dd_compressed_bytes;
|
||||
uint64_t dd_uncompressed_bytes;
|
||||
/* Administrative quota setting */
|
||||
uint64_t dd_quota;
|
||||
/* Administrative reservation setting */
|
||||
uint64_t dd_reserved;
|
||||
uint64_t dd_props_zapobj;
|
||||
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
|
||||
uint64_t dd_flags;
|
||||
uint64_t dd_used_breakdown[DD_USED_NUM];
|
||||
uint64_t dd_pad[14]; /* pad out to 256 bytes for good measure */
|
||||
} dsl_dir_phys_t;
|
||||
|
||||
/*
 * In-core state for a DSL directory.
 *
 * dd_phys points at the directory's dsl_dir_phys_t.  Members are grouped
 * below by the lock that protects them; dd_tempreserved and
 * dd_space_towrite have one slot per in-flight txg (TXG_SIZE entries).
 * dd_myname is deliberately kept last.
 */
struct dsl_dir {
|
||||
/* These are immutable; no lock needed: */
|
||||
uint64_t dd_object;
|
||||
dsl_dir_phys_t *dd_phys;
|
||||
dmu_buf_t *dd_dbuf;
|
||||
dsl_pool_t *dd_pool;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_dirs list */
|
||||
txg_node_t dd_dirty_link;
|
||||
|
||||
/* protected by dp_config_rwlock */
|
||||
dsl_dir_t *dd_parent;
|
||||
|
||||
/* Protected by dd_lock */
|
||||
kmutex_t dd_lock;
|
||||
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
|
||||
|
||||
/* gross estimate of space used by in-flight tx's */
|
||||
uint64_t dd_tempreserved[TXG_SIZE];
|
||||
/* amount of space we expect to write; == amount of dirty data */
|
||||
int64_t dd_space_towrite[TXG_SIZE];
|
||||
|
||||
/* protected by dd_lock; keep at end of struct for better locality */
|
||||
char dd_myname[MAXNAMELEN];
|
||||
};
|
||||
|
||||
void dsl_dir_close(dsl_dir_t *dd, void *tag);
|
||||
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
|
||||
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
|
||||
const char **tailp);
|
||||
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
const char *tail, void *tag, dsl_dir_t **);
|
||||
void dsl_dir_name(dsl_dir_t *dd, char *buf);
|
||||
int dsl_dir_namelen(dsl_dir_t *dd);
|
||||
int dsl_dir_is_private(dsl_dir_t *dd);
|
||||
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
|
||||
const char *name, dmu_tx_t *tx);
|
||||
dsl_checkfunc_t dsl_dir_destroy_check;
|
||||
dsl_syncfunc_t dsl_dir_destroy_sync;
|
||||
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
|
||||
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
|
||||
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
|
||||
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
|
||||
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
|
||||
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
|
||||
uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
|
||||
dmu_tx_t *tx);
|
||||
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
|
||||
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
|
||||
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
|
||||
void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
|
||||
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
|
||||
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
|
||||
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
|
||||
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
|
||||
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
|
||||
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
|
||||
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
|
||||
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
|
||||
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
/* internal reserved dir name */
|
||||
#define MOS_DIR_NAME "$MOS"
|
||||
#define ORIGIN_DIR_NAME "$ORIGIN"
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
/*
 * Debug printf that prefixes the message with the name of a dsl_dir
 * ("dd=<name> ").  Nothing is evaluated unless ZFS_DEBUG_DPRINTF is set in
 * zfs_flags.  A scratch buffer of MAXNAMELEN + strlen(MOS_DIR_NAME) + 1
 * bytes is allocated with KM_SLEEP, filled in by dsl_dir_name(), passed to
 * dprintf(), and freed again before the macro finishes.
 *
 * The buffer size is computed once so that the sizes handed to kmem_alloc()
 * and kmem_free() cannot drift apart, and the macro-local names avoid the
 * reserved "__" prefix.  At least one argument must follow fmt, because
 * __VA_ARGS__ is expanded after a comma.
 */
#define dprintf_dd(dd, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
		size_t dd_namesz_ = MAXNAMELEN + strlen(MOS_DIR_NAME) + 1; \
		char *dd_namebuf_ = kmem_alloc(dd_namesz_, KM_SLEEP); \
		dsl_dir_name((dd), dd_namebuf_); \
		dprintf("dd=%s " fmt, dd_namebuf_, __VA_ARGS__); \
		kmem_free(dd_namebuf_, dd_namesz_); \
	} \
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_dd(dd, fmt, ...)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DIR_H */
|
||||
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_POOL_H
|
||||
#define _SYS_DSL_POOL_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/txg_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct objset;
|
||||
struct dsl_dir;
|
||||
struct dsl_dataset;
|
||||
struct dsl_pool;
|
||||
struct dmu_tx;
|
||||
|
||||
enum scrub_func {
|
||||
SCRUB_FUNC_NONE,
|
||||
SCRUB_FUNC_CLEAN,
|
||||
SCRUB_FUNC_NUMFUNCS
|
||||
};
|
||||
|
||||
/* These macros are for indexing into the zfs_all_blkstats_t. */
|
||||
#define DMU_OT_DEFERRED DMU_OT_NONE
|
||||
#define DMU_OT_TOTAL DMU_OT_NUMTYPES
|
||||
|
||||
typedef struct zfs_blkstat {
|
||||
uint64_t zb_count;
|
||||
uint64_t zb_asize;
|
||||
uint64_t zb_lsize;
|
||||
uint64_t zb_psize;
|
||||
uint64_t zb_gangs;
|
||||
uint64_t zb_ditto_2_of_2_samevdev;
|
||||
uint64_t zb_ditto_2_of_3_samevdev;
|
||||
uint64_t zb_ditto_3_of_3_samevdev;
|
||||
} zfs_blkstat_t;
|
||||
|
||||
/*
 * Two-dimensional table of zfs_blkstat_t counters.  The second dimension is
 * DMU_OT_TOTAL + 1 entries wide, so DMU_OT_TOTAL (defined above as
 * DMU_OT_NUMTYPES) is itself a valid index next to the ordinary object
 * types.  The first dimension has DN_MAX_LEVELS + 1 entries.
 * NOTE(review): presumably the first index is the block's indirection level
 * and the extra row holds an all-levels total -- confirm against the code
 * that fills in dp_blkstats.
 */
typedef struct zfs_all_blkstats {
|
||||
zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
|
||||
} zfs_all_blkstats_t;
|
||||
|
||||
|
||||
typedef struct dsl_pool {
|
||||
/* Immutable */
|
||||
spa_t *dp_spa;
|
||||
struct objset *dp_meta_objset;
|
||||
struct dsl_dir *dp_root_dir;
|
||||
struct dsl_dir *dp_mos_dir;
|
||||
struct dsl_dataset *dp_origin_snap;
|
||||
uint64_t dp_root_dir_obj;
|
||||
|
||||
/* No lock needed - sync context only */
|
||||
blkptr_t dp_meta_rootbp;
|
||||
list_t dp_synced_datasets;
|
||||
hrtime_t dp_read_overhead;
|
||||
uint64_t dp_throughput;
|
||||
uint64_t dp_write_limit;
|
||||
|
||||
/* Uses dp_lock */
|
||||
kmutex_t dp_lock;
|
||||
uint64_t dp_space_towrite[TXG_SIZE];
|
||||
uint64_t dp_tempreserved[TXG_SIZE];
|
||||
|
||||
enum scrub_func dp_scrub_func;
|
||||
uint64_t dp_scrub_queue_obj;
|
||||
uint64_t dp_scrub_min_txg;
|
||||
uint64_t dp_scrub_max_txg;
|
||||
zbookmark_t dp_scrub_bookmark;
|
||||
boolean_t dp_scrub_pausing;
|
||||
boolean_t dp_scrub_isresilver;
|
||||
uint64_t dp_scrub_start_time;
|
||||
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
|
||||
boolean_t dp_scrub_restart;
|
||||
|
||||
/* Has its own locking */
|
||||
tx_state_t dp_tx;
|
||||
txg_list_t dp_dirty_datasets;
|
||||
txg_list_t dp_dirty_dirs;
|
||||
txg_list_t dp_sync_tasks;
|
||||
|
||||
/*
|
||||
* Protects administrative changes (properties, namespace)
|
||||
* It is only held for write in syncing context. Therefore
|
||||
* syncing context does not need to ever have it for read, since
|
||||
* nobody else could possibly have it for write.
|
||||
*/
|
||||
krwlock_t dp_config_rwlock;
|
||||
|
||||
zfs_all_blkstats_t *dp_blkstats;
|
||||
} dsl_pool_t;
|
||||
|
||||
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
|
||||
void dsl_pool_close(dsl_pool_t *dp);
|
||||
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
|
||||
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
|
||||
void dsl_pool_zil_clean(dsl_pool_t *dp);
|
||||
int dsl_pool_sync_context(dsl_pool_t *dp);
|
||||
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
|
||||
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_memory_pressure(dsl_pool_t *dp);
|
||||
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
|
||||
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
|
||||
struct dmu_tx *tx);
|
||||
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
|
||||
int dsl_pool_scrub_cancel(dsl_pool_t *dp);
|
||||
int dsl_pool_scrub_clean(dsl_pool_t *dp);
|
||||
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_scrub_restart(dsl_pool_t *dp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_POOL_H */
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_PROP_H
|
||||
#define _SYS_DSL_PROP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dsl_dir;
|
||||
|
||||
/* The callback func may not call into the DMU or DSL! */
|
||||
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
|
||||
|
||||
typedef struct dsl_prop_cb_record {
|
||||
list_node_t cbr_node; /* link on dd_prop_cbs */
|
||||
struct dsl_dataset *cbr_ds;
|
||||
const char *cbr_propname;
|
||||
dsl_prop_changed_cb_t *cbr_func;
|
||||
void *cbr_arg;
|
||||
} dsl_prop_cb_record_t;
|
||||
|
||||
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_numcb(struct dsl_dataset *ds);
|
||||
|
||||
int dsl_prop_get(const char *ddname, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_integer(const char *ddname, const char *propname,
|
||||
uint64_t *valuep, char *setpoint);
|
||||
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
|
||||
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
|
||||
int dsl_prop_set(const char *ddname, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
|
||||
void dsl_prop_nvlist_add_string(nvlist_t *nv,
|
||||
zfs_prop_t prop, const char *value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_PROP_H */
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_SYNCTASK_H
|
||||
#define _SYS_DSL_SYNCTASK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_pool;
|
||||
|
||||
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
|
||||
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
|
||||
|
||||
typedef struct dsl_sync_task {
|
||||
list_node_t dst_node;
|
||||
dsl_checkfunc_t *dst_checkfunc;
|
||||
dsl_syncfunc_t *dst_syncfunc;
|
||||
void *dst_arg1;
|
||||
void *dst_arg2;
|
||||
int dst_err;
|
||||
} dsl_sync_task_t;
|
||||
|
||||
typedef struct dsl_sync_task_group {
|
||||
txg_node_t dstg_node;
|
||||
list_t dstg_tasks;
|
||||
struct dsl_pool *dstg_pool;
|
||||
cred_t *dstg_cr;
|
||||
uint64_t dstg_txg;
|
||||
int dstg_err;
|
||||
int dstg_space;
|
||||
boolean_t dstg_nowaiter;
|
||||
} dsl_sync_task_group_t;
|
||||
|
||||
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
|
||||
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
|
||||
dsl_checkfunc_t *, dsl_syncfunc_t *,
|
||||
void *arg1, void *arg2, int blocks_modified);
|
||||
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
|
||||
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
|
||||
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
|
||||
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
|
||||
|
||||
int dsl_sync_task_do(struct dsl_pool *dp,
|
||||
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
|
||||
void *arg1, void *arg2, int blocks_modified);
|
||||
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
|
||||
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
|
||||
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_SYNCTASK_H */
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_H
|
||||
#define _SYS_METASLAB_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct metaslab_class metaslab_class_t;
|
||||
typedef struct metaslab_group metaslab_group_t;
|
||||
|
||||
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
|
||||
uint64_t start, uint64_t size, uint64_t txg);
|
||||
extern void metaslab_fini(metaslab_t *msp);
|
||||
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
|
||||
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
|
||||
|
||||
#define METASLAB_HINTBP_FAVOR 0x0
|
||||
#define METASLAB_HINTBP_AVOID 0x1
|
||||
#define METASLAB_GANG_HEADER 0x2
|
||||
|
||||
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
|
||||
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
|
||||
boolean_t now);
|
||||
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
|
||||
|
||||
extern metaslab_class_t *metaslab_class_create(void);
|
||||
extern void metaslab_class_destroy(metaslab_class_t *mc);
|
||||
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
|
||||
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
|
||||
vdev_t *vd);
|
||||
extern void metaslab_group_destroy(metaslab_group_t *mg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_METASLAB_H */
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
#define _SYS_METASLAB_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct metaslab_class {
|
||||
metaslab_group_t *mc_rotor;
|
||||
uint64_t mc_allocated;
|
||||
};
|
||||
|
||||
struct metaslab_group {
|
||||
kmutex_t mg_lock;
|
||||
avl_tree_t mg_metaslab_tree;
|
||||
uint64_t mg_aliquot;
|
||||
int64_t mg_bias;
|
||||
metaslab_class_t *mg_class;
|
||||
vdev_t *mg_vd;
|
||||
metaslab_group_t *mg_prev;
|
||||
metaslab_group_t *mg_next;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each metaslab's free space is tracked in a space map object in the MOS,
|
||||
* which is only updated in syncing context. Each time we sync a txg,
|
||||
* we append the allocs and frees from that txg to the space map object.
|
||||
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
|
||||
* to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
|
||||
*/
|
||||
struct metaslab {
|
||||
kmutex_t ms_lock; /* metaslab lock */
|
||||
space_map_obj_t ms_smo; /* synced space map object */
|
||||
space_map_obj_t ms_smo_syncing; /* syncing space map object */
|
||||
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
|
||||
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
|
||||
space_map_t ms_map; /* in-core free space map */
|
||||
uint64_t ms_weight; /* weight vs. others in group */
|
||||
metaslab_group_t *ms_group; /* metaslab group */
|
||||
avl_node_t ms_group_node; /* node in metaslab group tree */
|
||||
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_METASLAB_IMPL_H */
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_REFCOUNT_H
|
||||
#define _SYS_REFCOUNT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the reference is held only by the calling function and not any
|
||||
* particular object, use FTAG (which is a string) for the holder_tag.
|
||||
* Otherwise, use the object that holds the reference.
|
||||
*/
|
||||
#define FTAG ((char *)__func__)
|
||||
|
||||
#if defined(DEBUG) || !defined(_KERNEL)
|
||||
typedef struct reference {
|
||||
list_node_t ref_link;
|
||||
void *ref_holder;
|
||||
uint64_t ref_number;
|
||||
uint8_t *ref_removed;
|
||||
} reference_t;
|
||||
|
||||
typedef struct refcount {
|
||||
kmutex_t rc_mtx;
|
||||
list_t rc_list;
|
||||
list_t rc_removed;
|
||||
int64_t rc_count;
|
||||
int64_t rc_removed_count;
|
||||
} refcount_t;
|
||||
|
||||
/* Note: refcount_t must be initialized with refcount_create() */
|
||||
|
||||
void refcount_create(refcount_t *rc);
|
||||
void refcount_destroy(refcount_t *rc);
|
||||
void refcount_destroy_many(refcount_t *rc, uint64_t number);
|
||||
int refcount_is_zero(refcount_t *rc);
|
||||
int64_t refcount_count(refcount_t *rc);
|
||||
int64_t refcount_add(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_remove(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
|
||||
void refcount_init(void);
|
||||
void refcount_fini(void);
|
||||
|
||||
#else /* DEBUG */
|
||||
|
||||
typedef struct refcount {
|
||||
uint64_t rc_count;
|
||||
} refcount_t;
|
||||
|
||||
/*
 * Non-tracking refcount operations: the refcount is just the rc_count
 * integer and no record of holders is kept.  The holder argument of
 * refcount_add()/refcount_remove() and the number argument of
 * refcount_destroy_many() are accepted for interface compatibility with the
 * tracking implementation but are not used.  refcount_create(),
 * refcount_destroy() and refcount_destroy_many() all simply store zero.
 * refcount_add()/refcount_remove() evaluate to whatever atomic_add_64_nv()
 * returns (NOTE(review): per the atomic_ops documentation that is the
 * updated counter value -- confirm).  Note that rc_count is a uint64_t here,
 * while the tracking implementation's functions return int64_t.
 */
#define refcount_create(rc) ((rc)->rc_count = 0)
|
||||
#define refcount_destroy(rc) ((rc)->rc_count = 0)
|
||||
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
|
||||
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
|
||||
#define refcount_count(rc) ((rc)->rc_count)
|
||||
#define refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
|
||||
#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
|
||||
#define refcount_add_many(rc, number, holder) \
|
||||
atomic_add_64_nv(&(rc)->rc_count, number)
|
||||
/*
 * Non-tracking variant: atomically subtract `number' from rc->rc_count and
 * evaluate to the result of atomic_add_64_nv().  holder is ignored.
 * `number' is parenthesized before negation so that an expression argument
 * such as (a + b) is negated as a whole rather than expanding to -a + b.
 */
#define refcount_remove_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, -(number))
|
||||
|
||||
#define refcount_init()
|
||||
#define refcount_fini()
|
||||
|
||||
#endif /* DEBUG */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_REFCOUNT_H */
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_RR_RW_LOCK_H
|
||||
#define _SYS_RR_RW_LOCK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
/*
|
||||
* A reader-writer lock implementation that allows re-entrant reads, but
|
||||
* still gives writers priority on "new" reads.
|
||||
*
|
||||
* See rrwlock.c for more details about the implementation.
|
||||
*
|
||||
* Fields of the rrwlock_t structure:
|
||||
* - rr_lock: protects modification and reading of rrwlock_t fields
|
||||
* - rr_cv: cv for waking up readers or waiting writers
|
||||
* - rr_writer: thread id of the current writer
|
||||
* - rr_anon_rcount: number of active anonymous readers
|
||||
* - rr_linked_rcount: total number of non-anonymous active readers
|
||||
* - rr_writer_wanted: a writer wants the lock
|
||||
*/
|
||||
typedef struct rrwlock {
|
||||
kmutex_t rr_lock;
|
||||
kcondvar_t rr_cv;
|
||||
kthread_t *rr_writer;
|
||||
refcount_t rr_anon_rcount;
|
||||
refcount_t rr_linked_rcount;
|
||||
boolean_t rr_writer_wanted;
|
||||
} rrwlock_t;
|
||||
|
||||
/*
|
||||
* 'tag' is used for reference-count tracking. The
|
||||
* 'tag' must be the same in a rrw_enter() as in its
|
||||
* corresponding rrw_exit().
|
||||
*/
|
||||
void rrw_init(rrwlock_t *rrl);
|
||||
void rrw_destroy(rrwlock_t *rrl);
|
||||
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
|
||||
void rrw_exit(rrwlock_t *rrl, void *tag);
|
||||
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
|
||||
|
||||
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
|
||||
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_RR_RW_LOCK_H */
|
||||
@@ -0,0 +1,554 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_H
|
||||
#define _SYS_SPA_H
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Forward references that lots of things need.
|
||||
*/
|
||||
typedef struct spa spa_t;
|
||||
typedef struct vdev vdev_t;
|
||||
typedef struct metaslab metaslab_t;
|
||||
typedef struct zilog zilog_t;
|
||||
typedef struct spa_aux_vdev spa_aux_vdev_t;
|
||||
struct dsl_pool;
|
||||
|
||||
/*
|
||||
* General-purpose 32-bit and 64-bit bitfield encodings.
|
||||
*/
|
||||
#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len))
|
||||
#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len))
|
||||
#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low))
|
||||
#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low))
|
||||
|
||||
#define BF32_GET(x, low, len) BF32_DECODE(x, low, len)
|
||||
#define BF64_GET(x, low, len) BF64_DECODE(x, low, len)
|
||||
|
||||
/*
 * Store val into the len-bit field that starts at bit `low' of the 32-bit
 * lvalue x.  The old field value is XORed with val, encoded back into
 * position with BF32_ENCODE(), and XORed into x.  Assuming the usual
 * sysmacros.h P2PHASE(v, align) == (v & (align - 1)), this leaves bits
 * outside the field unchanged and truncates val to len bits.
 *
 * x and low are each expanded more than once, so arguments must be free of
 * side effects.  Both are parenthesized in the shift so that an expression
 * argument (e.g. a conditional for low) is shifted as a whole.
 */
#define BF32_SET(x, low, len, val) \
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
|
||||
/*
 * Store val into the len-bit field that starts at bit `low' of the 64-bit
 * lvalue x.  The old field value is XORed with val, encoded back into
 * position with BF64_ENCODE(), and XORed into x.  Assuming the usual
 * sysmacros.h P2PHASE(v, align) == (v & (align - 1)), this leaves bits
 * outside the field unchanged and truncates val to len bits.
 *
 * x and low are each expanded more than once, so arguments must be free of
 * side effects.  Both are parenthesized in the shift so that an expression
 * argument (e.g. a conditional for low) is shifted as a whole.
 */
#define BF64_SET(x, low, len, val) \
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))
|
||||
|
||||
#define BF32_GET_SB(x, low, len, shift, bias) \
|
||||
((BF32_GET(x, low, len) + (bias)) << (shift))
|
||||
#define BF64_GET_SB(x, low, len, shift, bias) \
|
||||
((BF64_GET(x, low, len) + (bias)) << (shift))
|
||||
|
||||
#define BF32_SET_SB(x, low, len, shift, bias, val) \
|
||||
BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
|
||||
#define BF64_SET_SB(x, low, len, shift, bias, val) \
|
||||
BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
|
||||
|
||||
/*
|
||||
* We currently support nine block sizes, from 512 bytes to 128K.
|
||||
* We could go higher, but the benefits are near-zero and the cost
|
||||
* of COWing a giant block to modify one byte would become excessive.
|
||||
*/
|
||||
#define SPA_MINBLOCKSHIFT 9
|
||||
#define SPA_MAXBLOCKSHIFT 17
|
||||
#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
|
||||
#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
|
||||
|
||||
/*
 * Number of distinct power-of-two block sizes: one for every shift value
 * from SPA_MINBLOCKSHIFT through SPA_MAXBLOCKSHIFT inclusive (hence the +1).
 */
#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
|
||||
|
||||
/*
|
||||
* Size of block to hold the configuration data (a packed nvlist)
|
||||
*/
|
||||
#define SPA_CONFIG_BLOCKSIZE (1 << 14)
|
||||
|
||||
/*
|
||||
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
|
||||
* The ASIZE encoding should be at least 64 times larger (6 more bits)
|
||||
* to support up to 4-way RAID-Z mirror mode with worst-case gang block
|
||||
* overhead, three DVAs per bp, plus one more bit in case we do anything
|
||||
* else that expands the ASIZE.
|
||||
*/
|
||||
#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */
|
||||
#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */
|
||||
#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
|
||||
|
||||
/*
|
||||
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
|
||||
* The members of the dva_t should be considered opaque outside the SPA.
|
||||
*/
|
||||
typedef struct dva {
|
||||
uint64_t dva_word[2];
|
||||
} dva_t;
|
||||
|
||||
/*
|
||||
* Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
|
||||
*/
|
||||
typedef struct zio_cksum {
|
||||
uint64_t zc_word[4];
|
||||
} zio_cksum_t;
|
||||
|
||||
/*
|
||||
* Each block is described by its DVAs, time of birth, checksum, etc.
|
||||
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
|
||||
*
|
||||
* 64 56 48 40 32 24 16 8 0
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 0 | vdev1 | GRID | ASIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 1 |G| offset1 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 2 | vdev2 | GRID | ASIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 3 |G| offset2 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 4 | vdev3 | GRID | ASIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 5 |G| offset3 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 8 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 9 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* a | birth txg |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* b | fill count |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* c | checksum[0] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* d | checksum[1] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* e | checksum[2] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* f | checksum[3] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
*
|
||||
* Legend:
|
||||
*
|
||||
* vdev virtual device ID
|
||||
* offset offset into virtual device
|
||||
* LSIZE logical size
|
||||
* PSIZE physical size (after compression)
|
||||
* ASIZE allocated size (including RAID-Z parity and gang block headers)
|
||||
* GRID RAID-Z layout information (reserved for future use)
|
||||
* cksum checksum function
|
||||
* comp compression function
|
||||
* G gang block indicator
|
||||
* E endianness
|
||||
* type DMU object type
|
||||
* lvl level of indirection
|
||||
* birth txg transaction group in which the block was born
|
||||
* fill count number of non-zero blocks under this bp
|
||||
* checksum[4] 256-bit checksum of the data this bp describes
|
||||
*/
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
uint64_t blk_pad[3]; /* Extra space for the future */
|
||||
uint64_t blk_birth; /* transaction group at birth */
|
||||
uint64_t blk_fill; /* fill count */
|
||||
zio_cksum_t blk_cksum; /* 256-bit checksum */
|
||||
} blkptr_t;
|
||||
|
||||
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
|
||||
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
|
||||
|
||||
/*
|
||||
* Macros to get and set fields in a bp or DVA.
|
||||
*/
|
||||
#define DVA_GET_ASIZE(dva) \
|
||||
BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
|
||||
#define DVA_SET_ASIZE(dva, x) \
|
||||
BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
|
||||
|
||||
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
|
||||
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
|
||||
|
||||
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32)
|
||||
#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x)
|
||||
|
||||
#define DVA_GET_OFFSET(dva) \
|
||||
BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
|
||||
#define DVA_SET_OFFSET(dva, x) \
|
||||
BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)
|
||||
|
||||
#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1)
|
||||
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
|
||||
|
||||
#define BP_GET_LSIZE(bp) \
|
||||
(BP_IS_HOLE(bp) ? 0 : \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
|
||||
#define BP_SET_LSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_PSIZE(bp) \
|
||||
BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_PSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
|
||||
|
||||
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
|
||||
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
|
||||
|
||||
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
|
||||
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
|
||||
|
||||
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
|
||||
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
|
||||
|
||||
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
|
||||
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
|
||||
#define BP_GET_ASIZE(bp) \
|
||||
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
/*
 * Yields BP_GET_PSIZE(bp) when the block is above level 0 or its DMU
 * object type is flagged ot_metadata, and BP_GET_LSIZE(bp) otherwise.
 * Expands to a plain expression with no trailing semicolon so that it
 * can be embedded in larger expressions and conditions.
 */
#define BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
|
||||
|
||||
#define BP_GET_NDVAS(bp) \
|
||||
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
#define BP_COUNT_GANG(bp) \
|
||||
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[2]))
|
||||
|
||||
#define DVA_EQUAL(dva1, dva2) \
|
||||
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
|
||||
(dva1)->dva_word[0] == (dva2)->dva_word[0])
|
||||
|
||||
/*
 * Non-zero when all four zc_word[] entries of zc1 and zc2 match.  The
 * per-word differences are OR-ed together and the result is compared
 * against zero, so every word is always read (no short-circuiting).
 */
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
	((((zc1).zc_word[0] - (zc2).zc_word[0]) | \
	((zc1).zc_word[1] - (zc2).zc_word[1]) | \
	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
	((zc1).zc_word[3] - (zc2).zc_word[3])) == 0)
|
||||
|
||||
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
|
||||
|
||||
/*
 * Store w0..w3, in that order, into zc_word[0..3] of the checksum that
 * zcp points to.  Expands to a brace-enclosed compound statement; zcp is
 * evaluated once per word.
 */
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
{ \
	(zcp)->zc_word[0] = (w0); \
	(zcp)->zc_word[1] = (w1); \
	(zcp)->zc_word[2] = (w2); \
	(zcp)->zc_word[3] = (w3); \
}
|
||||
|
||||
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
|
||||
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
|
||||
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
|
||||
|
||||
#define BP_ZERO(bp) \
|
||||
{ \
|
||||
(bp)->blk_dva[0].dva_word[0] = 0; \
|
||||
(bp)->blk_dva[0].dva_word[1] = 0; \
|
||||
(bp)->blk_dva[1].dva_word[0] = 0; \
|
||||
(bp)->blk_dva[1].dva_word[1] = 0; \
|
||||
(bp)->blk_dva[2].dva_word[0] = 0; \
|
||||
(bp)->blk_dva[2].dva_word[1] = 0; \
|
||||
(bp)->blk_prop = 0; \
|
||||
(bp)->blk_pad[0] = 0; \
|
||||
(bp)->blk_pad[1] = 0; \
|
||||
(bp)->blk_pad[2] = 0; \
|
||||
(bp)->blk_birth = 0; \
|
||||
(bp)->blk_fill = 0; \
|
||||
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
|
||||
}
|
||||
|
||||
#define BLK_FILL_ALREADY_FREED (-1ULL)
|
||||
|
||||
/*
|
||||
* Note: the byteorder is either 0 or -1, both of which are palindromes.
|
||||
* This simplifies the endianness handling a bit.
|
||||
*/
|
||||
#ifdef _BIG_ENDIAN
|
||||
#define ZFS_HOST_BYTEORDER (0ULL)
|
||||
#else
|
||||
#define ZFS_HOST_BYTEORDER (-1ULL)
|
||||
#endif
|
||||
|
||||
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
|
||||
|
||||
#define BP_SPRINTF_LEN 320
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
/*
 * Yields ARC_BUFC_METADATA when the block is above level 0 or its DMU
 * object type is flagged ot_metadata, and ARC_BUFC_DATA otherwise.
 * Expands to a plain expression with no trailing semicolon so that it
 * can be used as a function argument or inside a comparison.
 */
#define BP_GET_BUFC_TYPE(bp) \
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
||||
/*
|
||||
* Routines found in spa.c
|
||||
*/
|
||||
|
||||
/* state manipulation functions */
|
||||
extern int spa_open(const char *pool, spa_t **, void *tag);
|
||||
extern int spa_get_stats(const char *pool, nvlist_t **config,
|
||||
char *altroot, size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
const char *history_str, nvlist_t *zplprops);
|
||||
extern int spa_check_rootconf(char *devpath, char *devid,
|
||||
nvlist_t **bestconf, uint64_t *besttxg);
|
||||
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
|
||||
extern int spa_import_rootpool(char *devpath, char *devid);
|
||||
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
|
||||
extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
|
||||
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
|
||||
extern int spa_destroy(char *pool);
|
||||
extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force);
|
||||
extern int spa_reset(char *pool);
|
||||
extern void spa_async_request(spa_t *spa, int flag);
|
||||
extern void spa_async_unrequest(spa_t *spa, int flag);
|
||||
extern void spa_async_suspend(spa_t *spa);
|
||||
extern void spa_async_resume(spa_t *spa);
|
||||
extern spa_t *spa_inject_addref(char *pool);
|
||||
extern void spa_inject_delref(spa_t *spa);
|
||||
|
||||
#define SPA_ASYNC_CONFIG_UPDATE 0x01
|
||||
#define SPA_ASYNC_REMOVE 0x02
|
||||
#define SPA_ASYNC_PROBE 0x04
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x08
|
||||
#define SPA_ASYNC_RESILVER 0x10
|
||||
|
||||
/* device manipulation */
|
||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
|
||||
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
|
||||
int replacing);
|
||||
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
|
||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||
|
||||
/* spare state (which is global across all pools) */
|
||||
extern void spa_spare_add(vdev_t *vd);
|
||||
extern void spa_spare_remove(vdev_t *vd);
|
||||
extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt);
|
||||
extern void spa_spare_activate(vdev_t *vd);
|
||||
|
||||
/* L2ARC state (which is global across all pools) */
|
||||
extern void spa_l2cache_add(vdev_t *vd);
|
||||
extern void spa_l2cache_remove(vdev_t *vd);
|
||||
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
|
||||
extern void spa_l2cache_activate(vdev_t *vd);
|
||||
extern void spa_l2cache_drop(spa_t *spa);
|
||||
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
|
||||
|
||||
/* scrubbing */
|
||||
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
|
||||
|
||||
/* spa syncing */
|
||||
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
|
||||
extern void spa_sync_allpools(void);
|
||||
|
||||
/* spa namespace global mutex */
|
||||
extern kmutex_t spa_namespace_lock;
|
||||
|
||||
/*
|
||||
* SPA configuration functions in spa_config.c
|
||||
*/
|
||||
|
||||
#define SPA_CONFIG_UPDATE_POOL 0
|
||||
#define SPA_CONFIG_UPDATE_VDEVS 1
|
||||
|
||||
extern void spa_config_sync(spa_t *, boolean_t, boolean_t);
|
||||
extern void spa_config_load(void);
|
||||
extern nvlist_t *spa_all_configs(uint64_t *);
|
||||
extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
||||
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||
int getstats);
|
||||
extern void spa_config_update(spa_t *spa, int what);
|
||||
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
|
||||
|
||||
/*
|
||||
* Miscellaneous SPA routines in spa_misc.c
|
||||
*/
|
||||
|
||||
/* Namespace manipulation */
|
||||
extern spa_t *spa_lookup(const char *name);
|
||||
extern spa_t *spa_add(const char *name, const char *altroot);
|
||||
extern void spa_remove(spa_t *spa);
|
||||
extern spa_t *spa_next(spa_t *prev);
|
||||
|
||||
/* Refcount functions */
|
||||
extern void spa_open_ref(spa_t *spa, void *tag);
|
||||
extern void spa_close(spa_t *spa, void *tag);
|
||||
extern boolean_t spa_refcount_zero(spa_t *spa);
|
||||
|
||||
#define SCL_CONFIG 0x01
|
||||
#define SCL_STATE 0x02
|
||||
#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
|
||||
#define SCL_ALLOC 0x08
|
||||
#define SCL_ZIO 0x10
|
||||
#define SCL_FREE 0x20
|
||||
#define SCL_VDEV 0x40
|
||||
#define SCL_LOCKS 7
|
||||
#define SCL_ALL ((1 << SCL_LOCKS) - 1)
|
||||
#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)
|
||||
|
||||
/* Pool configuration locks */
|
||||
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||
extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||
extern void spa_config_exit(spa_t *spa, int locks, void *tag);
|
||||
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
||||
|
||||
/* Pool vdev add/remove lock */
|
||||
extern uint64_t spa_vdev_enter(spa_t *spa);
|
||||
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
|
||||
|
||||
/* Pool vdev state change lock */
|
||||
extern void spa_vdev_state_enter(spa_t *spa);
|
||||
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
|
||||
|
||||
/* Accessor functions */
|
||||
extern boolean_t spa_shutting_down(spa_t *spa);
|
||||
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
|
||||
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
|
||||
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
|
||||
extern void spa_altroot(spa_t *, char *, size_t);
|
||||
extern int spa_sync_pass(spa_t *spa);
|
||||
extern char *spa_name(spa_t *spa);
|
||||
extern uint64_t spa_guid(spa_t *spa);
|
||||
extern uint64_t spa_last_synced_txg(spa_t *spa);
|
||||
extern uint64_t spa_first_txg(spa_t *spa);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern pool_state_t spa_state(spa_t *spa);
|
||||
extern uint64_t spa_freeze_txg(spa_t *spa);
|
||||
extern uint64_t spa_get_alloc(spa_t *spa);
|
||||
extern uint64_t spa_get_space(spa_t *spa);
|
||||
extern uint64_t spa_get_dspace(spa_t *spa);
|
||||
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern int spa_max_replication(spa_t *spa);
|
||||
extern int spa_busy(void);
|
||||
extern uint8_t spa_get_failmode(spa_t *spa);
|
||||
extern boolean_t spa_suspended(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern int spa_rename(const char *oldname, const char *newname);
|
||||
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
|
||||
extern char *spa_strdup(const char *);
|
||||
extern void spa_strfree(char *);
|
||||
extern uint64_t spa_get_random(uint64_t range);
|
||||
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
extern void spa_evict_all(void);
|
||||
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
|
||||
boolean_t l2cache);
|
||||
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
|
||||
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
|
||||
extern boolean_t spa_has_slogs(spa_t *spa);
|
||||
extern boolean_t spa_is_root(spa_t *spa);
|
||||
|
||||
/* history logging */
|
||||
/*
 * Kind of pool history record.  Used as the `what` argument of
 * spa_history_log() and as history_arg_t's ha_log_type member.
 */
typedef enum history_log_type {
	LOG_CMD_POOL_CREATE = 0,
	LOG_CMD_NORMAL = 1,
	LOG_INTERNAL = 2
} history_log_type_t;
|
||||
|
||||
typedef struct history_arg {
|
||||
const char *ha_history_str;
|
||||
history_log_type_t ha_log_type;
|
||||
history_internal_events_t ha_event;
|
||||
char ha_zone[MAXPATHLEN];
|
||||
} history_arg_t;
|
||||
|
||||
extern char *spa_his_ievent_table[];
|
||||
|
||||
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
|
||||
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
|
||||
char *his_buf);
|
||||
extern int spa_history_log(spa_t *spa, const char *his_buf,
|
||||
history_log_type_t what);
|
||||
void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
|
||||
|
||||
/* error handling */
|
||||
struct zbookmark;
|
||||
struct zio;
|
||||
extern void spa_log_error(spa_t *spa, struct zio *zio);
|
||||
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
|
||||
struct zio *zio, uint64_t stateoroffset, uint64_t length);
|
||||
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
|
||||
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
|
||||
extern uint64_t spa_get_errlog_size(spa_t *spa);
|
||||
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
|
||||
extern void spa_errlog_rotate(spa_t *spa);
|
||||
extern void spa_errlog_drain(spa_t *spa);
|
||||
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
|
||||
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
|
||||
|
||||
/* vdev cache */
|
||||
extern void vdev_cache_stat_init(void);
|
||||
extern void vdev_cache_stat_fini(void);
|
||||
|
||||
/* Initialization and termination */
|
||||
extern void spa_init(int flags);
|
||||
extern void spa_fini(void);
|
||||
extern void spa_boot_init(void);
|
||||
|
||||
/* properties */
|
||||
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
|
||||
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
|
||||
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
|
||||
|
||||
/* asynchronous event notification */
|
||||
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_bp(bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
|
||||
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
|
||||
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_bp(bp, fmt, ...)
|
||||
#endif
|
||||
|
||||
extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_H */
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_BOOT_H
|
||||
#define _SYS_SPA_BOOT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/nvpair.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern char *spa_get_bootprop(char *prop);
|
||||
extern void spa_free_bootprop(char *prop);
|
||||
extern int spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf_p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_BOOT_H */
|
||||
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_IMPL_H
|
||||
#define _SYS_SPA_IMPL_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/bplist.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct spa_error_entry {
|
||||
zbookmark_t se_bookmark;
|
||||
char *se_name;
|
||||
avl_node_t se_avl;
|
||||
} spa_error_entry_t;
|
||||
|
||||
typedef struct spa_history_phys {
|
||||
uint64_t sh_pool_create_len; /* ending offset of zpool create */
|
||||
uint64_t sh_phys_max_off; /* physical EOF */
|
||||
uint64_t sh_bof; /* logical BOF */
|
||||
uint64_t sh_eof; /* logical EOF */
|
||||
uint64_t sh_records_lost; /* num of records overwritten */
|
||||
} spa_history_phys_t;
|
||||
|
||||
struct spa_aux_vdev {
|
||||
uint64_t sav_object; /* MOS object for device list */
|
||||
nvlist_t *sav_config; /* cached device config */
|
||||
vdev_t **sav_vdevs; /* devices */
|
||||
int sav_count; /* number devices */
|
||||
boolean_t sav_sync; /* sync the device list */
|
||||
nvlist_t **sav_pending; /* pending device additions */
|
||||
uint_t sav_npending; /* # pending devices */
|
||||
};
|
||||
|
||||
typedef struct spa_config_lock {
|
||||
kmutex_t scl_lock;
|
||||
kthread_t *scl_writer;
|
||||
int scl_write_wanted;
|
||||
kcondvar_t scl_cv;
|
||||
refcount_t scl_count;
|
||||
} spa_config_lock_t;
|
||||
|
||||
typedef struct spa_config_dirent {
|
||||
list_node_t scd_link;
|
||||
char *scd_path;
|
||||
} spa_config_dirent_t;
|
||||
|
||||
/*
 * Log state values, as stored in struct spa's spa_log_state member.
 * SPA_LOG_UNKNOWN is pinned to 0; the remaining states follow
 * consecutively.  No comma after the last enumerator, matching the other
 * enums in these headers and keeping the declaration valid C90.
 */
typedef enum spa_log_state {
	SPA_LOG_UNKNOWN = 0,	/* unknown log state */
	SPA_LOG_MISSING,	/* missing log(s) */
	SPA_LOG_CLEAR,		/* clear the log(s) */
	SPA_LOG_GOOD		/* log(s) are good */
} spa_log_state_t;
|
||||
|
||||
/*
 * Task queue kinds kept per zio type.  ZIO_TASKQ_TYPES is not a queue of
 * its own: it is the number of kinds and sizes the second dimension of
 * struct spa's spa_zio_taskq[][] array.
 */
enum zio_taskq_type {
	ZIO_TASKQ_ISSUE = 0,
	ZIO_TASKQ_INTERRUPT = 1,
	ZIO_TASKQ_TYPES = 2
};
|
||||
|
||||
struct spa {
|
||||
/*
|
||||
* Fields protected by spa_namespace_lock.
|
||||
*/
|
||||
char spa_name[MAXNAMELEN]; /* pool name */
|
||||
avl_node_t spa_avl; /* node in spa_namespace_avl */
|
||||
nvlist_t *spa_config; /* last synced config */
|
||||
nvlist_t *spa_config_syncing; /* currently syncing config */
|
||||
uint64_t spa_config_txg; /* txg of last config change */
|
||||
int spa_sync_pass; /* iterate-to-convergence */
|
||||
pool_state_t spa_state; /* pool state */
|
||||
int spa_inject_ref; /* injection references */
|
||||
uint8_t spa_sync_on; /* sync threads are running */
|
||||
spa_load_state_t spa_load_state; /* current load operation */
|
||||
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
||||
dsl_pool_t *spa_dsl_pool;
|
||||
metaslab_class_t *spa_normal_class; /* normal data class */
|
||||
metaslab_class_t *spa_log_class; /* intent log data class */
|
||||
uint64_t spa_first_txg; /* first txg after spa_open() */
|
||||
uint64_t spa_final_txg; /* txg of export/destroy */
|
||||
uint64_t spa_freeze_txg; /* freeze pool at this txg */
|
||||
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
|
||||
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
|
||||
vdev_t *spa_root_vdev; /* top-level vdev container */
|
||||
uint64_t spa_load_guid; /* initial guid for spa_load */
|
||||
list_t spa_config_dirty_list; /* vdevs with dirty config */
|
||||
list_t spa_state_dirty_list; /* vdevs with dirty state */
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
uint64_t spa_config_object; /* MOS object for pool config */
|
||||
uint64_t spa_syncing_txg; /* txg currently syncing */
|
||||
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
|
||||
bplist_t spa_sync_bplist; /* deferred-free bplist */
|
||||
uberblock_t spa_ubsync; /* last synced uberblock */
|
||||
uberblock_t spa_uberblock; /* current uberblock */
|
||||
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
||||
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_errors; /* scrub I/O error count */
|
||||
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
|
||||
uint8_t spa_scrub_active; /* active or suspended? */
|
||||
uint8_t spa_scrub_type; /* type of scrub we're doing */
|
||||
uint8_t spa_scrub_finished; /* indicator to rotate logs */
|
||||
uint8_t spa_scrub_started; /* started since last boot */
|
||||
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
|
||||
kmutex_t spa_async_lock; /* protect async state */
|
||||
kthread_t *spa_async_thread; /* thread doing async task */
|
||||
int spa_async_suspended; /* async tasks suspended */
|
||||
kcondvar_t spa_async_cv; /* wait for thread_exit() */
|
||||
uint16_t spa_async_tasks; /* async task mask */
|
||||
kmutex_t spa_async_root_lock; /* protects async root count */
|
||||
uint64_t spa_async_root_count; /* number of async root zios */
|
||||
kcondvar_t spa_async_root_cv; /* notify when count == 0 */
|
||||
char *spa_root; /* alternate root directory */
|
||||
uint64_t spa_ena; /* spa-wide ereport ENA */
|
||||
boolean_t spa_last_open_failed; /* true if last open failed */
|
||||
kmutex_t spa_errlog_lock; /* error log lock */
|
||||
uint64_t spa_errlog_last; /* last error log object */
|
||||
uint64_t spa_errlog_scrub; /* scrub error log object */
|
||||
kmutex_t spa_errlist_lock; /* error list/ereport lock */
|
||||
avl_tree_t spa_errlist_last; /* last error list */
|
||||
avl_tree_t spa_errlist_scrub; /* scrub error list */
|
||||
uint64_t spa_deflate; /* should we deflate? */
|
||||
uint64_t spa_history; /* history object */
|
||||
kmutex_t spa_history_lock; /* history lock */
|
||||
vdev_t *spa_pending_vdev; /* pending vdev additions */
|
||||
kmutex_t spa_props_lock; /* property lock */
|
||||
uint64_t spa_pool_props_object; /* object for properties */
|
||||
uint64_t spa_bootfs; /* default boot filesystem */
|
||||
uint64_t spa_failmode; /* failure mode for the pool */
|
||||
uint64_t spa_delegation; /* delegation on/off */
|
||||
list_t spa_config_list; /* previous cache file(s) */
|
||||
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
|
||||
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||
uint8_t spa_suspended; /* pool is suspended */
|
||||
boolean_t spa_import_faulted; /* allow faulted vdevs */
|
||||
boolean_t spa_is_root; /* pool is root */
|
||||
int spa_minref; /* num refs when first opened */
|
||||
spa_log_state_t spa_log_state; /* log state */
|
||||
/*
|
||||
* spa_refcount & spa_config_lock must be the last elements
|
||||
* because refcount_t changes size based on compilation options.
|
||||
* In order for the MDB module to function correctly, the other
|
||||
* fields must remain in the same location.
|
||||
*/
|
||||
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
|
||||
refcount_t spa_refcount; /* number of opens */
|
||||
};
|
||||
|
||||
extern const char *spa_config_path;
|
||||
|
||||
#define BOOTFS_COMPRESS_VALID(compress) \
|
||||
((compress) == ZIO_COMPRESS_LZJB || \
|
||||
((compress) == ZIO_COMPRESS_ON && \
|
||||
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
|
||||
(compress) == ZIO_COMPRESS_OFF)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_IMPL_H */
|
||||
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPACE_MAP_H
|
||||
#define _SYS_SPACE_MAP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct space_map_ops space_map_ops_t;
|
||||
|
||||
typedef struct space_map {
|
||||
avl_tree_t sm_root; /* AVL tree of map segments */
|
||||
uint64_t sm_space; /* sum of all segments in the map */
|
||||
uint64_t sm_start; /* start of map */
|
||||
uint64_t sm_size; /* size of map */
|
||||
uint8_t sm_shift; /* unit shift */
|
||||
uint8_t sm_pad[3]; /* unused */
|
||||
uint8_t sm_loaded; /* map loaded? */
|
||||
uint8_t sm_loading; /* map loading? */
|
||||
kcondvar_t sm_load_cv; /* map load completion */
|
||||
space_map_ops_t *sm_ops; /* space map block picker ops vector */
|
||||
void *sm_ppd; /* picker-private data */
|
||||
kmutex_t *sm_lock; /* pointer to lock that protects map */
|
||||
} space_map_t;
|
||||
|
||||
typedef struct space_seg {
|
||||
avl_node_t ss_node; /* AVL node */
|
||||
uint64_t ss_start; /* starting offset of this segment */
|
||||
uint64_t ss_end; /* ending offset (non-inclusive) */
|
||||
} space_seg_t;
|
||||
|
||||
typedef struct space_map_obj {
|
||||
uint64_t smo_object; /* on-disk space map object */
|
||||
uint64_t smo_objsize; /* size of the object */
|
||||
uint64_t smo_alloc; /* space allocated from the map */
|
||||
} space_map_obj_t;
|
||||
|
||||
struct space_map_ops {
|
||||
void (*smop_load)(space_map_t *sm);
|
||||
void (*smop_unload)(space_map_t *sm);
|
||||
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
|
||||
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
};
|
||||
|
||||
/*
|
||||
* debug entry
|
||||
*
|
||||
* 1 3 10 50
|
||||
* ,---+--------+------------+---------------------------------.
|
||||
* | 1 | action | syncpass | txg (lower bits) |
|
||||
* `---+--------+------------+---------------------------------'
|
||||
* 63 62 60 59 50 49 0
|
||||
*
|
||||
*
|
||||
*
|
||||
* non-debug entry
|
||||
*
|
||||
* 1 47 1 15
|
||||
* ,-----------------------------------------------------------.
|
||||
* | 0 | offset (sm_shift units) | type | run |
|
||||
* `-----------------------------------------------------------'
|
||||
* 63 62 17 16 15 0
|
||||
*/
|
||||
|
||||
/* All this stuff takes and returns bytes */
|
||||
#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
|
||||
#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
|
||||
#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
|
||||
#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
|
||||
#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
|
||||
#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
|
||||
#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
|
||||
#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
|
||||
|
||||
#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
|
||||
#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
|
||||
|
||||
#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
|
||||
#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
|
||||
|
||||
#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
|
||||
#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
|
||||
|
||||
#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
|
||||
|
||||
#define SM_ALLOC 0x0
|
||||
#define SM_FREE 0x1
|
||||
|
||||
/*
|
||||
* The data for a given space map can be kept on blocks of any size.
|
||||
* Larger blocks entail fewer i/o operations, but they also cause the
|
||||
* DMU to keep more data in-core, and also to waste more i/o bandwidth
|
||||
* when only a few blocks have changed since the last transaction group.
|
||||
* This could use a lot more research, but for now, set the freelist
|
||||
* block size to 4k (2^12).
|
||||
*/
|
||||
#define SPACE_MAP_BLOCKSHIFT 12
|
||||
|
||||
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
|
||||
uint8_t shift, kmutex_t *lp);
|
||||
extern void space_map_destroy(space_map_t *sm);
|
||||
extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_vacate(space_map_t *sm,
|
||||
space_map_func_t *func, space_map_t *mdest);
|
||||
extern void space_map_walk(space_map_t *sm,
|
||||
space_map_func_t *func, space_map_t *mdest);
|
||||
extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_union(space_map_t *smd, space_map_t *sms);
|
||||
|
||||
extern void space_map_load_wait(space_map_t *sm);
|
||||
extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
|
||||
uint8_t maptype, space_map_obj_t *smo, objset_t *os);
|
||||
extern void space_map_unload(space_map_t *sm);
|
||||
|
||||
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
|
||||
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
|
||||
extern void space_map_truncate(space_map_obj_t *smo,
|
||||
objset_t *os, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPACE_MAP_H */
|
||||
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_TXG_H
|
||||
#define _SYS_TXG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */
|
||||
#define TXG_SIZE 4 /* next power of 2 */
|
||||
#define TXG_MASK (TXG_SIZE - 1) /* mask for size */
|
||||
#define TXG_INITIAL TXG_SIZE /* initial txg */
|
||||
/*
 * Index of a txg within a TXG_SIZE-element per-txg array.
 * NOTE: this macro takes no argument; its expansion refers to an
 * identifier named "txg", which must be in scope wherever it is used.
 */
#define TXG_IDX (txg & TXG_MASK)
|
||||
|
||||
#define TXG_WAIT 1ULL
|
||||
#define TXG_NOWAIT 2ULL
|
||||
|
||||
typedef struct tx_cpu tx_cpu_t;
|
||||
|
||||
typedef struct txg_handle {
|
||||
tx_cpu_t *th_cpu;
|
||||
uint64_t th_txg;
|
||||
} txg_handle_t;
|
||||
|
||||
typedef struct txg_node {
|
||||
struct txg_node *tn_next[TXG_SIZE];
|
||||
uint8_t tn_member[TXG_SIZE];
|
||||
} txg_node_t;
|
||||
|
||||
typedef struct txg_list {
|
||||
kmutex_t tl_lock;
|
||||
size_t tl_offset;
|
||||
txg_node_t *tl_head[TXG_SIZE];
|
||||
} txg_list_t;
|
||||
|
||||
struct dsl_pool;
|
||||
|
||||
extern void txg_init(struct dsl_pool *dp, uint64_t txg);
|
||||
extern void txg_fini(struct dsl_pool *dp);
|
||||
extern void txg_sync_start(struct dsl_pool *dp);
|
||||
extern void txg_sync_stop(struct dsl_pool *dp);
|
||||
extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
|
||||
extern void txg_rele_to_quiesce(txg_handle_t *txghp);
|
||||
extern void txg_rele_to_sync(txg_handle_t *txghp);
|
||||
extern void txg_suspend(struct dsl_pool *dp);
|
||||
extern void txg_resume(struct dsl_pool *dp);
|
||||
|
||||
/*
|
||||
* Delay the caller by the specified number of ticks or until
|
||||
* the txg closes (whichever comes first). This is intended
|
||||
* to be used to throttle writers when the system nears its
|
||||
* capacity.
|
||||
*/
|
||||
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group has finished syncing.
|
||||
* Try to make this happen as soon as possible (e.g. kick off any
|
||||
* necessary syncs immediately). If txg==0, wait for the currently open
|
||||
* txg to finish syncing.
|
||||
*/
|
||||
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group, or one after it, is
|
||||
* the open transaction group. Try to make this happen as soon
|
||||
* as possible (e.g. kick off any necessary syncs immediately).
|
||||
* If txg == 0, wait for the next open txg.
|
||||
*/
|
||||
extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Returns TRUE if we are "backed up" waiting for the syncing
|
||||
* transaction to complete; otherwise returns FALSE.
|
||||
*/
|
||||
extern boolean_t txg_stalled(struct dsl_pool *dp);
|
||||
|
||||
/* returns TRUE if someone is waiting for the next txg to sync */
|
||||
extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
|
||||
|
||||
/*
|
||||
* Per-txg object lists.
|
||||
*/
|
||||
|
||||
#define TXG_CLEAN(txg) ((txg) - 1)
|
||||
|
||||
extern void txg_list_create(txg_list_t *tl, size_t offset);
|
||||
extern void txg_list_destroy(txg_list_t *tl);
|
||||
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
|
||||
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
|
||||
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
|
||||
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
|
||||
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
|
||||
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
|
||||
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_TXG_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_TXG_IMPL_H
|
||||
#define _SYS_TXG_IMPL_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct tx_cpu {
|
||||
kmutex_t tc_lock;
|
||||
kcondvar_t tc_cv[TXG_SIZE];
|
||||
uint64_t tc_count[TXG_SIZE];
|
||||
char tc_pad[16];
|
||||
};
|
||||
|
||||
typedef struct tx_state {
|
||||
tx_cpu_t *tx_cpu; /* protects right to enter txg */
|
||||
kmutex_t tx_sync_lock; /* protects tx_state_t */
|
||||
krwlock_t tx_suspend;
|
||||
uint64_t tx_open_txg; /* currently open txg id */
|
||||
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
|
||||
uint64_t tx_syncing_txg; /* currently syncing txg id */
|
||||
uint64_t tx_synced_txg; /* last synced txg id */
|
||||
|
||||
uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
|
||||
uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
|
||||
|
||||
kcondvar_t tx_sync_more_cv;
|
||||
kcondvar_t tx_sync_done_cv;
|
||||
kcondvar_t tx_quiesce_more_cv;
|
||||
kcondvar_t tx_quiesce_done_cv;
|
||||
kcondvar_t tx_timeout_cv;
|
||||
kcondvar_t tx_exit_cv; /* wait for all threads to exit */
|
||||
|
||||
uint8_t tx_threads; /* number of threads */
|
||||
uint8_t tx_exiting; /* set when we're exiting */
|
||||
|
||||
kthread_t *tx_sync_thread;
|
||||
kthread_t *tx_quiesce_thread;
|
||||
} tx_state_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_TXG_IMPL_H */
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UBERBLOCK_H
|
||||
#define _SYS_UBERBLOCK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct uberblock uberblock_t;
|
||||
|
||||
extern int uberblock_verify(uberblock_t *ub);
|
||||
extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UBERBLOCK_H */
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UBERBLOCK_IMPL_H
|
||||
#define _SYS_UBERBLOCK_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/uberblock.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The uberblock version is incremented whenever an incompatible on-disk
|
||||
* format change is made to the SPA, DMU, or ZAP.
|
||||
*
|
||||
* Note: the first two fields should never be moved. When a storage pool
|
||||
* is opened, the uberblock must be read off the disk before the version
|
||||
* can be checked. If the ub_version field is moved, we may not detect
|
||||
* version mismatch. If the ub_magic field is moved, applications that
|
||||
* expect the magic number in the first word won't work.
|
||||
*/
|
||||
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
||||
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
||||
|
||||
struct uberblock {
|
||||
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
||||
uint64_t ub_version; /* SPA_VERSION */
|
||||
uint64_t ub_txg; /* txg of last sync */
|
||||
uint64_t ub_guid_sum; /* sum of all vdev guids */
|
||||
uint64_t ub_timestamp; /* UTC time of last sync */
|
||||
blkptr_t ub_rootbp; /* MOS objset_phys_t */
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UBERBLOCK_IMPL_H */
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UNIQUE_H
|
||||
#define _SYS_UNIQUE_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The number of significant bits in each unique value. */
|
||||
#define UNIQUE_BITS 56
|
||||
|
||||
void unique_init(void);
|
||||
void unique_fini(void);
|
||||
|
||||
/*
|
||||
* Return a new unique value (which will not be uniquified against until
|
||||
* it is unique_insert()-ed).
|
||||
*/
|
||||
uint64_t unique_create(void);
|
||||
|
||||
/* Return a unique value, which equals the one passed in if possible. */
|
||||
uint64_t unique_insert(uint64_t value);
|
||||
|
||||
/* Indicate that this value no longer needs to be uniquified against. */
|
||||
void unique_remove(uint64_t value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UNIQUE_H */
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_H
|
||||
#define _SYS_VDEV_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern boolean_t zfs_nocacheflush;
|
||||
|
||||
extern int vdev_open(vdev_t *);
|
||||
extern int vdev_validate(vdev_t *);
|
||||
extern void vdev_close(vdev_t *);
|
||||
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
|
||||
extern void vdev_init(vdev_t *, uint64_t txg);
|
||||
extern void vdev_reopen(vdev_t *);
|
||||
extern int vdev_validate_aux(vdev_t *vd);
|
||||
extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
|
||||
|
||||
extern boolean_t vdev_is_bootable(vdev_t *vd);
|
||||
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
|
||||
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
|
||||
extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
|
||||
extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
|
||||
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
|
||||
int scrub_done);
|
||||
extern boolean_t vdev_resilver_needed(vdev_t *vd,
|
||||
uint64_t *minp, uint64_t *maxp);
|
||||
|
||||
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_metaslab_fini(vdev_t *vd);
|
||||
|
||||
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
|
||||
extern void vdev_clear_stats(vdev_t *vd);
|
||||
extern void vdev_stat_update(zio_t *zio, uint64_t psize);
|
||||
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
|
||||
boolean_t complete);
|
||||
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
|
||||
extern void vdev_propagate_state(vdev_t *vd);
|
||||
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
|
||||
vdev_aux_t aux);
|
||||
|
||||
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
|
||||
int64_t alloc_delta, boolean_t update_root);
|
||||
|
||||
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
|
||||
|
||||
extern int vdev_fault(spa_t *spa, uint64_t guid);
|
||||
extern int vdev_degrade(spa_t *spa, uint64_t guid);
|
||||
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
|
||||
vdev_state_t *);
|
||||
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
|
||||
extern void vdev_clear(spa_t *spa, vdev_t *vd);
|
||||
|
||||
extern boolean_t vdev_is_dead(vdev_t *vd);
|
||||
extern boolean_t vdev_readable(vdev_t *vd);
|
||||
extern boolean_t vdev_writeable(vdev_t *vd);
|
||||
extern boolean_t vdev_allocatable(vdev_t *vd);
|
||||
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
|
||||
|
||||
extern void vdev_cache_init(vdev_t *vd);
|
||||
extern void vdev_cache_fini(vdev_t *vd);
|
||||
extern int vdev_cache_read(zio_t *zio);
|
||||
extern void vdev_cache_write(zio_t *zio);
|
||||
extern void vdev_cache_purge(vdev_t *vd);
|
||||
|
||||
extern void vdev_queue_init(vdev_t *vd);
|
||||
extern void vdev_queue_fini(vdev_t *vd);
|
||||
extern zio_t *vdev_queue_io(zio_t *zio);
|
||||
extern void vdev_queue_io_done(zio_t *zio);
|
||||
|
||||
extern void vdev_config_dirty(vdev_t *vd);
|
||||
extern void vdev_config_clean(vdev_t *vd);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
|
||||
|
||||
extern void vdev_state_dirty(vdev_t *vd);
|
||||
extern void vdev_state_clean(vdev_t *vd);
|
||||
|
||||
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
|
||||
boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
|
||||
|
||||
/*
|
||||
* Label routines
|
||||
*/
|
||||
struct uberblock;
|
||||
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
|
||||
/*
 * NOTE(review): presumably maps a byte offset on a device of physical
 * size psize to the index of the label containing it -- confirm against
 * the definition.  The first parameter is spelled "psize" to match
 * vdev_label_offset().
 */
extern int vdev_label_number(uint64_t psize, uint64_t offset);
|
||||
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
|
||||
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
|
||||
|
||||
typedef enum {
|
||||
VDEV_LABEL_CREATE, /* create/add a new device */
|
||||
VDEV_LABEL_REPLACE, /* replace an existing device */
|
||||
VDEV_LABEL_SPARE, /* add a new hot spare */
|
||||
VDEV_LABEL_REMOVE, /* remove an existing device */
|
||||
VDEV_LABEL_L2CACHE /* add an L2ARC cache device */
|
||||
} vdev_labeltype_t;
|
||||
|
||||
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_H */
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_FILE_H
|
||||
#define _SYS_VDEV_FILE_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/vdev.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct vdev_file {
|
||||
vnode_t *vf_vnode;
|
||||
} vdev_file_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_FILE_H */
|
||||
@@ -0,0 +1,305 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_IMPL_H
|
||||
#define _SYS_VDEV_IMPL_H
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/dkio.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Virtual device descriptors.
|
||||
*
|
||||
* All storage pool operations go through the virtual device framework,
|
||||
* which provides data replication and I/O scheduling.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Forward declarations that lots of things need.
|
||||
*/
|
||||
typedef struct vdev_queue vdev_queue_t;
|
||||
typedef struct vdev_cache vdev_cache_t;
|
||||
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||
|
||||
/*
|
||||
* Virtual device operations
|
||||
*/
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
|
||||
typedef void vdev_close_func_t(vdev_t *vd);
|
||||
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
|
||||
typedef int vdev_io_start_func_t(zio_t *zio);
|
||||
typedef void vdev_io_done_func_t(zio_t *zio);
|
||||
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
|
||||
|
||||
typedef struct vdev_ops {
|
||||
vdev_open_func_t *vdev_op_open;
|
||||
vdev_close_func_t *vdev_op_close;
|
||||
vdev_asize_func_t *vdev_op_asize;
|
||||
vdev_io_start_func_t *vdev_op_io_start;
|
||||
vdev_io_done_func_t *vdev_op_io_done;
|
||||
vdev_state_change_func_t *vdev_op_state_change;
|
||||
char vdev_op_type[16];
|
||||
boolean_t vdev_op_leaf;
|
||||
} vdev_ops_t;
|
||||
|
||||
/*
|
||||
* Virtual device properties
|
||||
*/
|
||||
struct vdev_cache_entry {
|
||||
char *ve_data;
|
||||
uint64_t ve_offset;
|
||||
uint64_t ve_lastused;
|
||||
avl_node_t ve_offset_node;
|
||||
avl_node_t ve_lastused_node;
|
||||
uint32_t ve_hits;
|
||||
uint16_t ve_missed_update;
|
||||
zio_t *ve_fill_io;
|
||||
};
|
||||
|
||||
struct vdev_cache {
|
||||
avl_tree_t vc_offset_tree;
|
||||
avl_tree_t vc_lastused_tree;
|
||||
kmutex_t vc_lock;
|
||||
};
|
||||
|
||||
struct vdev_queue {
|
||||
avl_tree_t vq_deadline_tree;
|
||||
avl_tree_t vq_read_tree;
|
||||
avl_tree_t vq_write_tree;
|
||||
avl_tree_t vq_pending_tree;
|
||||
kmutex_t vq_lock;
|
||||
};
|
||||
|
||||
/*
|
||||
* Virtual device descriptor
|
||||
*/
|
||||
struct vdev {
|
||||
/*
|
||||
* Common to all vdev types.
|
||||
*/
|
||||
uint64_t vdev_id; /* child number in vdev parent */
|
||||
uint64_t vdev_guid; /* unique ID for this vdev */
|
||||
uint64_t vdev_guid_sum; /* self guid + all child guids */
|
||||
uint64_t vdev_asize; /* allocatable device capacity */
|
||||
uint64_t vdev_ashift; /* block alignment shift */
|
||||
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
|
||||
uint64_t vdev_prevstate; /* used when reopening a vdev */
|
||||
vdev_ops_t *vdev_ops; /* vdev operations */
|
||||
spa_t *vdev_spa; /* spa for this vdev */
|
||||
void *vdev_tsd; /* type-specific data */
|
||||
vdev_t *vdev_top; /* top-level vdev */
|
||||
vdev_t *vdev_parent; /* parent vdev */
|
||||
vdev_t **vdev_child; /* array of children */
|
||||
uint64_t vdev_children; /* number of children */
|
||||
space_map_t vdev_dtl_map; /* dirty time log in-core state */
|
||||
space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
|
||||
vdev_stat_t vdev_stat; /* virtual device statistics */
|
||||
|
||||
/*
|
||||
* Top-level vdev state.
|
||||
*/
|
||||
uint64_t vdev_ms_array; /* metaslab array object */
|
||||
uint64_t vdev_ms_shift; /* metaslab size shift */
|
||||
uint64_t vdev_ms_count; /* number of metaslabs */
|
||||
metaslab_group_t *vdev_mg; /* metaslab group */
|
||||
metaslab_t **vdev_ms; /* metaslab array */
|
||||
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
|
||||
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
|
||||
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
|
||||
boolean_t vdev_remove_wanted; /* async remove wanted? */
|
||||
boolean_t vdev_probe_wanted; /* async probe wanted? */
|
||||
list_node_t vdev_config_dirty_node; /* config dirty list */
|
||||
list_node_t vdev_state_dirty_node; /* state dirty list */
|
||||
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
|
||||
uint64_t vdev_islog; /* is an intent log device */
|
||||
|
||||
/*
|
||||
* Leaf vdev state.
|
||||
*/
|
||||
uint64_t vdev_psize; /* physical device capacity */
|
||||
space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
|
||||
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
|
||||
uint64_t vdev_wholedisk; /* true if this is a whole disk */
|
||||
uint64_t vdev_offline; /* persistent offline state */
|
||||
uint64_t vdev_faulted; /* persistent faulted state */
|
||||
uint64_t vdev_degraded; /* persistent degraded state */
|
||||
uint64_t vdev_removed; /* persistent removed state */
|
||||
uint64_t vdev_nparity; /* number of parity devices for raidz */
|
||||
char *vdev_path; /* vdev path (if any) */
|
||||
char *vdev_devid; /* vdev devid (if any) */
|
||||
char *vdev_physpath; /* vdev device path (if any) */
|
||||
uint64_t vdev_not_present; /* not present during import */
|
||||
uint64_t vdev_unspare; /* unspare when resilvering done */
|
||||
hrtime_t vdev_last_try; /* last reopen time */
|
||||
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
|
||||
boolean_t vdev_checkremove; /* temporary online test */
|
||||
boolean_t vdev_forcefault; /* force online fault */
|
||||
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
|
||||
uint8_t vdev_detached; /* device detached? */
|
||||
uint8_t vdev_cant_read; /* vdev is failing all reads */
|
||||
uint8_t vdev_cant_write; /* vdev is failing all writes */
|
||||
uint64_t vdev_isspare; /* was a hot spare */
|
||||
uint64_t vdev_isl2cache; /* was a l2cache device */
|
||||
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
|
||||
vdev_cache_t vdev_cache; /* physical block cache */
|
||||
spa_aux_vdev_t *vdev_aux; /* for l2cache vdevs */
|
||||
zio_t *vdev_probe_zio; /* root of current probe */
|
||||
|
||||
/*
|
||||
* For DTrace to work in userland (libzpool) context, these fields must
|
||||
* remain at the end of the structure. DTrace will use the kernel's
|
||||
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
|
||||
* larger in userland, the offsets for the rest of the fields would be
|
||||
* incorrect.
|
||||
*/
|
||||
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,scrub} */
|
||||
kmutex_t vdev_stat_lock; /* vdev_stat */
|
||||
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
|
||||
};
|
||||
|
||||
#define VDEV_SKIP_SIZE (8 << 10)
|
||||
#define VDEV_BOOT_HEADER_SIZE (8 << 10)
|
||||
#define VDEV_PHYS_SIZE (112 << 10)
|
||||
#define VDEV_UBERBLOCK_RING (128 << 10)
|
||||
|
||||
#define VDEV_UBERBLOCK_SHIFT(vd) \
|
||||
MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT)
|
||||
#define VDEV_UBERBLOCK_COUNT(vd) \
|
||||
(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
|
||||
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
|
||||
offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
|
||||
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
|
||||
|
||||
/* ZFS boot block */
|
||||
#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
|
||||
#define VDEV_BOOT_VERSION 1 /* version number */
|
||||
|
||||
typedef struct vdev_boot_header {
|
||||
uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
|
||||
uint64_t vb_version; /* VDEV_BOOT_VERSION */
|
||||
uint64_t vb_offset; /* start offset (bytes) */
|
||||
uint64_t vb_size; /* size (bytes) */
|
||||
char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
|
||||
} vdev_boot_header_t;
|
||||
|
||||
typedef struct vdev_phys {
|
||||
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
|
||||
zio_block_tail_t vp_zbt;
|
||||
} vdev_phys_t;
|
||||
|
||||
typedef struct vdev_label {
|
||||
char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
|
||||
vdev_boot_header_t vl_boot_header; /* 8K */
|
||||
vdev_phys_t vl_vdev_phys; /* 112K */
|
||||
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
|
||||
} vdev_label_t; /* 256K total */
|
||||
|
||||
/*
|
||||
* vdev_dirty() flags
|
||||
*/
|
||||
#define VDD_METASLAB 0x01
|
||||
#define VDD_DTL 0x02
|
||||
|
||||
/*
|
||||
* Size and offset of embedded boot loader region on each label.
|
||||
* The total size of the first two labels plus the boot area is 4MB.
|
||||
*/
|
||||
#define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t))
|
||||
#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */
|
||||
|
||||
/*
|
||||
* Size of label regions at the start and end of each leaf device.
|
||||
*/
|
||||
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
|
||||
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
|
||||
#define VDEV_LABELS 4
|
||||
|
||||
#define VDEV_ALLOC_LOAD 0
|
||||
#define VDEV_ALLOC_ADD 1
|
||||
#define VDEV_ALLOC_SPARE 2
|
||||
#define VDEV_ALLOC_L2CACHE 3
|
||||
|
||||
/*
|
||||
* Allocate or free a vdev
|
||||
*/
|
||||
extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
|
||||
vdev_t *parent, uint_t id, int alloctype);
|
||||
extern void vdev_free(vdev_t *vd);
|
||||
|
||||
/*
|
||||
* Add or remove children and parents
|
||||
*/
|
||||
extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd);
|
||||
extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd);
|
||||
extern void vdev_compact_children(vdev_t *pvd);
|
||||
extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops);
|
||||
extern void vdev_remove_parent(vdev_t *cvd);
|
||||
|
||||
/*
|
||||
* vdev load and sync
|
||||
*/
|
||||
extern void vdev_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Available vdev types.
|
||||
*/
|
||||
extern vdev_ops_t vdev_root_ops;
|
||||
extern vdev_ops_t vdev_mirror_ops;
|
||||
extern vdev_ops_t vdev_replacing_ops;
|
||||
extern vdev_ops_t vdev_raidz_ops;
|
||||
extern vdev_ops_t vdev_disk_ops;
|
||||
extern vdev_ops_t vdev_file_ops;
|
||||
extern vdev_ops_t vdev_missing_ops;
|
||||
extern vdev_ops_t vdev_spare_ops;
|
||||
|
||||
/*
|
||||
* Common size functions
|
||||
*/
|
||||
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
|
||||
extern uint64_t vdev_get_rsize(vdev_t *vd);
|
||||
|
||||
/*
|
||||
* zdb uses this tunable, so it must be declared here to make lint happy.
|
||||
*/
|
||||
extern int zfs_vdev_cache_size;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_IMPL_H */
|
||||
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
#define _SYS_ZAP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* ZAP - ZFS Attribute Processor
|
||||
*
|
||||
* The ZAP is a module which sits on top of the DMU (Data Management
|
||||
* Unit) and implements a higher-level storage primitive using DMU
|
||||
* objects. Its primary consumer is the ZPL (ZFS Posix Layer).
|
||||
*
|
||||
* A "zapobj" is a DMU object which the ZAP uses to store attributes.
|
||||
* Users should use only zap routines to access a zapobj - they should
|
||||
* not access the DMU object directly using DMU routines.
|
||||
*
|
||||
* The attributes stored in a zapobj are name-value pairs. The name is
|
||||
* a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
|
||||
* terminating NULL). The value is an array of integers, which may be
|
||||
* 1, 2, 4, or 8 bytes long. The total space used by the array (number
|
||||
* of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
|
||||
* Note that an 8-byte integer value can be used to store the location
|
||||
* (object number) of another dmu object (which may be itself a zapobj).
|
||||
* Note that you can use a zero-length attribute to store a single bit
|
||||
* of information - the attribute is present or not.
|
||||
*
|
||||
* The ZAP routines are thread-safe. However, you must observe the
|
||||
* DMU's restriction that a transaction may not be operated on
|
||||
* concurrently.
|
||||
*
|
||||
* Any of the routines that return an int may return an I/O error (EIO
|
||||
* or ECHECKSUM).
|
||||
*
|
||||
*
|
||||
* Implementation / Performance Notes:
|
||||
*
|
||||
* The ZAP is intended to operate most efficiently on attributes with
|
||||
* short (49 bytes or less) names and single 8-byte values, for which
|
||||
* the microzap will be used. The ZAP should be efficient enough so
|
||||
* that the user does not need to cache these attributes.
|
||||
*
|
||||
* The ZAP's locking scheme makes its routines thread-safe. Operations
|
||||
* on different zapobjs will be processed concurrently. Operations on
|
||||
* the same zapobj which only read data will be processed concurrently.
|
||||
* Operations on the same zapobj which modify data will be processed
|
||||
* concurrently when there are many attributes in the zapobj (because
|
||||
* the ZAP uses per-block locking - more than 128 * (number of cpus)
|
||||
* small attributes will suffice).
|
||||
*/
|
||||
|
||||
/*
|
||||
* We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
|
||||
* strings) for the names of attributes, rather than a byte string
|
||||
* bounded by an explicit length. If some day we want to support names
|
||||
* in character sets which have embedded zeros (eg. UTF-16, UTF-32),
|
||||
* we'll have to add routines for using length-bounded strings.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZAP_MAXNAMELEN 256
|
||||
#define ZAP_MAXVALUELEN 1024
|
||||
|
||||
/*
|
||||
* The matchtype specifies which entry will be accessed.
|
||||
* MT_EXACT: only find an exact match (non-normalized)
|
||||
* MT_FIRST: find the "first" normalized (case and Unicode
|
||||
* form) match; the designated "first" match will not change as long
|
||||
* as the set of entries with this normalization doesn't change
|
||||
* MT_BEST: if there is an exact match, find that, otherwise find the
|
||||
* first normalized match
|
||||
*/
|
||||
typedef enum matchtype
|
||||
{
|
||||
MT_EXACT,
|
||||
MT_BEST,
|
||||
MT_FIRST
|
||||
} matchtype_t;
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes and return its object number.
|
||||
* MT_EXACT will cause the zap object to only support MT_EXACT lookups,
|
||||
* otherwise any matchtype can be used for lookups.
|
||||
*
|
||||
* normflags specifies what normalization will be done. values are:
|
||||
* 0: no normalization (legacy on-disk format, supports MT_EXACT matching
|
||||
* only)
|
||||
* U8_TEXTPREP_TOLOWER: case normalization will be performed.
|
||||
* MT_FIRST/MT_BEST matching will find entries that match without
|
||||
* regard to case (eg. looking for "foo" can find an entry "Foo").
|
||||
* Eventually, other flags will permit unicode normalization as well.
|
||||
*/
|
||||
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes from the given (unallocated)
|
||||
* object number.
|
||||
*/
|
||||
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
|
||||
int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* The zapobj passed in must be a valid ZAP object for all of the
|
||||
* following routines.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Destroy this zapobj and all its attributes.
|
||||
*
|
||||
* Frees the object number using dmu_object_free.
|
||||
*/
|
||||
int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Manipulate attributes.
|
||||
*
|
||||
* 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Retrieve the contents of the attribute with the given name.
|
||||
*
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*
|
||||
* If 'integer_size' is smaller than the attribute's integer size, the
|
||||
* call will fail and return EINVAL.
|
||||
*
|
||||
* If 'integer_size' is equal to or larger than the attribute's integer
|
||||
* size, the call will succeed and return 0. When converting to a
|
||||
* larger integer size, the integers will be treated as unsigned (ie. no
|
||||
* sign-extension will be performed).
|
||||
*
|
||||
* 'num_integers' is the length (in integers) of 'buf'.
|
||||
*
|
||||
* If the attribute is longer than the buffer, as many integers as will
|
||||
* fit will be transferred to 'buf'. If the entire attribute was not
|
||||
* transferred, the call will return EOVERFLOW.
|
||||
*
|
||||
* If rn_len is nonzero, realname will be set to the name of the found
|
||||
* entry (which may be different from the requested name if matchtype is
|
||||
* not MT_EXACT).
|
||||
*
|
||||
* If normalization_conflictp is not NULL, it will be set if there is
|
||||
* another name with the same case/unicode normalized form.
|
||||
*/
|
||||
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *normalization_conflictp);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
*
|
||||
* If an attribute with the given name already exists, the call will
|
||||
* fail and return EEXIST.
|
||||
*/
|
||||
int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the attribute with the given name to the given value. If an
|
||||
* attribute with the given name does not exist, it will be created. If
|
||||
* an attribute with the given name already exists, the previous value
|
||||
* will be overwritten. The integer_size may be different from the
|
||||
* existing attribute's integer size, in which case the attribute's
|
||||
* integer size will be updated to the new value.
|
||||
*/
|
||||
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Get the length (in integers) and the integer size of the specified
|
||||
* attribute.
|
||||
*
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
|
||||
/*
|
||||
* Remove the specified attribute.
|
||||
*
|
||||
* If the specified attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
matchtype_t mt, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Returns (in *count) the number of attributes in the specified zap
|
||||
* object.
|
||||
*/
|
||||
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
|
||||
|
||||
|
||||
/*
|
||||
* Returns (in name) the name of the entry whose (value & mask)
|
||||
* (za_first_integer) is value, or ENOENT if not found. The string
|
||||
* pointed to by name must be at least 256 bytes long. If mask==0, the
|
||||
* match must be exact (ie, same as mask=-1ULL).
|
||||
*/
|
||||
int zap_value_search(objset_t *os, uint64_t zapobj,
|
||||
uint64_t value, uint64_t mask, char *name);
|
||||
|
||||
/*
|
||||
* Transfer all the entries from fromobj into intoobj. Only works on
|
||||
* int_size=8 num_integers=1 values. Fails if there are any duplicated
|
||||
* entries.
|
||||
*/
|
||||
int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Manipulate entries where the name + value are the "same" (the name is
|
||||
* a stringified version of the value).
|
||||
*/
|
||||
int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
|
||||
|
||||
struct zap;
|
||||
struct zap_leaf;
|
||||
typedef struct zap_cursor {
|
||||
/* This structure is opaque! */
|
||||
objset_t *zc_objset;
|
||||
struct zap *zc_zap;
|
||||
struct zap_leaf *zc_leaf;
|
||||
uint64_t zc_zapobj;
|
||||
uint64_t zc_hash;
|
||||
uint32_t zc_cd;
|
||||
} zap_cursor_t;
|
||||
|
||||
typedef struct {
|
||||
int za_integer_length;
|
||||
/*
|
||||
* za_normalization_conflict will be set if there are additional
|
||||
* entries with this normalized form (eg, "foo" and "Foo").
|
||||
*/
|
||||
boolean_t za_normalization_conflict;
|
||||
uint64_t za_num_integers;
|
||||
uint64_t za_first_integer; /* no sign extension for <8byte ints */
|
||||
char za_name[MAXNAMELEN];
|
||||
} zap_attribute_t;
|
||||
|
||||
/*
|
||||
* The interface for listing all the attributes of a zapobj can be
|
||||
* thought of as a cursor moving down a list of the attributes one by
|
||||
* one. The cookie returned by the zap_cursor_serialize routine is
|
||||
* persistent across system calls (and across reboot, even).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor, pointing to the "first" attribute of the
|
||||
* zapobj. You must _fini the cursor when you are done with it.
|
||||
*/
|
||||
void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj);
|
||||
void zap_cursor_fini(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Get the attribute currently pointed to by the cursor. Returns
|
||||
* ENOENT if at the end of the attributes.
|
||||
*/
|
||||
int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
|
||||
|
||||
/*
|
||||
* Advance the cursor to the next attribute.
|
||||
*/
|
||||
void zap_cursor_advance(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Get a persistent cookie pointing to the current position of the zap
|
||||
* cursor. The low 4 bits in the cookie are always zero, and thus can
|
||||
* be used to differentiate a serialized cookie from a different type
|
||||
* of value. The cookie will be less than 2^32 as long as there are
|
||||
* fewer than 2^22 (4.2 million) entries in the zap object.
|
||||
*/
|
||||
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor pointing to the position recorded by
|
||||
* zap_cursor_serialize (in the "serialized" argument). You can also
|
||||
* use a "serialized" argument of 0 to start at the beginning of the
|
||||
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
|
||||
* zap_cursor_init(...).)
|
||||
*/
|
||||
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
|
||||
uint64_t zapobj, uint64_t serialized);
|
||||
|
||||
|
||||
#define ZAP_HISTOGRAM_SIZE 10
|
||||
|
||||
typedef struct zap_stats {
|
||||
/*
|
||||
* Size of the pointer table (in number of entries).
|
||||
* This is always a power of 2, or zero if it's a microzap.
|
||||
* In general, it should be considerably greater than zs_num_leafs.
|
||||
*/
|
||||
uint64_t zs_ptrtbl_len;
|
||||
|
||||
uint64_t zs_blocksize; /* size of zap blocks */
|
||||
|
||||
/*
|
||||
* The number of blocks used. Note that some blocks may be
|
||||
* wasted because old ptrtbl's and large name/value blocks are
|
||||
* not reused. (Although their space is reclaimed, we don't
|
||||
* reuse those offsets in the object.)
|
||||
*/
|
||||
uint64_t zs_num_blocks;
|
||||
|
||||
/*
|
||||
* Pointer table values from zap_ptrtbl in the zap_phys_t
|
||||
*/
|
||||
uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */
|
||||
uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */
|
||||
uint64_t zs_ptrtbl_zt_blk; /* starting block number */
|
||||
uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */
|
||||
uint64_t zs_ptrtbl_zt_shift; /* bits to index it */
|
||||
|
||||
/*
|
||||
* Values of the other members of the zap_phys_t
|
||||
*/
|
||||
uint64_t zs_block_type; /* ZBT_HEADER */
|
||||
uint64_t zs_magic; /* ZAP_MAGIC */
|
||||
uint64_t zs_num_leafs; /* The number of leaf blocks */
|
||||
uint64_t zs_num_entries; /* The number of zap entries */
|
||||
uint64_t zs_salt; /* salt to stir into hash function */
|
||||
|
||||
/*
|
||||
* Histograms. For all histograms, the last index
|
||||
* (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
|
||||
* than what can be represented. For example
|
||||
* zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
|
||||
* of leafs with 45 or more entries.
|
||||
*/
|
||||
|
||||
/*
|
||||
* zs_leafs_with_2n_pointers[n] is the number of leafs with
|
||||
* 2^n pointers to it.
|
||||
*/
|
||||
uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_blocks_with_n5_entries[n] is the number of leafs with
|
||||
* [n*5, (n+1)*5) entries. In the current implementation, there
|
||||
* can be at most 55 entries in any block, but there may be
|
||||
* fewer if the name or value is large, or the block is not
|
||||
* completely full.
|
||||
*/
|
||||
uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_blocks_n_tenths_full[n] is the number of leafs whose
|
||||
* fullness is in the range [n/10, (n+1)/10).
|
||||
*/
|
||||
uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_entries_using_n_chunks[n] is the number of entries which
|
||||
* consume n 24-byte chunks. (Note, large names/values only use
|
||||
* one chunk, but contribute to zs_num_blocks_large.)
|
||||
*/
|
||||
uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_buckets_with_n_entries[n] is the number of buckets (each
|
||||
* leaf has 64 buckets) with n entries.
|
||||
* zs_buckets_with_n_entries[1] should be very close to
|
||||
* zs_num_entries.
|
||||
*/
|
||||
uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
|
||||
} zap_stats_t;
|
||||
|
||||
/*
|
||||
* Get statistics about a ZAP object. Note: you need to be aware of the
|
||||
* internal implementation of the ZAP to correctly interpret some of the
|
||||
* statistics. This interface shouldn't be relied on unless you really
|
||||
* know what you're doing.
|
||||
*/
|
||||
int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_H */
|
||||
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_IMPL_H
|
||||
#define _SYS_ZAP_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int fzap_default_block_shift;
|
||||
|
||||
#define ZAP_MAGIC 0x2F52AB2ABULL
|
||||
|
||||
#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)
|
||||
|
||||
/*
 * All-ones 32-bit value. Presumably the upper bound for the uint32_t
 * collision differentiator ("cd") fields -- confirm against its users.
 * The outer parentheses make the cast expand as a single operand, so the
 * macro is safe next to sizeof, postfix and other high-precedence operators.
 */
#define ZAP_MAXCD ((uint32_t)(-1))
|
||||
#define ZAP_HASHBITS 28
|
||||
#define MZAP_ENT_LEN 64
|
||||
/*
 * Bytes left for mze_name[] in a MZAP_ENT_LEN-byte mzap_ent_phys_t once
 * mze_value (8 bytes), mze_cd (4 bytes) and mze_pad (2 bytes) are
 * accounted for.
 */
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
|
||||
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
|
||||
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
|
||||
|
||||
typedef struct mzap_ent_phys {
|
||||
uint64_t mze_value;
|
||||
uint32_t mze_cd;
|
||||
uint16_t mze_pad; /* in case we want to chain them someday */
|
||||
char mze_name[MZAP_NAME_LEN];
|
||||
} mzap_ent_phys_t;
|
||||
|
||||
typedef struct mzap_phys {
|
||||
uint64_t mz_block_type; /* ZBT_MICRO */
|
||||
uint64_t mz_salt;
|
||||
uint64_t mz_normflags;
|
||||
uint64_t mz_pad[5];
|
||||
mzap_ent_phys_t mz_chunk[1];
|
||||
/* actually variable size depending on block size */
|
||||
} mzap_phys_t;
|
||||
|
||||
typedef struct mzap_ent {
|
||||
avl_node_t mze_node;
|
||||
int mze_chunkid;
|
||||
uint64_t mze_hash;
|
||||
mzap_ent_phys_t mze_phys;
|
||||
} mzap_ent_t;
|
||||
|
||||
|
||||
/*
|
||||
* The (fat) zap is stored in one object. It is an array of
|
||||
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
|
||||
*
|
||||
* ptrtbl fits in first block:
|
||||
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
|
||||
*
|
||||
* ptrtbl too big for first block:
|
||||
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
|
||||
*
|
||||
*/
|
||||
|
||||
struct dmu_buf;
|
||||
struct zap_leaf;
|
||||
|
||||
#define ZBT_LEAF ((1ULL << 63) + 0)
|
||||
#define ZBT_HEADER ((1ULL << 63) + 1)
|
||||
#define ZBT_MICRO ((1ULL << 63) + 3)
|
||||
/* any other values are ptrtbl blocks */
|
||||
|
||||
/*
|
||||
* the embedded pointer table takes up half a block:
|
||||
* block size / entry size (2^3) / 2
|
||||
*/
|
||||
#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
|
||||
|
||||
/*
|
||||
* The embedded pointer table starts half-way through the block. Since
|
||||
* the pointer table itself is half the block, it starts at (64-bit)
|
||||
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
|
||||
*/
|
||||
#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
|
||||
((uint64_t *)(zap)->zap_f.zap_phys) \
|
||||
[(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
|
||||
|
||||
/*
|
||||
* TAKE NOTE:
|
||||
* If zap_phys_t is modified, zap_byteswap() must be modified.
|
||||
*/
|
||||
typedef struct zap_phys {
|
||||
uint64_t zap_block_type; /* ZBT_HEADER */
|
||||
uint64_t zap_magic; /* ZAP_MAGIC */
|
||||
|
||||
struct zap_table_phys {
|
||||
uint64_t zt_blk; /* starting block number */
|
||||
uint64_t zt_numblks; /* number of blocks */
|
||||
uint64_t zt_shift; /* bits to index it */
|
||||
uint64_t zt_nextblk; /* next (larger) copy start block */
|
||||
uint64_t zt_blks_copied; /* number source blocks copied */
|
||||
} zap_ptrtbl;
|
||||
|
||||
uint64_t zap_freeblk; /* the next free block */
|
||||
uint64_t zap_num_leafs; /* number of leafs */
|
||||
uint64_t zap_num_entries; /* number of entries */
|
||||
uint64_t zap_salt; /* salt to stir into hash function */
|
||||
uint64_t zap_normflags; /* flags for u8_textprep_str() */
|
||||
/*
|
||||
* This structure is followed by padding, and then the embedded
|
||||
* pointer table. The embedded pointer table takes up second
|
||||
* half of the block. It is accessed using the
|
||||
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
|
||||
*/
|
||||
} zap_phys_t;
|
||||
|
||||
typedef struct zap_table_phys zap_table_phys_t;
|
||||
|
||||
typedef struct zap {
|
||||
objset_t *zap_objset;
|
||||
uint64_t zap_object;
|
||||
struct dmu_buf *zap_dbuf;
|
||||
krwlock_t zap_rwlock;
|
||||
boolean_t zap_ismicro;
|
||||
int zap_normflags;
|
||||
uint64_t zap_salt;
|
||||
union {
|
||||
struct {
|
||||
zap_phys_t *zap_phys;
|
||||
|
||||
/*
|
||||
* zap_num_entries_mtx protects
|
||||
* zap_num_entries
|
||||
*/
|
||||
kmutex_t zap_num_entries_mtx;
|
||||
int zap_block_shift;
|
||||
} zap_fat;
|
||||
struct {
|
||||
mzap_phys_t *zap_phys;
|
||||
int16_t zap_num_entries;
|
||||
int16_t zap_num_chunks;
|
||||
int16_t zap_alloc_next;
|
||||
avl_tree_t zap_avl;
|
||||
} zap_micro;
|
||||
} zap_u;
|
||||
} zap_t;
|
||||
|
||||
typedef struct zap_name {
|
||||
zap_t *zn_zap;
|
||||
const char *zn_name_orij;
|
||||
uint64_t zn_hash;
|
||||
matchtype_t zn_matchtype;
|
||||
const char *zn_name_norm;
|
||||
char zn_normbuf[ZAP_MAXNAMELEN];
|
||||
} zap_name_t;
|
||||
|
||||
#define zap_f zap_u.zap_fat
|
||||
#define zap_m zap_u.zap_micro
|
||||
|
||||
boolean_t zap_match(zap_name_t *zn, const char *matchname);
|
||||
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
|
||||
void zap_unlockdir(zap_t *zap);
|
||||
void zap_evict(dmu_buf_t *db, void *vmzap);
|
||||
zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
|
||||
void zap_name_free(zap_name_t *zn);
|
||||
|
||||
/*
 * Yield the n most-significant bits of hash (assuming a 64-bit hash
 * value). n == 0 is answered directly with 0, because the shift count
 * would otherwise be 64, which is not a valid shift for a 64-bit operand.
 */
#define ZAP_HASH_IDX(hash, n) (((n) != 0) ? ((hash) >> (64 - (n))) : 0)
|
||||
|
||||
void fzap_byteswap(void *buf, size_t size);
|
||||
int fzap_count(zap_t *zap, uint64_t *count);
|
||||
int fzap_lookup(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
char *realname, int rn_len, boolean_t *normalization_conflictp);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
int fzap_length(zap_name_t *zn,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
|
||||
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
|
||||
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
|
||||
void zap_put_leaf(struct zap_leaf *l);
|
||||
|
||||
int fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, dmu_tx_t *tx);
|
||||
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_IMPL_H */
|
||||
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_LEAF_H
|
||||
#define _SYS_ZAP_LEAF_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct zap;
|
||||
|
||||
#define ZAP_LEAF_MAGIC 0x2AB1EAF
|
||||
|
||||
/* chunk size = 24 bytes */
|
||||
#define ZAP_LEAF_CHUNKSIZE 24
|
||||
|
||||
/*
|
||||
* The amount of space available for chunks is:
|
||||
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
|
||||
* entries - header space (2*chunksize)
|
||||
*/
|
||||
#define ZAP_LEAF_NUMCHUNKS(l) \
|
||||
(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
|
||||
ZAP_LEAF_CHUNKSIZE - 2)
|
||||
|
||||
/*
|
||||
* The amount of space within the chunk available for the array is:
|
||||
* chunk size - space for type (1) - space for next pointer (2)
|
||||
*/
|
||||
#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
|
||||
|
||||
#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
|
||||
(((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)
|
||||
|
||||
/*
|
||||
* Low water mark: when there are only this many chunks free, start
|
||||
* growing the ptrtbl. Ideally, this should be larger than a
|
||||
* "reasonably-sized" entry. 20 chunks is more than enough for the
|
||||
* largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
|
||||
* while still being only around 3% for 16k blocks.
|
||||
*/
|
||||
#define ZAP_LEAF_LOW_WATER (20)
|
||||
|
||||
/*
|
||||
* The leaf hash table has block size / 2^5 (32) number of entries,
|
||||
* which should be more than enough for the maximum number of entries,
|
||||
* which is less than block size / CHUNKSIZE (24) / minimum number of
|
||||
* chunks per entry (3).
|
||||
*/
|
||||
#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
|
||||
#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
|
||||
|
||||
/*
|
||||
* The chunks start immediately after the hash table. The end of the
|
||||
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
|
||||
* chunk_t.
|
||||
*/
|
||||
#define ZAP_LEAF_CHUNK(l, idx) \
|
||||
((zap_leaf_chunk_t *) \
|
||||
((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
|
||||
#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
|
||||
|
||||
typedef enum zap_chunk_type {
|
||||
ZAP_CHUNK_FREE = 253,
|
||||
ZAP_CHUNK_ENTRY = 252,
|
||||
ZAP_CHUNK_ARRAY = 251,
|
||||
ZAP_CHUNK_TYPE_MAX = 250
|
||||
} zap_chunk_type_t;
|
||||
|
||||
#define ZLF_ENTRIES_CDSORTED (1<<0)
|
||||
|
||||
/*
|
||||
* TAKE NOTE:
|
||||
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
|
||||
*/
|
||||
typedef struct zap_leaf_phys {
|
||||
struct zap_leaf_header {
|
||||
uint64_t lh_block_type; /* ZBT_LEAF */
|
||||
uint64_t lh_pad1;
|
||||
uint64_t lh_prefix; /* hash prefix of this leaf */
|
||||
uint32_t lh_magic; /* ZAP_LEAF_MAGIC */
|
||||
uint16_t lh_nfree; /* number free chunks */
|
||||
uint16_t lh_nentries; /* number of entries */
|
||||
uint16_t lh_prefix_len; /* num bits used to id this */
|
||||
|
||||
/* above is accessible to zap, below is zap_leaf private */
|
||||
|
||||
uint16_t lh_freelist; /* chunk head of free list */
|
||||
uint8_t lh_flags; /* ZLF_* flags */
|
||||
uint8_t lh_pad2[11];
|
||||
} l_hdr; /* 2 24-byte chunks */
|
||||
|
||||
/*
|
||||
* The header is followed by a hash table with
|
||||
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
|
||||
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
|
||||
* zap_leaf_chunk structures. These structures are accessed
|
||||
* with the ZAP_LEAF_CHUNK() macro.
|
||||
*/
|
||||
|
||||
uint16_t l_hash[1];
|
||||
} zap_leaf_phys_t;
|
||||
|
||||
typedef union zap_leaf_chunk {
|
||||
struct zap_leaf_entry {
|
||||
uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
|
||||
uint8_t le_int_size; /* size of ints */
|
||||
uint16_t le_next; /* next entry in hash chain */
|
||||
uint16_t le_name_chunk; /* first chunk of the name */
|
||||
uint16_t le_name_length; /* bytes in name, incl null */
|
||||
uint16_t le_value_chunk; /* first chunk of the value */
|
||||
uint16_t le_value_length; /* value length in ints */
|
||||
uint32_t le_cd; /* collision differentiator */
|
||||
uint64_t le_hash; /* hash value of the name */
|
||||
} l_entry;
|
||||
struct zap_leaf_array {
|
||||
uint8_t la_type; /* always ZAP_CHUNK_ARRAY */
|
||||
uint8_t la_array[ZAP_LEAF_ARRAY_BYTES];
|
||||
uint16_t la_next; /* next blk or CHAIN_END */
|
||||
} l_array;
|
||||
struct zap_leaf_free {
|
||||
uint8_t lf_type; /* always ZAP_CHUNK_FREE */
|
||||
uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES];
|
||||
uint16_t lf_next; /* next in free list, or CHAIN_END */
|
||||
} l_free;
|
||||
} zap_leaf_chunk_t;
|
||||
|
||||
typedef struct zap_leaf {
|
||||
krwlock_t l_rwlock;
|
||||
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
|
||||
int l_bs; /* block size shift */
|
||||
dmu_buf_t *l_dbuf;
|
||||
zap_leaf_phys_t *l_phys;
|
||||
} zap_leaf_t;
|
||||
|
||||
|
||||
typedef struct zap_entry_handle {
|
||||
/* below is set by zap_leaf.c and is public to zap.c */
|
||||
uint64_t zeh_num_integers;
|
||||
uint64_t zeh_hash;
|
||||
uint32_t zeh_cd;
|
||||
uint8_t zeh_integer_size;
|
||||
|
||||
/* below is private to zap_leaf.c */
|
||||
uint16_t zeh_fakechunk;
|
||||
uint16_t *zeh_chunkp;
|
||||
zap_leaf_t *zeh_leaf;
|
||||
} zap_entry_handle_t;
|
||||
|
||||
/*
|
||||
* Return a handle to the named entry, or ENOENT if not found. The hash
|
||||
* value must equal zap_hash(name).
|
||||
*/
|
||||
extern int zap_leaf_lookup(zap_leaf_t *l,
|
||||
zap_name_t *zn, zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Return a handle to the entry with this hash+cd, or the entry with the
|
||||
* next closest hash+cd.
|
||||
*/
|
||||
extern int zap_leaf_lookup_closest(zap_leaf_t *l,
|
||||
uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Read the first num_integers in the attribute. Integer size
|
||||
* conversion will be done without sign extension. Return EINVAL if
|
||||
* integer_size is too small. Return EOVERFLOW if there are more than
|
||||
* num_integers in the attribute.
|
||||
*/
|
||||
extern int zap_entry_read(const zap_entry_handle_t *zeh,
|
||||
uint8_t integer_size, uint64_t num_integers, void *buf);
|
||||
|
||||
extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
|
||||
uint16_t buflen, char *buf);
|
||||
|
||||
/*
|
||||
* Replace the value of an existing entry.
|
||||
*
|
||||
* zap_entry_update may fail if it runs out of space (ENOSPC).
|
||||
*/
|
||||
extern int zap_entry_update(zap_entry_handle_t *zeh,
|
||||
uint8_t integer_size, uint64_t num_integers, const void *buf);
|
||||
|
||||
/*
|
||||
* Remove an entry.
|
||||
*/
|
||||
extern void zap_entry_remove(zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Create an entry. An equal entry must not exist, and this entry must
|
||||
* belong in this leaf (according to its hash value). Fills in the
|
||||
* entry handle on success. Returns 0 on success or ENOSPC on failure.
|
||||
*/
|
||||
extern int zap_entry_create(zap_leaf_t *l,
|
||||
const char *name, uint64_t h, uint32_t cd,
|
||||
uint8_t integer_size, uint64_t num_integers, const void *buf,
|
||||
zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Return true if there are additional entries with the same normalized
|
||||
* form.
|
||||
*/
|
||||
extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
|
||||
zap_name_t *zn, const char *name, zap_t *zap);
|
||||
|
||||
/*
|
||||
* Other stuff.
|
||||
*/
|
||||
|
||||
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
|
||||
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
|
||||
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
|
||||
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_LEAF_H */
|
||||
@@ -0,0 +1,214 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_ACL_H
|
||||
#define _SYS_FS_ZFS_ACL_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#endif
|
||||
#include <sys/acl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_fuid.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct znode_phys;
|
||||
|
||||
#define ACE_SLOT_CNT 6
|
||||
#define ZFS_ACL_VERSION_INITIAL 0ULL
|
||||
#define ZFS_ACL_VERSION_FUID 1ULL
|
||||
#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID
|
||||
|
||||
/*
|
||||
* ZFS ACLs are stored in various forms.
|
||||
* Files created with ACL version ZFS_ACL_VERSION_INITIAL
|
||||
* will all be created with fixed length ACEs of type
|
||||
* zfs_oldace_t.
|
||||
*
|
||||
* Files with ACL version ZFS_ACL_VERSION_FUID will be created
|
||||
* with various sized ACEs. The abstraction entries will utilize
|
||||
* zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t
|
||||
* and some specialized CIFS ACEs will use zfs_object_ace_t.
|
||||
*/
|
||||
|
||||
/*
|
||||
* All ACEs have a common hdr. For
|
||||
* owner@, group@, and everyone@ this is all
|
||||
* that's needed.
|
||||
*/
|
||||
typedef struct zfs_ace_hdr {
|
||||
uint16_t z_type;
|
||||
uint16_t z_flags;
|
||||
uint32_t z_access_mask;
|
||||
} zfs_ace_hdr_t;
|
||||
|
||||
typedef zfs_ace_hdr_t zfs_ace_abstract_t;
|
||||
|
||||
/*
|
||||
* Standard ACE
|
||||
*/
|
||||
typedef struct zfs_ace {
|
||||
zfs_ace_hdr_t z_hdr;
|
||||
uint64_t z_fuid;
|
||||
} zfs_ace_t;
|
||||
|
||||
/*
|
||||
* The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE
|
||||
* and will only be set/retrieved in a CIFS context.
|
||||
*/
|
||||
|
||||
typedef struct zfs_object_ace {
|
||||
zfs_ace_t z_ace;
|
||||
uint8_t z_object_type[16]; /* object type */
|
||||
uint8_t z_inherit_type[16]; /* inherited object type */
|
||||
} zfs_object_ace_t;
|
||||
|
||||
typedef struct zfs_oldace {
|
||||
uint32_t z_fuid; /* "who" */
|
||||
uint32_t z_access_mask; /* access mask */
|
||||
uint16_t z_flags; /* flags, i.e inheritance */
|
||||
uint16_t z_type; /* type of entry allow/deny */
|
||||
} zfs_oldace_t;
|
||||
|
||||
typedef struct zfs_acl_phys_v0 {
|
||||
uint64_t z_acl_extern_obj; /* ext acl pieces */
|
||||
uint32_t z_acl_count; /* Number of ACEs */
|
||||
uint16_t z_acl_version; /* acl version */
|
||||
uint16_t z_acl_pad; /* pad */
|
||||
zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
|
||||
} zfs_acl_phys_v0_t;
|
||||
|
||||
#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
|
||||
|
||||
typedef struct zfs_acl_phys {
|
||||
uint64_t z_acl_extern_obj; /* ext acl pieces */
|
||||
uint32_t z_acl_size; /* Number of bytes in ACL */
|
||||
uint16_t z_acl_version; /* acl version */
|
||||
uint16_t z_acl_count; /* ace count */
|
||||
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
|
||||
} zfs_acl_phys_t;
|
||||
|
||||
|
||||
|
||||
typedef struct acl_ops {
|
||||
uint32_t (*ace_mask_get) (void *acep); /* get access mask */
|
||||
void (*ace_mask_set) (void *acep,
|
||||
uint32_t mask); /* set access mask */
|
||||
uint16_t (*ace_flags_get) (void *acep); /* get flags */
|
||||
void (*ace_flags_set) (void *acep,
|
||||
uint16_t flags); /* set flags */
|
||||
uint16_t (*ace_type_get)(void *acep); /* get type */
|
||||
void (*ace_type_set)(void *acep,
|
||||
uint16_t type); /* set type */
|
||||
uint64_t (*ace_who_get)(void *acep); /* get who/fuid */
|
||||
void (*ace_who_set)(void *acep,
|
||||
uint64_t who); /* set who/fuid */
|
||||
size_t (*ace_size)(void *acep); /* how big is this ace */
|
||||
size_t (*ace_abstract_size)(void); /* sizeof abstract entry */
|
||||
int (*ace_mask_off)(void); /* off of access mask in ace */
|
||||
int (*ace_data)(void *acep, void **datap);
|
||||
/* ptr to data if any */
|
||||
} acl_ops_t;
|
||||
|
||||
/*
|
||||
* A zfs_acl_t structure is composed of a list of zfs_acl_node_t's.
|
||||
* Each node will have one or more ACEs associated with it. You will
|
||||
* only have multiple nodes during a chmod operation. Normally only
|
||||
* one node is required.
|
||||
*/
|
||||
typedef struct zfs_acl_node {
|
||||
list_node_t z_next; /* Next chunk of ACEs */
|
||||
void *z_acldata; /* pointer into actual ACE(s) */
|
||||
void *z_allocdata; /* pointer to kmem allocated memory */
|
||||
size_t z_allocsize; /* Size of blob in bytes */
|
||||
size_t z_size; /* length of ACL data */
|
||||
int z_ace_count; /* number of ACEs in this acl node */
|
||||
int z_ace_idx; /* ace iterator positioned on */
|
||||
} zfs_acl_node_t;
|
||||
|
||||
typedef struct zfs_acl {
|
||||
int z_acl_count; /* Number of ACEs */
|
||||
size_t z_acl_bytes; /* Number of bytes in ACL */
|
||||
uint_t z_version; /* version of ACL */
|
||||
void *z_next_ace; /* pointer to next ACE */
|
||||
int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
|
||||
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
|
||||
list_t z_acl; /* chunks of ACE data */
|
||||
acl_ops_t z_ops; /* ACL operations */
|
||||
boolean_t z_has_fuids; /* FUIDs present in ACL? */
|
||||
} zfs_acl_t;
|
||||
|
||||
#define ACL_DATA_ALLOCED 0x1
|
||||
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
|
||||
|
||||
/*
|
||||
* Property values for acl_mode and acl_inherit.
|
||||
*
|
||||
* acl_mode can take discard, noallow, groupmask and passthrough,
|
||||
* whereas acl_inherit has secure instead of groupmask.
|
||||
*/
|
||||
|
||||
#define ZFS_ACL_DISCARD 0
|
||||
#define ZFS_ACL_NOALLOW 1
|
||||
#define ZFS_ACL_GROUPMASK 2
|
||||
#define ZFS_ACL_PASSTHROUGH 3
|
||||
#define ZFS_ACL_RESTRICTED 4
|
||||
#define ZFS_ACL_PASSTHROUGH_X 5
|
||||
|
||||
struct znode;
|
||||
struct zfsvfs;
|
||||
struct zfs_fuid_info;
|
||||
|
||||
#ifdef _KERNEL
|
||||
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
|
||||
dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
void zfs_acl_rele(void *);
|
||||
void zfs_oldace_byteswap(ace_t *, int);
|
||||
void zfs_ace_byteswap(void *, size_t, boolean_t);
|
||||
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
|
||||
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
|
||||
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
|
||||
extern int zfs_acl_access(struct znode *, int, cred_t *);
|
||||
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
|
||||
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
|
||||
int zfs_zaccess_rename(struct znode *, struct znode *,
|
||||
struct znode *, struct znode *, cred_t *cr);
|
||||
void zfs_acl_free(zfs_acl_t *);
|
||||
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
|
||||
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
|
||||
struct zfs_fuid_info **, dmu_tx_t *);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* _SYS_FS_ZFS_ACL_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/note.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/t_lock.h>
|
||||
#include <sys/atomic.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/bitmap.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/cpuvar.h>
|
||||
#include <sys/kobj.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/disp.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/dirent.h>
|
||||
#include <sys/time.h>
|
||||
#include <vm/seg_kmem.h>
|
||||
#include <sys/zone.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/zfs_debug.h>
|
||||
#include <sys/sysevent.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
#include <sys/fm/util.h>
|
||||
|
||||
#define CPU_SEQID (CPU->cpu_seqid)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_CONTEXT_H */
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_CTLDIR_H
|
||||
#define _ZFS_CTLDIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZFS_CTLDIR_NAME ".zfs"
|
||||
|
||||
#define zfs_has_ctldir(zdp) \
|
||||
((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \
|
||||
((zdp)->z_zfsvfs->z_ctldir != NULL))
|
||||
#define zfs_show_ctldir(zdp) \
|
||||
(zfs_has_ctldir(zdp) && \
|
||||
((zdp)->z_zfsvfs->z_show_ctldir))
|
||||
|
||||
void zfsctl_create(zfsvfs_t *);
|
||||
void zfsctl_destroy(zfsvfs_t *);
|
||||
vnode_t *zfsctl_root(znode_t *);
|
||||
void zfsctl_init(void);
|
||||
void zfsctl_fini(void);
|
||||
|
||||
int zfsctl_rename_snapshot(const char *from, const char *to);
|
||||
int zfsctl_destroy_snapshot(const char *snapname, int force);
|
||||
int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
|
||||
|
||||
int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
|
||||
int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
|
||||
int *direntflags, pathname_t *realpnp);
|
||||
|
||||
int zfsctl_make_fid(zfsvfs_t *zfsvfsp, uint64_t object, uint32_t gen,
|
||||
fid_t *fidp);
|
||||
int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
|
||||
|
||||
#define ZFSCTL_INO_ROOT 0x1
|
||||
#define ZFSCTL_INO_SNAPDIR 0x2
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFS_CTLDIR_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_DEBUG_H
|
||||
#define _SYS_ZFS_DEBUG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ZFS debugging
|
||||
*/
|
||||
|
||||
#if defined(DEBUG) || !defined(_KERNEL)
|
||||
#define ZFS_DEBUG
|
||||
#endif
|
||||
|
||||
extern int zfs_flags;
|
||||
|
||||
#define ZFS_DEBUG_DPRINTF 0x0001
|
||||
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
|
||||
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
|
||||
#define ZFS_DEBUG_SNAPNAMES 0x0008
|
||||
#define ZFS_DEBUG_MODIFY 0x0010
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
extern void __dprintf(const char *file, const char *func,
|
||||
int line, const char *fmt, ...);
|
||||
/*
 * Emit a debug message through __dprintf() when ZFS_DEBUG_DPRINTF is set
 * in zfs_flags, passing the caller's file, function and line.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as one
 * statement: a bare "if" here would capture an "else" that follows the
 * call site (e.g. "if (c) dprintf(...); else ...").
 */
#define dprintf(...) \
	do { \
		if (zfs_flags & ZFS_DEBUG_DPRINTF) \
			__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__); \
	} while (0)
|
||||
#else
|
||||
#define dprintf(...) ((void)0)
|
||||
#endif /* ZFS_DEBUG */
|
||||
|
||||
extern void zfs_panic_recover(const char *fmt, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_DEBUG_H */
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_DIR_H
|
||||
#define _SYS_FS_ZFS_DIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* zfs_dirent_lock() flags */
|
||||
#define ZNEW 0x0001 /* entry should not exist */
|
||||
#define ZEXISTS 0x0002 /* entry should exist */
|
||||
#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
|
||||
#define ZXATTR 0x0008 /* we want the xattr dir */
|
||||
#define ZRENAMING 0x0010 /* znode is being renamed */
|
||||
#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
|
||||
#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
|
||||
|
||||
/* mknode flags */
|
||||
#define IS_ROOT_NODE 0x01 /* create a root node */
|
||||
#define IS_XATTR 0x02 /* create an extended attribute node */
|
||||
#define IS_REPLAY 0x04 /* we are replaying intent log */
|
||||
|
||||
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
|
||||
int, int *, pathname_t *);
|
||||
extern void zfs_dirent_unlock(zfs_dirlock_t *);
|
||||
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
|
||||
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
|
||||
boolean_t *);
|
||||
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
|
||||
pathname_t *);
|
||||
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
|
||||
uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_rmnode(znode_t *);
|
||||
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
|
||||
extern boolean_t zfs_dirempty(znode_t *);
|
||||
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
|
||||
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
|
||||
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
|
||||
extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int);
|
||||
extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_DIR_H */
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_FUID_H
|
||||
#define _SYS_FS_ZFS_FUID_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kidmap.h>
|
||||
#include <sys/sid.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#endif
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
ZFS_OWNER,
|
||||
ZFS_GROUP,
|
||||
ZFS_ACE_USER,
|
||||
ZFS_ACE_GROUP
|
||||
} zfs_fuid_type_t;
|
||||
|
||||
/*
|
||||
* Estimate space needed for one more fuid table entry.
|
||||
* For now, assume its current size + 1K.
|
||||
*/
|
||||
/* z is parenthesized so any pointer expression (e.g. &obj) can be passed. */
#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
|
||||
|
||||
/* Upper 32 bits of a FUID; x is parenthesized so compound arguments work. */
#define FUID_INDEX(x) ((x) >> 32)
|
||||
/* Lower 32 bits of a FUID; x is parenthesized so compound arguments work. */
#define FUID_RID(x) ((x) & 0xffffffff)
|
||||
/*
 * Combine idx (upper 32 bits) and rid (lower bits) into one 64-bit value.
 * idx is widened to uint64_t before shifting: shifting a 32-bit operand
 * by 32 is undefined behavior.  Both arguments are parenthesized.
 */
#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
|
||||
/*
|
||||
* FUIDs cause problems for the intent log
|
||||
* we need to replay the creation of the FUID,
|
||||
* but we can't count on the idmapper to be around
|
||||
* and during replay the FUID index may be different than
|
||||
* before. Also, if an ACL has 100 ACEs and 12 different
|
||||
* domains we don't want to log 100 domain strings, but rather
|
||||
* just the unique 12.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The FUIDs in the log will index into
|
||||
* domain string table and the bottom half will be the rid.
|
||||
* Used for mapping ephemeral uid/gid during ACL setting to FUIDs
|
||||
*/
|
||||
typedef struct zfs_fuid {
|
||||
list_node_t z_next;
|
||||
uint64_t z_id; /* uid/gid being converted to fuid */
|
||||
uint64_t z_domidx; /* index in AVL domain table */
|
||||
uint64_t z_logfuid; /* index for domain in log */
|
||||
} zfs_fuid_t;
|
||||
|
||||
/* list of unique domains */
|
||||
typedef struct zfs_fuid_domain {
|
||||
list_node_t z_next;
|
||||
uint64_t z_domidx; /* AVL tree idx */
|
||||
const char *z_domain; /* domain string */
|
||||
} zfs_fuid_domain_t;
|
||||
|
||||
/*
|
||||
* FUID information necessary for logging create, setattr, and setacl.
|
||||
*/
|
||||
typedef struct zfs_fuid_info {
|
||||
list_t z_fuids;
|
||||
list_t z_domains;
|
||||
uint64_t z_fuid_owner;
|
||||
uint64_t z_fuid_group;
|
||||
char **z_domain_table; /* Used during replay */
|
||||
uint32_t z_fuid_cnt; /* How many fuids in z_fuids */
|
||||
uint32_t z_domain_cnt; /* How many domains */
|
||||
size_t z_domain_str_sz; /* len of domain strings z_domain list */
|
||||
} zfs_fuid_info_t;
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct znode;
|
||||
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
|
||||
extern void zfs_fuid_destroy(zfsvfs_t *);
|
||||
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
|
||||
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
|
||||
uid_t *gid);
|
||||
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
|
||||
/*
 * Declared with an explicit parameter so calls are type-checked; an empty
 * "()" in C leaves the arguments unspecified.
 * NOTE(review): presumably takes the object returned by
 * zfs_fuid_info_alloc() -- confirm against the definition in zfs_fuid.c.
 */
extern void zfs_fuid_info_free(struct zfs_fuid_info *);
|
||||
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
|
||||
#endif
|
||||
|
||||
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
|
||||
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
|
||||
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_FUID_H */
|
||||
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_IOCTL_H
|
||||
#define _SYS_ZFS_IOCTL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/cred.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/nvpair.h>
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Property values for snapdir
|
||||
*/
|
||||
#define ZFS_SNAPDIR_HIDDEN 0
|
||||
#define ZFS_SNAPDIR_VISIBLE 1
|
||||
|
||||
#define DMU_BACKUP_STREAM_VERSION (1ULL)
|
||||
#define DMU_BACKUP_HEADER_VERSION (2ULL)
|
||||
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
|
||||
|
||||
#define DRR_FLAG_CLONE (1<<0)
|
||||
#define DRR_FLAG_CI_DATA (1<<1)
|
||||
|
||||
/*
|
||||
* zfs ioctl command structure
|
||||
*/
|
||||
typedef struct dmu_replay_record {
|
||||
enum {
|
||||
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
|
||||
DRR_WRITE, DRR_FREE, DRR_END,
|
||||
} drr_type;
|
||||
uint32_t drr_payloadlen;
|
||||
union {
|
||||
struct drr_begin {
|
||||
uint64_t drr_magic;
|
||||
uint64_t drr_version;
|
||||
uint64_t drr_creation_time;
|
||||
dmu_objset_type_t drr_type;
|
||||
uint32_t drr_flags;
|
||||
uint64_t drr_toguid;
|
||||
uint64_t drr_fromguid;
|
||||
char drr_toname[MAXNAMELEN];
|
||||
} drr_begin;
|
||||
struct drr_end {
|
||||
zio_cksum_t drr_checksum;
|
||||
} drr_end;
|
||||
struct drr_object {
|
||||
uint64_t drr_object;
|
||||
dmu_object_type_t drr_type;
|
||||
dmu_object_type_t drr_bonustype;
|
||||
uint32_t drr_blksz;
|
||||
uint32_t drr_bonuslen;
|
||||
uint8_t drr_checksum;
|
||||
uint8_t drr_compress;
|
||||
uint8_t drr_pad[6];
|
||||
/* bonus content follows */
|
||||
} drr_object;
|
||||
struct drr_freeobjects {
|
||||
uint64_t drr_firstobj;
|
||||
uint64_t drr_numobjs;
|
||||
} drr_freeobjects;
|
||||
struct drr_write {
|
||||
uint64_t drr_object;
|
||||
dmu_object_type_t drr_type;
|
||||
uint32_t drr_pad;
|
||||
uint64_t drr_offset;
|
||||
uint64_t drr_length;
|
||||
/* content follows */
|
||||
} drr_write;
|
||||
struct drr_free {
|
||||
uint64_t drr_object;
|
||||
uint64_t drr_offset;
|
||||
uint64_t drr_length;
|
||||
} drr_free;
|
||||
} drr_u;
|
||||
} dmu_replay_record_t;
|
||||
|
||||
typedef struct zinject_record {
|
||||
uint64_t zi_objset;
|
||||
uint64_t zi_object;
|
||||
uint64_t zi_start;
|
||||
uint64_t zi_end;
|
||||
uint64_t zi_guid;
|
||||
uint32_t zi_level;
|
||||
uint32_t zi_error;
|
||||
uint64_t zi_type;
|
||||
uint32_t zi_freq;
|
||||
uint32_t zi_pad; /* pad out to 64 bit alignment */
|
||||
} zinject_record_t;
|
||||
|
||||
#define ZINJECT_NULL 0x1
|
||||
#define ZINJECT_FLUSH_ARC 0x2
|
||||
#define ZINJECT_UNLOAD_SPA 0x4
|
||||
|
||||
typedef struct zfs_share {
|
||||
uint64_t z_exportdata;
|
||||
uint64_t z_sharedata;
|
||||
uint64_t z_sharetype; /* 0 = share, 1 = unshare */
|
||||
uint64_t z_sharemax; /* max length of share string */
|
||||
} zfs_share_t;
|
||||
|
||||
/*
|
||||
* ZFS file systems may behave the usual, POSIX-compliant way, where
|
||||
* name lookups are case-sensitive. They may also be set up so that
|
||||
* all the name lookups are case-insensitive, or so that only some
|
||||
* lookups, the ones that set an FIGNORECASE flag, are case-insensitive.
|
||||
*/
|
||||
typedef enum zfs_case {
|
||||
ZFS_CASE_SENSITIVE,
|
||||
ZFS_CASE_INSENSITIVE,
|
||||
ZFS_CASE_MIXED
|
||||
} zfs_case_t;
|
||||
|
||||
typedef struct zfs_cmd {
|
||||
char zc_name[MAXPATHLEN];
|
||||
char zc_value[MAXPATHLEN * 2];
|
||||
char zc_string[MAXNAMELEN];
|
||||
uint64_t zc_guid;
|
||||
uint64_t zc_nvlist_conf; /* really (char *) */
|
||||
uint64_t zc_nvlist_conf_size;
|
||||
uint64_t zc_nvlist_src; /* really (char *) */
|
||||
uint64_t zc_nvlist_src_size;
|
||||
uint64_t zc_nvlist_dst; /* really (char *) */
|
||||
uint64_t zc_nvlist_dst_size;
|
||||
uint64_t zc_cookie;
|
||||
uint64_t zc_objset_type;
|
||||
uint64_t zc_perm_action;
|
||||
uint64_t zc_history; /* really (char *) */
|
||||
uint64_t zc_history_len;
|
||||
uint64_t zc_history_offset;
|
||||
uint64_t zc_obj;
|
||||
zfs_share_t zc_share;
|
||||
dmu_objset_stats_t zc_objset_stats;
|
||||
struct drr_begin zc_begin_record;
|
||||
zinject_record_t zc_inject_record;
|
||||
} zfs_cmd_t;
|
||||
|
||||
/* ZFS_MIN_MINOR is defined as the first value above ZVOL_MAX_MINOR. */
#define	ZVOL_MAX_MINOR	(1 << 16)
#define	ZFS_MIN_MINOR	(ZVOL_MAX_MINOR + 1)
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
typedef struct zfs_creat {
|
||||
nvlist_t *zct_zplprops;
|
||||
nvlist_t *zct_props;
|
||||
} zfs_creat_t;
|
||||
|
||||
extern dev_info_t *zfs_dip;
|
||||
|
||||
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_secpolicy_rename_perms(const char *from,
|
||||
const char *to, cred_t *cr);
|
||||
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_busy(void);
|
||||
extern int zfs_unmount_snap(char *, void *);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_IOCTL_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_RLOCK_H
|
||||
#define _SYS_FS_ZFS_RLOCK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
/*
 * Range lock types.  Enumerators take the default values 0, 1 and 2.
 */
typedef enum {
	RL_READER,
	RL_WRITER,
	RL_APPEND
} rl_type_t;
|
||||
|
||||
typedef struct rl {
|
||||
znode_t *r_zp; /* znode this lock applies to */
|
||||
avl_node_t r_node; /* avl node link */
|
||||
uint64_t r_off; /* file range offset */
|
||||
uint64_t r_len; /* file range length */
|
||||
uint_t r_cnt; /* range reference count in tree */
|
||||
rl_type_t r_type; /* range type */
|
||||
kcondvar_t r_wr_cv; /* cv for waiting writers */
|
||||
kcondvar_t r_rd_cv; /* cv for waiting readers */
|
||||
uint8_t r_proxy; /* acting for original range */
|
||||
uint8_t r_write_wanted; /* writer wants to lock this range */
|
||||
uint8_t r_read_wanted; /* reader wants to lock this range */
|
||||
} rl_t;
|
||||
|
||||
/*
|
||||
* Lock a range (offset, length) as either shared (READER)
|
||||
* or exclusive (WRITER or APPEND). APPEND is a special type that
|
||||
* is converted to WRITER that specified to lock from the start of the
|
||||
* end of file. zfs_range_lock() returns the range lock structure.
|
||||
*/
|
||||
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
|
||||
|
||||
/*
|
||||
* Unlock range and destroy range lock structure.
|
||||
*/
|
||||
void zfs_range_unlock(rl_t *rl);
|
||||
|
||||
/*
|
||||
* Reduce range locked as RW_WRITER from whole file to specified range.
|
||||
* Asserts the whole file was previously locked.
|
||||
*/
|
||||
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
|
||||
|
||||
/*
|
||||
* AVL comparison function used to compare range locks
|
||||
*/
|
||||
int zfs_range_compare(const void *arg1, const void *arg2);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_RLOCK_H */
|
||||
@@ -0,0 +1,140 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_VFSOPS_H
|
||||
#define _SYS_FS_ZFS_VFSOPS_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct zfsvfs zfsvfs_t;
|
||||
|
||||
struct zfsvfs {
|
||||
vfs_t *z_vfs; /* generic fs struct */
|
||||
zfsvfs_t *z_parent; /* parent fs */
|
||||
objset_t *z_os; /* objset reference */
|
||||
uint64_t z_root; /* id of root znode */
|
||||
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
|
||||
uint64_t z_max_blksz; /* maximum block size for files */
|
||||
uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
|
||||
uint64_t z_fuid_obj; /* fuid table object number */
|
||||
uint64_t z_fuid_size; /* fuid table size */
|
||||
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
|
||||
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
|
||||
krwlock_t z_fuid_lock; /* fuid lock */
|
||||
boolean_t z_fuid_loaded; /* fuid tables are loaded */
|
||||
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
|
||||
zilog_t *z_log; /* intent log pointer */
|
||||
uint_t z_acl_mode; /* acl chmod/mode behavior */
|
||||
uint_t z_acl_inherit; /* acl inheritance behavior */
|
||||
zfs_case_t z_case; /* case-sense */
|
||||
boolean_t z_utf8; /* utf8-only */
|
||||
int z_norm; /* normalization flags */
|
||||
boolean_t z_atime; /* enable atimes mount option */
|
||||
boolean_t z_unmounted; /* unmounted */
|
||||
rrwlock_t z_teardown_lock;
|
||||
krwlock_t z_teardown_inactive_lock;
|
||||
list_t z_all_znodes; /* all vnodes in the fs */
|
||||
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
|
||||
vnode_t *z_ctldir; /* .zfs directory pointer */
|
||||
boolean_t z_show_ctldir; /* expose .zfs in the root dir */
|
||||
boolean_t z_issnap; /* true if this is a snapshot */
|
||||
boolean_t z_vscan; /* virus scan on/off */
|
||||
boolean_t z_use_fuids; /* version allows fuids */
|
||||
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
|
||||
uint64_t z_version; /* ZPL version */
|
||||
#define ZFS_OBJ_MTX_SZ 64
|
||||
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
|
||||
};
|
||||
|
||||
/*
|
||||
* Normal filesystems (those not under .zfs/snapshot) have a total
|
||||
* file ID size limited to 12 bytes (including the length field) due to
|
||||
* NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical
|
||||
* reasons, this same limit is being imposed by the Solaris NFSv3 implementation
|
||||
* (although the NFSv3 protocol actually permits a maximum of 64 bytes). It
|
||||
* is not possible to expand beyond 12 bytes without abandoning support
|
||||
* of NFSv2.
|
||||
*
|
||||
* For normal filesystems, we partition up the available space as follows:
|
||||
* 2 bytes fid length (required)
|
||||
* 6 bytes object number (48 bits)
|
||||
* 4 bytes generation number (32 bits)
|
||||
*
|
||||
* We reserve only 48 bits for the object number, as this is the limit
|
||||
* currently defined and imposed by the DMU.
|
||||
*/
|
||||
/*
 * Short file ID: 2-byte length, 6-byte object number and 4-byte
 * generation number.  The multi-byte values are kept as byte arrays,
 * least significant byte first, so the structure is 12 bytes with no
 * padding.
 */
typedef struct zfid_short {
	uint16_t	zf_len;
	uint8_t		zf_object[6];		/* obj[i] = obj >> (8 * i) */
	uint8_t		zf_gen[4];		/* gen[i] = gen >> (8 * i) */
} zfid_short_t;

/*
 * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
 * (including the length field).  This makes files under .zfs/snapshot
 * accessible by NFSv3 and NFSv4, but not NFSv2.
 *
 * For files under .zfs/snapshot, we partition up the available space
 * as follows:
 *	2 bytes		fid length (required)
 *	6 bytes		object number (48 bits)
 *	4 bytes		generation number (32 bits)
 *	6 bytes		objset id (48 bits)
 *	4 bytes		currently just zero (32 bits)
 *
 * We reserve only 48 bits for the object number and objset id, as these are
 * the limits currently defined and imposed by the DMU.
 */
typedef struct zfid_long {
	zfid_short_t	z_fid;
	uint8_t		zf_setid[6];		/* objset id, low byte first */
	uint8_t		zf_setgen[4];		/* currently just zero */
} zfid_long_t;

/* FID payload lengths: the structure size minus the zf_len field itself. */
#define	SHORT_FID_LEN	(sizeof (zfid_short_t) - sizeof (uint16_t))
#define	LONG_FID_LEN	(sizeof (zfid_long_t) - sizeof (uint16_t))
|
||||
|
||||
extern uint_t zfs_fsyncer_key;
|
||||
|
||||
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
|
||||
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_VFSOPS_H */
|
||||
@@ -0,0 +1,356 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_ZNODE_H
|
||||
#define _SYS_FS_ZFS_ZNODE_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#include <sys/attr.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#endif
|
||||
#include <sys/zfs_acl.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Additional file level attributes, that are stored
 * in the upper half of zp_flags.  Every value is a single bit at or
 * above bit 32.
 */
#define	ZFS_READONLY		0x0000000100000000
#define	ZFS_HIDDEN		0x0000000200000000
#define	ZFS_SYSTEM		0x0000000400000000
#define	ZFS_ARCHIVE		0x0000000800000000
#define	ZFS_IMMUTABLE		0x0000001000000000
#define	ZFS_NOUNLINK		0x0000002000000000
#define	ZFS_APPENDONLY		0x0000004000000000
#define	ZFS_NODUMP		0x0000008000000000
#define	ZFS_OPAQUE		0x0000010000000000
#define	ZFS_AV_QUARANTINED	0x0000020000000000
#define	ZFS_AV_MODIFIED		0x0000040000000000
|
||||
|
||||
/*
 * Set the bit(s) `attr' in (zp)->z_phys->zp_flags when `value' is
 * non-zero, clear them otherwise.
 *
 * `zp' and `attr' are parenthesized in the expansion so that compound
 * arguments such as (A | B) or &znode behave as written; without that,
 * ~A | B would complement only A.
 *
 * The expansion is a brace block, not a single statement: do not use it
 * as the unbraced body of an `if' that has an `else'.
 */
#define	ZFS_ATTR_SET(zp, attr, value)	\
{ \
	if (value) \
		(zp)->z_phys->zp_flags |= (attr); \
	else \
		(zp)->z_phys->zp_flags &= ~(attr); \
}
|
||||
|
||||
/*
 * Define special zfs pflags.  Each value is a single bit in the low
 * byte of the flags word.
 */
#define	ZFS_XATTR		0x1	/* is an extended attribute */
#define	ZFS_INHERIT_ACE		0x2	/* ace has inheritable ACEs */
#define	ZFS_ACL_TRIVIAL		0x4	/* files ACL is trivial */
#define	ZFS_ACL_OBJ_ACE		0x8	/* ACL has CMPLX Object ACE */
#define	ZFS_ACL_PROTECTED	0x10	/* ACL protected */
#define	ZFS_ACL_DEFAULTED	0x20	/* ACL should be defaulted */
#define	ZFS_ACL_AUTO_INHERIT	0x40	/* ACL should be inherited */
#define	ZFS_BONUS_SCANSTAMP	0x80	/* Scanstamp in bonus area */
|
||||
|
||||
/*
 * Is ID ephemeral?  True when the id is greater than MAXUID.
 * The argument is parenthesized so that expressions with operators of
 * lower precedence than `>' (?:, &, |, ...) are compared as a whole.
 */
#define	IS_EPHEMERAL(x)		((x) > MAXUID)
|
||||
|
||||
/*
 * Should we use FUIDs?  True only when `version' is at least
 * ZPL_VERSION_FUID and the SPA version of the pool holding `os' is at
 * least SPA_VERSION_FUID.  `version' is parenthesized so that a compound
 * expression is compared as a whole.
 */
#define	USE_FUIDS(version, os)	((version) >= ZPL_VERSION_FUID && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
|
||||
|
||||
#define MASTER_NODE_OBJ 1
|
||||
|
||||
/*
|
||||
* Special attributes for master node.
|
||||
*/
|
||||
#define ZFS_FSID "FSID"
|
||||
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
|
||||
#define ZFS_ROOT_OBJ "ROOT"
|
||||
#define ZPL_VERSION_STR "VERSION"
|
||||
#define ZFS_FUID_TABLES "FUID"
|
||||
|
||||
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
|
||||
|
||||
/* Path component length */
|
||||
/*
|
||||
* The generic fs code uses MAXNAMELEN to represent
|
||||
* what the largest component length is. Unfortunately,
|
||||
* this length includes the terminating NULL. ZFS needs
|
||||
* to tell the users via pathconf() and statvfs() what the
|
||||
* true maximum length of a component is, excluding the NULL.
|
||||
*/
|
||||
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
|
||||
|
||||
/*
|
||||
* Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
|
||||
* the directory entries.
|
||||
*/
|
||||
#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
|
||||
|
||||
/*
|
||||
* The directory entry has the type (currently unused on Solaris) in the
|
||||
* top 4 bits, and the object number in the low 48 bits. The "middle"
|
||||
* 12 bits are unused.
|
||||
*/
|
||||
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
|
||||
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
|
||||
|
||||
/*
|
||||
* This is the persistent portion of the znode. It is stored
|
||||
* in the "bonus buffer" of the file. Short symbolic links
|
||||
* are also stored in the bonus buffer.
|
||||
*/
|
||||
typedef struct znode_phys {
|
||||
uint64_t zp_atime[2]; /* 0 - last file access time */
|
||||
uint64_t zp_mtime[2]; /* 16 - last file modification time */
|
||||
uint64_t zp_ctime[2]; /* 32 - last file change time */
|
||||
uint64_t zp_crtime[2]; /* 48 - creation time */
|
||||
uint64_t zp_gen; /* 64 - generation (txg of creation) */
|
||||
uint64_t zp_mode; /* 72 - file mode bits */
|
||||
uint64_t zp_size; /* 80 - size of file */
|
||||
uint64_t zp_parent; /* 88 - directory parent (`..') */
|
||||
uint64_t zp_links; /* 96 - number of links to file */
|
||||
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
|
||||
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
|
||||
uint64_t zp_flags; /* 120 - persistent flags */
|
||||
uint64_t zp_uid; /* 128 - file owner */
|
||||
uint64_t zp_gid; /* 136 - owning group */
|
||||
uint64_t zp_zap; /* 144 - extra attributes */
|
||||
uint64_t zp_pad[3]; /* 152 - future */
|
||||
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
|
||||
/*
|
||||
* Data may pad out any remaining bytes in the znode buffer, eg:
|
||||
*
|
||||
* |<---------------------- dnode_phys (512) ------------------------>|
|
||||
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
|
||||
* |<---- znode (264) ---->|<---- data (56) ---->|
|
||||
*
|
||||
* At present, we use this space for the following:
|
||||
* - symbolic links
|
||||
* - 32-byte anti-virus scanstamp (regular files only)
|
||||
*/
|
||||
} znode_phys_t;
|
||||
|
||||
/*
|
||||
* Directory entry locks control access to directory entries.
|
||||
* They are used to protect creates, deletes, and renames.
|
||||
* Each directory znode has a mutex and a list of locked names.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
typedef struct zfs_dirlock {
|
||||
char *dl_name; /* directory entry being locked */
|
||||
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
|
||||
uint16_t dl_namesize; /* set if dl_name was allocated */
|
||||
kcondvar_t dl_cv; /* wait for entry to be unlocked */
|
||||
struct znode *dl_dzp; /* directory znode */
|
||||
struct zfs_dirlock *dl_next; /* next in z_dirlocks list */
|
||||
} zfs_dirlock_t;
|
||||
|
||||
typedef struct znode {
|
||||
struct zfsvfs *z_zfsvfs;
|
||||
vnode_t *z_vnode;
|
||||
uint64_t z_id; /* object ID for this znode */
|
||||
kmutex_t z_lock; /* znode modification lock */
|
||||
krwlock_t z_map_lock; /* page map lock */
|
||||
krwlock_t z_parent_lock; /* parent lock for directories */
|
||||
krwlock_t z_name_lock; /* "master" lock for dirent locks */
|
||||
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
|
||||
kmutex_t z_range_lock; /* protects changes to z_range_avl */
|
||||
avl_tree_t z_range_avl; /* avl tree of file range locks */
|
||||
uint8_t z_unlinked; /* file has been unlinked */
|
||||
uint8_t z_atime_dirty; /* atime needs to be synced */
|
||||
uint8_t z_zn_prefetch; /* Prefetch znodes? */
|
||||
uint_t z_blksz; /* block size in bytes */
|
||||
uint_t z_seq; /* modification sequence number */
|
||||
uint64_t z_mapcnt; /* number of pages mapped to file */
|
||||
uint64_t z_last_itx; /* last ZIL itx on this znode */
|
||||
uint64_t z_gen; /* generation (same as zp_gen) */
|
||||
uint32_t z_sync_cnt; /* synchronous open count */
|
||||
kmutex_t z_acl_lock; /* acl data lock */
|
||||
list_node_t z_link_node; /* all znodes in fs link */
|
||||
/*
|
||||
* These are dmu managed fields.
|
||||
*/
|
||||
znode_phys_t *z_phys; /* pointer to persistent znode */
|
||||
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
|
||||
} znode_t;
|
||||
|
||||
|
||||
/*
|
||||
* Range locking rules
|
||||
* --------------------
|
||||
* 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
|
||||
* file range needs to be locked as RL_WRITER. Only then can the pages be
|
||||
* freed etc and zp_size reset. zp_size must be set within range lock.
|
||||
* 2. For writes and punching holes (zfs_write & zfs_space) just the range
|
||||
* being written or freed needs to be locked as RL_WRITER.
|
||||
* Multiple writes at the end of the file must coordinate zp_size updates
|
||||
* to ensure data isn't lost. A compare and swap loop is currently used
|
||||
* to ensure the file size is at least the offset last written.
|
||||
* 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
|
||||
* read needs to be locked as RL_READER. A check against zp_size can then
|
||||
* be made for reading beyond end of file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert between znode pointers and vnode pointers
|
||||
*/
|
||||
/* ZTOV: znode -> its z_vnode.  VTOZ: vnode -> its v_data cast to znode_t *. */
#define	ZTOV(ZP)	((ZP)->z_vnode)
#define	VTOZ(VP)	((znode_t *)(VP)->v_data)
|
||||
|
||||
/*
|
||||
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
|
||||
* ZFS_EXIT() must be called before exiting the vop.
|
||||
* ZFS_VERIFY_ZP() verifies the znode is valid.
|
||||
*/
|
||||
/*
 * Take z_teardown_lock as reader with rrw_enter().  If the file system
 * is flagged z_unmounted, the lock is dropped again via ZFS_EXIT() and
 * the *calling function* returns EIO, so this may only be used in
 * functions that return an int error code.
 *
 * The expansion is a brace block, not a single statement: do not use it
 * as the unbraced body of an `if' that has an `else'.
 */
#define	ZFS_ENTER(zfsvfs) \
	{ \
		rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
		if ((zfsvfs)->z_unmounted) { \
			ZFS_EXIT(zfsvfs); \
			return (EIO); \
		} \
	}
|
||||
|
||||
#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
|
||||
|
||||
/*
 * If the znode has no backing buffer (z_dbuf == NULL), call ZFS_EXIT()
 * on its file system and make the *calling function* return EIO.
 *
 * The closing brace deliberately carries no line continuation: a
 * trailing backslash there would splice whatever line follows into the
 * macro body.
 *
 * The expansion is a bare `if' statement, so an `else' written directly
 * after an invocation would bind to it.
 */
#define	ZFS_VERIFY_ZP(zp) \
	if ((zp)->z_dbuf == NULL) { \
		ZFS_EXIT((zp)->z_zfsvfs); \
		return (EIO); \
	}
|
||||
|
||||
/*
|
||||
* Macros for dealing with dmu_buf_hold
|
||||
*/
|
||||
/*
 * ZFS_OBJ_HASH maps an object number to an index into z_hold_mtx by
 * masking with (ZFS_OBJ_MTX_SZ - 1); ZFS_OBJ_MUTEX yields the address of
 * that array slot.  The HOLD_ENTER / HOLD_TRYENTER / HOLD_EXIT wrappers
 * pass that mutex to mutex_enter(), mutex_tryenter() and mutex_exit().
 */
#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define	ZFS_OBJ_MUTEX(zfsvfs, obj_num)	\
	(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
	mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
	mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
|
||||
|
||||
/*
|
||||
* Macros to encode/decode ZFS stored time values from/to struct timespec
|
||||
*/
|
||||
/*
 * Store the time pointed to by `tp' into the two-element array `stmp':
 * element 0 receives tv_sec and element 1 receives tv_nsec, each cast
 * to uint64_t.  Expands to a brace block, not a single statement.
 */
#define	ZFS_TIME_ENCODE(tp, stmp)		\
{						\
	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
}
|
||||
|
||||
/*
 * Inverse of ZFS_TIME_ENCODE: load tv_sec from stmp[0] (cast to time_t)
 * and tv_nsec from stmp[1] (cast to long) into the time pointed to by
 * `tp'.  Expands to a brace block, not a single statement.
 */
#define	ZFS_TIME_DECODE(tp, stmp)		\
{						\
	(tp)->tv_sec = (time_t)(stmp)[0];	\
	(tp)->tv_nsec = (long)(stmp)[1];	\
}
|
||||
|
||||
/*
|
||||
* Timestamp defines
|
||||
*/
|
||||
#define ACCESSED (AT_ATIME)
|
||||
#define STATE_CHANGED (AT_CTIME)
|
||||
#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
|
||||
|
||||
/*
 * Stamp the access time of `zp' through zfs_time_stamper(), but only
 * when atime updates are enabled (z_atime) and the vfs is not flagged
 * VFS_RDONLY.
 *
 * Wrapped in do { } while (0) so the expansion is exactly one statement:
 * with a bare `if', an `else' following the invocation would silently
 * bind to the macro's own `if'.  Callers still supply the terminating
 * semicolon, as before.
 */
#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
	do { \
		if ((zfsvfs)->z_atime && \
		    !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
			zfs_time_stamper(zp, ACCESSED, NULL); \
	} while (0)
|
||||
|
||||
extern int zfs_init_fs(zfsvfs_t *, znode_t **);
|
||||
extern void zfs_set_dataprop(objset_t *);
|
||||
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
|
||||
dmu_tx_t *tx);
|
||||
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
|
||||
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
|
||||
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
|
||||
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
|
||||
extern void zfs_znode_init(void);
|
||||
extern void zfs_znode_fini(void);
|
||||
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
|
||||
extern int zfs_rezget(znode_t *);
|
||||
extern void zfs_zinactive(znode_t *);
|
||||
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
|
||||
extern void zfs_znode_free(znode_t *);
|
||||
/*
 * Declared with (void) so these are real prototypes: an empty parameter
 * list leaves the arguments unchecked at call sites.
 * NOTE(review): assumes both definitions take no parameters -- confirm
 * against their definitions.
 */
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
|
||||
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
|
||||
extern dev_t zfs_cmpldev(uint64_t);
|
||||
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
|
||||
extern int zfs_set_version(const char *name, uint64_t newvers);
|
||||
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern void zfs_znode_dmu_fini(znode_t *);
|
||||
|
||||
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
|
||||
vattr_t *vap);
|
||||
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
|
||||
vattr_t *vap);
|
||||
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, char *name);
|
||||
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name);
|
||||
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name, char *link);
|
||||
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
|
||||
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, offset_t off, ssize_t len, int ioflag);
|
||||
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, uint64_t off, uint64_t len);
|
||||
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
|
||||
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
|
||||
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
|
||||
|
||||
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
|
||||
extern void zfs_unmap_page(page_t *, caddr_t);
|
||||
|
||||
extern zil_get_data_t zfs_get_data;
|
||||
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
|
||||
extern int zfsfstype;
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_ZNODE_H */
|
||||
@@ -0,0 +1,382 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIL_H
|
||||
#define _SYS_ZIL_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Intent log format:
|
||||
*
|
||||
* Each objset has its own intent log. The log header (zil_header_t)
|
||||
* for objset N's intent log is kept in the Nth object of the SPA's
|
||||
* intent_log objset. The log header points to a chain of log blocks,
|
||||
* each of which contains log records (i.e., transactions) followed by
|
||||
* a log block trailer (zil_trailer_t). The format of a log record
|
||||
* depends on the record (or transaction) type, but all records begin
|
||||
* with a common structure that defines the type, length, and txg.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Intent log header - this on disk structure holds fields to manage
|
||||
* the log. All fields are 64 bit to easily handle cross architectures.
|
||||
*/
|
||||
typedef struct zil_header {
|
||||
uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
|
||||
uint64_t zh_replay_seq; /* highest replayed sequence number */
|
||||
blkptr_t zh_log; /* log chain */
|
||||
uint64_t zh_claim_seq; /* highest claimed sequence number */
|
||||
uint64_t zh_pad[5];
|
||||
} zil_header_t;
|
||||
|
||||
/*
|
||||
* Log block trailer - structure at the end of the header and each log block
|
||||
*
|
||||
* The zit_bt contains a zbt_cksum which for the intent log is
|
||||
* the sequence number of this log block. A seq of 0 is invalid.
|
||||
* The zbt_cksum is checked by the SPA against the sequence
|
||||
* number passed in the blk_cksum field of the blkptr_t
|
||||
*/
|
||||
typedef struct zil_trailer {
|
||||
uint64_t zit_pad;
|
||||
blkptr_t zit_next_blk; /* next block in chain */
|
||||
uint64_t zit_nused; /* bytes in log block used */
|
||||
zio_block_tail_t zit_bt; /* block trailer */
|
||||
} zil_trailer_t;
|
||||
|
||||
/*
 * Log block size limits, and the number of bytes of a log write block
 * (lwb->lwb_sz) that remain once the trailer has been subtracted.
 */
#define	ZIL_MIN_BLKSZ	4096ULL
#define	ZIL_MAX_BLKSZ	SPA_MAXBLOCKSIZE
#define	ZIL_BLK_DATA_SZ(lwb)	((lwb)->lwb_sz - sizeof (zil_trailer_t))
|
||||
|
||||
/*
|
||||
* The words of a log block checksum.
|
||||
*/
|
||||
/* Word indices (0 through 3) into a log block checksum. */
#define	ZIL_ZC_GUID_0	0
#define	ZIL_ZC_GUID_1	1
#define	ZIL_ZC_OBJSET	2
#define	ZIL_ZC_SEQ	3
|
||||
|
||||
/*
 * Kind of object being created.  Enumerators take the default values
 * 0, 1 and 2.  The last enumerator has no trailing comma, matching the
 * other enums in these headers and keeping the declaration valid for
 * pre-C99 compilers.
 */
typedef enum zil_create {
	Z_FILE,
	Z_DIR,
	Z_XATTRDIR
} zil_create_t;
|
||||
|
||||
/*
|
||||
* size of xvattr log section.
|
||||
* it's composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps
|
||||
* for create time and a single 64 bit integer for all of the attributes,
|
||||
* and 4 64 bit integers (32 bytes) for the scanstamp.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * Bytes needed for the xvattr section of a log record whose bitmap has
 * `mapsize' 32-bit words: one lr_attr_t (which already holds the first
 * bitmap word), the remaining (mapsize - 1) words, and seven 64-bit
 * words.
 *
 * The whole expansion and the argument are parenthesized so the macro
 * can be used inside a larger expression (e.g. 2 * ZIL_XVAT_SIZE(n))
 * and with a compound argument.
 */
#define	ZIL_XVAT_SIZE(mapsize) \
	(sizeof (lr_attr_t) + (sizeof (uint32_t) * ((mapsize) - 1)) + \
	(sizeof (uint64_t) * 7))
|
||||
|
||||
/*
|
||||
* Size of ACL in log. The ACE data is padded out to properly align
|
||||
* on 8 byte boundary.
|
||||
*/
|
||||
|
||||
#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t)))
|
||||
|
||||
/*
|
||||
* Intent log transaction types and record structures
|
||||
*/
|
||||
/*
 * Transaction type codes, numbered consecutively from 1.  TX_MAX_TYPE
 * is one greater than the highest code and is used as the size of
 * zfs_replay_vector[].
 */
#define	TX_CREATE		1	/* Create file */
#define	TX_MKDIR		2	/* Make directory */
#define	TX_MKXATTR		3	/* Make XATTR directory */
#define	TX_SYMLINK		4	/* Create symbolic link to a file */
#define	TX_REMOVE		5	/* Remove file */
#define	TX_RMDIR		6	/* Remove directory */
#define	TX_LINK			7	/* Create hard link to a file */
#define	TX_RENAME		8	/* Rename a file */
#define	TX_WRITE		9	/* File write */
#define	TX_TRUNCATE		10	/* Truncate a file */
#define	TX_SETATTR		11	/* Set file attributes */
#define	TX_ACL_V0		12	/* Set old formatted ACL */
#define	TX_ACL			13	/* Set ACL */
#define	TX_CREATE_ACL		14	/* create with ACL */
#define	TX_CREATE_ATTR		15	/* create + attrs */
#define	TX_CREATE_ACL_ATTR	16	/* create with ACL + attrs */
#define	TX_MKDIR_ACL		17	/* mkdir with ACL */
#define	TX_MKDIR_ATTR		18	/* mkdir with attr */
#define	TX_MKDIR_ACL_ATTR	19	/* mkdir with ACL + attrs */
#define	TX_MAX_TYPE		20	/* Max transaction type */
|
||||
|
||||
/*
|
||||
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
|
||||
* may have the following bit set, indicating the original request
|
||||
* specified case-insensitive handling of names.
|
||||
*/
|
||||
#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */
|
||||
|
||||
/*
|
||||
* Format of log records.
|
||||
* The fields are carefully defined to allow them to be aligned
|
||||
* and sized the same on sparc & intel architectures.
|
||||
* Each log record has a common structure at the beginning.
|
||||
*
|
||||
* Note, lrc_seq holds two different sequence numbers. Whilst in memory
|
||||
* it contains the transaction sequence number. The log record on
|
||||
* disk holds the sequence number of all log records which is used to
|
||||
* ensure we don't replay the same record. The two sequence numbers are
|
||||
* different because the transactions can now be pushed out of order.
|
||||
*/
|
||||
typedef struct { /* common log record header */
|
||||
uint64_t lrc_txtype; /* intent log transaction type */
|
||||
uint64_t lrc_reclen; /* transaction record length */
|
||||
uint64_t lrc_txg; /* dmu transaction group number */
|
||||
uint64_t lrc_seq; /* see comment above */
|
||||
} lr_t;
|
||||
|
||||
/*
|
||||
* Handle optional extended vattr attributes.
|
||||
*
|
||||
* Whenever new attributes are added the version number
|
||||
* will need to be updated as will code in
|
||||
* zfs_log.c and zfs_replay.c
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t lr_attr_masksize; /* number of elements in array */
|
||||
uint32_t lr_attr_bitmap; /* First entry of array */
|
||||
/* remainder of array and any additional fields */
|
||||
} lr_attr_t;
|
||||
|
||||
/*
|
||||
* log record for creates without optional ACL.
|
||||
* This log record does support optional xvattr_t attributes.
|
||||
*/
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* object id of directory */
|
||||
uint64_t lr_foid; /* object id of created file object */
|
||||
uint64_t lr_mode; /* mode of object */
|
||||
uint64_t lr_uid; /* uid of object */
|
||||
uint64_t lr_gid; /* gid of object */
|
||||
uint64_t lr_gen; /* generation (txg of creation) */
|
||||
uint64_t lr_crtime[2]; /* creation time */
|
||||
uint64_t lr_rdev; /* rdev of object to create */
|
||||
/* name of object to create follows this */
|
||||
/* for symlinks, link content follows name */
|
||||
/* for creates with xvattr data, the name follows the xvattr info */
|
||||
} lr_create_t;
|
||||
|
||||
/*
|
||||
* FUID ACL record will be an array of ACEs from the original ACL.
|
||||
* If this array includes ephemeral IDs, the record will also include
|
||||
* an array of log-specific FUIDs to replace the ephemeral IDs.
|
||||
* Only one copy of each unique domain will be present, so the log-specific
|
||||
* FUIDs will use an index into a compressed domain table. On replay this
|
||||
* information will be used to construct real FUIDs (and bypass idmap,
|
||||
* since it may not be available).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Log record for creates with optional ACL
|
||||
* This log record is also used for recording any FUID
|
||||
* information needed for replaying the create. If the
|
||||
* file doesn't have any actual ACEs then the lr_aclcnt
|
||||
* would be zero.
|
||||
*/
|
||||
typedef struct {
|
||||
lr_create_t lr_create; /* common create portion */
|
||||
uint64_t lr_aclcnt; /* number of ACEs in ACL */
|
||||
uint64_t lr_domcnt; /* number of unique domains */
|
||||
uint64_t lr_fuidcnt; /* number of real fuids */
|
||||
uint64_t lr_acl_bytes; /* number of bytes in ACL */
|
||||
uint64_t lr_acl_flags; /* ACL flags */
|
||||
/* lr_acl_bytes number of variable sized ace's follows */
|
||||
/* if create is also setting xvattr's, then acl data follows xvattr */
|
||||
/* if ACE FUIDs are needed then they will follow the xvattr_t */
|
||||
/* Following the FUIDs will be the domain table information. */
|
||||
/* The FUIDs for the owner and group will be in the lr_create */
|
||||
/* portion of the record. */
|
||||
/* name follows ACL data */
|
||||
} lr_acl_create_t;
|
||||
|
||||
/*
 * Log record for removing a name from a directory: the common header and
 * the directory's object id, with the name of the object to remove stored
 * immediately after the fixed-size part.
 * NOTE(review): presumably shared by TX_REMOVE and TX_RMDIR -- confirm
 * against the code that builds these records.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* obj id of directory */
|
||||
/* name of object to remove follows this */
|
||||
} lr_remove_t;
|
||||
|
||||
/*
 * Log record for a link operation: the common header, the object id of the
 * directory, and the object id stored in lr_link_obj, with the name of the
 * object to link stored immediately after the fixed-size part.
 * NOTE(review): presumably the record used for TX_LINK -- confirm.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* obj id of directory */
|
||||
uint64_t lr_link_obj; /* obj id of link */
|
||||
/* name of object to link follows this */
|
||||
} lr_link_t;
|
||||
|
||||
/*
 * Log record for a rename: the common header plus the object ids of the
 * source and target directories.  Two strings follow the fixed-size part:
 * the source name and the destination name.
 * NOTE(review): presumably the record used for TX_RENAME -- confirm.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_sdoid; /* obj id of source directory */
|
||||
uint64_t lr_tdoid; /* obj id of target directory */
|
||||
/* 2 strings: names of source and destination follow this */
|
||||
} lr_rename_t;
|
||||
|
||||
/*
 * Log record for a file write.  It identifies the file object and the
 * offset/length of the user data, and carries a block pointer (with the
 * offset that block pointer represents) for use during replay.  For small
 * writes the data itself is appended after the fixed-size part; the
 * itx_wr_state_t comments in this header describe the alternative of
 * logging only the block pointer for large (indirect) writes.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* file object to write */
|
||||
uint64_t lr_offset; /* offset to write to */
|
||||
uint64_t lr_length; /* user data length to write */
|
||||
uint64_t lr_blkoff; /* offset represented by lr_blkptr */
|
||||
blkptr_t lr_blkptr; /* spa block pointer for replay */
|
||||
/* write data will follow for small writes */
|
||||
} lr_write_t;
|
||||
|
||||
/*
 * Log record for truncating a file: the common header, the object id of
 * the file, and the offset/length pair describing the truncation.  Nothing
 * follows the fixed-size part.
 * NOTE(review): presumably the record used for TX_TRUNCATE -- confirm.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* object id of file to truncate */
|
||||
uint64_t lr_offset; /* offset to truncate from */
|
||||
uint64_t lr_length; /* length to truncate */
|
||||
} lr_truncate_t;
|
||||
|
||||
/*
 * Log record for setting file attributes.  lr_mask says which of the
 * attribute values below are to be set on the object lr_foid.  The two
 * timestamps are each stored as a pair of 64-bit integers.  An optional
 * lr_attr_t (extended attribute data) may follow the fixed-size part.
 * NOTE(review): presumably the record used for TX_SETATTR -- confirm.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* file object to change attributes */
|
||||
uint64_t lr_mask; /* mask of attributes to set */
|
||||
uint64_t lr_mode; /* mode to set */
|
||||
uint64_t lr_uid; /* uid to set */
|
||||
uint64_t lr_gid; /* gid to set */
|
||||
uint64_t lr_size; /* size to set */
|
||||
uint64_t lr_atime[2]; /* access time */
|
||||
uint64_t lr_mtime[2]; /* modification time */
|
||||
/* optional attribute lr_attr_t may be here */
|
||||
} lr_setattr_t;
|
||||
|
||||
/*
 * Log record for setting an ACL made of ace_t entries: the common header,
 * the file's object id and the entry count, followed by lr_aclcnt ace_t
 * entries.
 * NOTE(review): presumably the record used for TX_ACL_V0 ("old formatted
 * ACL") -- confirm.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* obj id of file */
|
||||
uint64_t lr_aclcnt; /* number of acl entries */
|
||||
/* lr_aclcnt number of ace_t entries follow this */
|
||||
} lr_acl_v0_t;
|
||||
|
||||
/*
 * Log record for setting an ACL with variable-sized ACEs.  After the file's
 * object id it carries the same five ACL/FUID counters that lr_acl_create_t
 * carries after its create portion (ACE count, domain count, fuid count,
 * ACL byte length, ACL flags).  lr_acl_bytes bytes of ACE data follow the
 * fixed-size part.
 * NOTE(review): presumably the record used for TX_ACL -- confirm.
 */
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* obj id of file */
|
||||
uint64_t lr_aclcnt; /* number of ACEs in ACL */
|
||||
uint64_t lr_domcnt; /* number of unique domains */
|
||||
uint64_t lr_fuidcnt; /* number of real fuids */
|
||||
uint64_t lr_acl_bytes; /* number of bytes in ACL */
|
||||
uint64_t lr_acl_flags; /* ACL flags */
|
||||
/* lr_acl_bytes number of variable sized ace's follows */
|
||||
} lr_acl_t;
|
||||
|
||||
/*
|
||||
* ZIL structure definitions, interface function prototype and globals.
|
||||
*/
|
||||
|
||||
/*
|
||||
* ZFS intent log transaction structure
|
||||
*/
|
||||
typedef enum {
|
||||
WR_INDIRECT, /* indirect - a large write (dmu_sync() data */
|
||||
/* and put blkptr in log, rather than actual data) */
|
||||
WR_COPIED, /* immediate - data is copied into lr_write_t */
|
||||
WR_NEED_COPY, /* immediate - data needs to be copied if pushed */
|
||||
} itx_wr_state_t;
|
||||
|
||||
typedef struct itx {
|
||||
list_node_t itx_node; /* linkage on zl_itx_list */
|
||||
void *itx_private; /* type-specific opaque data */
|
||||
itx_wr_state_t itx_wr_state; /* write state */
|
||||
uint8_t itx_sync; /* synchronous transaction */
|
||||
uint64_t itx_sod; /* record size on disk */
|
||||
lr_t itx_lr; /* common part of log record */
|
||||
/* followed by type-specific part of lr_xx_t and its immediate data */
|
||||
} itx_t;
|
||||
|
||||
|
||||
/*
|
||||
* zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
|
||||
* to handle the cleanup of the dmu_sync() buffer write
|
||||
*/
|
||||
typedef struct {
|
||||
zilog_t *zgd_zilog; /* zilog */
|
||||
blkptr_t *zgd_bp; /* block pointer */
|
||||
struct rl *zgd_rl; /* range lock */
|
||||
} zgd_t;
|
||||
|
||||
|
||||
typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
|
||||
uint64_t txg);
|
||||
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
|
||||
uint64_t txg);
|
||||
/*
 * Replay callback types.  Both are declared with an empty parameter list,
 * which before C23 leaves the parameters unspecified: the compiler does not
 * check the arguments of calls made through these types.  zil_replay()
 * takes a TX_MAX_TYPE-sized table of zil_replay_func_t pointers and a
 * single zil_replay_cleaner_t pointer.
 * NOTE(review): the real argument lists are fixed by the callers and the
 * registered functions, not by this header -- confirm them before turning
 * these into full prototypes.
 */
typedef int zil_replay_func_t();
|
||||
typedef void zil_replay_cleaner_t();
|
||||
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
|
||||
|
||||
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
||||
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
|
||||
|
||||
extern void zil_init(void);
|
||||
extern void zil_fini(void);
|
||||
|
||||
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
|
||||
extern void zil_free(zilog_t *zilog);
|
||||
|
||||
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
|
||||
extern void zil_close(zilog_t *zilog);
|
||||
|
||||
extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
|
||||
zil_replay_func_t *replay_func[TX_MAX_TYPE],
|
||||
zil_replay_cleaner_t *replay_cleaner);
|
||||
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
|
||||
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
|
||||
|
||||
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
|
||||
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
|
||||
|
||||
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
|
||||
|
||||
extern int zil_claim(char *osname, void *txarg);
|
||||
extern int zil_check_log_chain(char *osname, void *txarg);
|
||||
extern int zil_clear_log_chain(char *osname, void *txarg);
|
||||
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
|
||||
extern void zil_clean(zilog_t *zilog);
|
||||
extern int zil_is_committed(zilog_t *zilog);
|
||||
|
||||
extern int zil_suspend(zilog_t *zilog);
|
||||
extern void zil_resume(zilog_t *zilog);
|
||||
|
||||
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
|
||||
|
||||
extern int zil_disable;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIL_H */
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIL_IMPL_H
|
||||
#define _SYS_ZIL_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zil.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Log write buffer.
|
||||
*/
|
||||
typedef struct lwb {
|
||||
zilog_t *lwb_zilog; /* back pointer to log struct */
|
||||
blkptr_t lwb_blk; /* on disk address of this log blk */
|
||||
int lwb_nused; /* # used bytes in buffer */
|
||||
int lwb_sz; /* size of block and buffer */
|
||||
char *lwb_buf; /* log write buffer */
|
||||
zio_t *lwb_zio; /* zio for this buffer */
|
||||
uint64_t lwb_max_txg; /* highest txg in this lwb */
|
||||
txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
|
||||
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
|
||||
} lwb_t;
|
||||
|
||||
/*
|
||||
* Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
|
||||
* we've touched so we know which ones need a write cache flush at the end.
|
||||
*/
|
||||
typedef struct zil_vdev_node {
|
||||
uint64_t zv_vdev; /* vdev to be flushed */
|
||||
avl_node_t zv_node; /* AVL tree linkage */
|
||||
} zil_vdev_node_t;
|
||||
|
||||
/*
|
||||
* Stable storage intent log management structure. One per dataset.
|
||||
*/
|
||||
struct zilog {
|
||||
kmutex_t zl_lock; /* protects most zilog_t fields */
|
||||
struct dsl_pool *zl_dmu_pool; /* DSL pool */
|
||||
spa_t *zl_spa; /* handle for read/write log */
|
||||
const zil_header_t *zl_header; /* log header buffer */
|
||||
objset_t *zl_os; /* object set we're logging */
|
||||
zil_get_data_t *zl_get_data; /* callback to get object content */
|
||||
zio_t *zl_root_zio; /* log writer root zio */
|
||||
uint64_t zl_itx_seq; /* next itx sequence number */
|
||||
uint64_t zl_commit_seq; /* committed upto this number */
|
||||
uint64_t zl_lr_seq; /* log record sequence number */
|
||||
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
|
||||
uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
|
||||
uint32_t zl_suspend; /* log suspend count */
|
||||
kcondvar_t zl_cv_writer; /* log writer thread completion */
|
||||
kcondvar_t zl_cv_suspend; /* log suspend completion */
|
||||
uint8_t zl_suspending; /* log is currently suspending */
|
||||
uint8_t zl_keep_first; /* keep first log block in destroy */
|
||||
uint8_t zl_stop_replay; /* don't replay any further */
|
||||
uint8_t zl_stop_sync; /* for debugging */
|
||||
uint8_t zl_writer; /* boolean: write setup in progress */
|
||||
uint8_t zl_log_error; /* boolean: log write error */
|
||||
list_t zl_itx_list; /* in-memory itx list */
|
||||
uint64_t zl_itx_list_sz; /* total size of records on list */
|
||||
uint64_t zl_cur_used; /* current commit log size used */
|
||||
uint64_t zl_prev_used; /* previous commit log size used */
|
||||
list_t zl_lwb_list; /* in-flight log write list */
|
||||
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
|
||||
avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
|
||||
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
|
||||
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
|
||||
clock_t zl_replay_time; /* lbolt of when replay started */
|
||||
uint64_t zl_replay_blks; /* number of log blocks replayed */
|
||||
};
|
||||
|
||||
typedef struct zil_dva_node {
|
||||
dva_t zn_dva;
|
||||
avl_node_t zn_node;
|
||||
} zil_dva_node_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIL_IMPL_H */
|
||||
@@ -0,0 +1,424 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_H
|
||||
#define _ZIO_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
|
||||
|
||||
typedef struct zio_block_tail {
|
||||
uint64_t zbt_magic; /* for validation, endianness */
|
||||
zio_cksum_t zbt_cksum; /* 256-bit checksum */
|
||||
} zio_block_tail_t;
|
||||
|
||||
/*
|
||||
* Gang block headers are self-checksumming and contain an array
|
||||
* of block pointers.
|
||||
*/
|
||||
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
|
||||
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
|
||||
sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
|
||||
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
|
||||
sizeof (zio_block_tail_t) - \
|
||||
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
|
||||
sizeof (uint64_t))
|
||||
|
||||
typedef struct zio_gbh {
|
||||
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
|
||||
uint64_t zg_filler[SPA_GBH_FILLER];
|
||||
zio_block_tail_t zg_tail;
|
||||
} zio_gbh_phys_t;
|
||||
|
||||
enum zio_checksum {
|
||||
ZIO_CHECKSUM_INHERIT = 0,
|
||||
ZIO_CHECKSUM_ON,
|
||||
ZIO_CHECKSUM_OFF,
|
||||
ZIO_CHECKSUM_LABEL,
|
||||
ZIO_CHECKSUM_GANG_HEADER,
|
||||
ZIO_CHECKSUM_ZILOG,
|
||||
ZIO_CHECKSUM_FLETCHER_2,
|
||||
ZIO_CHECKSUM_FLETCHER_4,
|
||||
ZIO_CHECKSUM_SHA256,
|
||||
ZIO_CHECKSUM_FUNCTIONS
|
||||
};
|
||||
|
||||
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
|
||||
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
|
||||
|
||||
enum zio_compress {
|
||||
ZIO_COMPRESS_INHERIT = 0,
|
||||
ZIO_COMPRESS_ON,
|
||||
ZIO_COMPRESS_OFF,
|
||||
ZIO_COMPRESS_LZJB,
|
||||
ZIO_COMPRESS_EMPTY,
|
||||
ZIO_COMPRESS_GZIP_1,
|
||||
ZIO_COMPRESS_GZIP_2,
|
||||
ZIO_COMPRESS_GZIP_3,
|
||||
ZIO_COMPRESS_GZIP_4,
|
||||
ZIO_COMPRESS_GZIP_5,
|
||||
ZIO_COMPRESS_GZIP_6,
|
||||
ZIO_COMPRESS_GZIP_7,
|
||||
ZIO_COMPRESS_GZIP_8,
|
||||
ZIO_COMPRESS_GZIP_9,
|
||||
ZIO_COMPRESS_FUNCTIONS
|
||||
};
|
||||
|
||||
#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
|
||||
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
|
||||
|
||||
#define ZIO_FAILURE_MODE_WAIT 0
|
||||
#define ZIO_FAILURE_MODE_CONTINUE 1
|
||||
#define ZIO_FAILURE_MODE_PANIC 2
|
||||
|
||||
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
|
||||
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
|
||||
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
|
||||
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
|
||||
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
|
||||
#define ZIO_PRIORITY_FREE (zio_priority_table[5])
|
||||
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
|
||||
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
|
||||
#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
|
||||
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
|
||||
#define ZIO_PRIORITY_TABLE_SIZE 10
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0x00000
|
||||
#define ZIO_FLAG_CANFAIL 0x00001
|
||||
#define ZIO_FLAG_SPECULATIVE 0x00002
|
||||
#define ZIO_FLAG_CONFIG_WRITER 0x00004
|
||||
#define ZIO_FLAG_DONT_RETRY 0x00008
|
||||
|
||||
#define ZIO_FLAG_DONT_CACHE 0x00010
|
||||
#define ZIO_FLAG_DONT_QUEUE 0x00020
|
||||
#define ZIO_FLAG_DONT_AGGREGATE 0x00040
|
||||
#define ZIO_FLAG_DONT_PROPAGATE 0x00080
|
||||
|
||||
#define ZIO_FLAG_IO_BYPASS 0x00100
|
||||
#define ZIO_FLAG_IO_REPAIR 0x00200
|
||||
#define ZIO_FLAG_IO_RETRY 0x00400
|
||||
#define ZIO_FLAG_IO_REWRITE 0x00800
|
||||
|
||||
#define ZIO_FLAG_PROBE 0x01000
|
||||
#define ZIO_FLAG_RESILVER 0x02000
|
||||
#define ZIO_FLAG_SCRUB 0x04000
|
||||
#define ZIO_FLAG_SCRUB_THREAD 0x08000
|
||||
|
||||
#define ZIO_FLAG_GANG_CHILD 0x10000
|
||||
|
||||
#define ZIO_FLAG_GANG_INHERIT \
|
||||
(ZIO_FLAG_CANFAIL | \
|
||||
ZIO_FLAG_SPECULATIVE | \
|
||||
ZIO_FLAG_CONFIG_WRITER | \
|
||||
ZIO_FLAG_DONT_RETRY | \
|
||||
ZIO_FLAG_DONT_CACHE | \
|
||||
ZIO_FLAG_DONT_AGGREGATE | \
|
||||
ZIO_FLAG_RESILVER | \
|
||||
ZIO_FLAG_SCRUB | \
|
||||
ZIO_FLAG_SCRUB_THREAD)
|
||||
|
||||
#define ZIO_FLAG_VDEV_INHERIT \
|
||||
(ZIO_FLAG_GANG_INHERIT | \
|
||||
ZIO_FLAG_IO_REPAIR | \
|
||||
ZIO_FLAG_IO_RETRY | \
|
||||
ZIO_FLAG_PROBE)
|
||||
|
||||
#define ZIO_PIPELINE_CONTINUE 0x100
|
||||
#define ZIO_PIPELINE_STOP 0x101
|
||||
|
||||
#define ZIO_GANG_CHILD_FLAGS(zio) \
|
||||
(((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) | \
|
||||
ZIO_FLAG_GANG_CHILD | ZIO_FLAG_CANFAIL)
|
||||
|
||||
enum zio_child {
|
||||
ZIO_CHILD_VDEV = 0,
|
||||
ZIO_CHILD_GANG,
|
||||
ZIO_CHILD_LOGICAL,
|
||||
ZIO_CHILD_TYPES
|
||||
};
|
||||
|
||||
enum zio_wait_type {
|
||||
ZIO_WAIT_READY = 0,
|
||||
ZIO_WAIT_DONE,
|
||||
ZIO_WAIT_TYPES
|
||||
};
|
||||
|
||||
/*
|
||||
* We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
|
||||
* graveyard) to indicate checksum errors and fragmentation.
|
||||
*/
|
||||
#define ECKSUM EBADE
|
||||
#define EFRAGS EBADR
|
||||
|
||||
typedef struct zio zio_t;
|
||||
typedef void zio_done_func_t(zio_t *zio);
|
||||
|
||||
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
|
||||
extern char *zio_type_name[ZIO_TYPES];
|
||||
|
||||
/*
|
||||
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
|
||||
* identifies any block in the pool. By convention, the meta-objset (MOS)
|
||||
* is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
|
||||
* level -1 of the meta-dnode, and intent log blocks (which are chained
|
||||
* off the root block) have blkid == sequence number. In summary:
|
||||
*
|
||||
* mos is objset 0
|
||||
* meta-dnode is object 0
|
||||
* root block is <objset, 0, -1, 0>
|
||||
* intent log is <objset, 0, -1, ZIL sequence number>
|
||||
*
|
||||
* Note: this structure is called a bookmark because its first purpose was
|
||||
* to remember where to resume a pool-wide traverse. The absolute ordering
|
||||
* for block visitation during traversal is defined in compare_bookmark().
|
||||
*
|
||||
* Note: this structure is passed between userland and the kernel.
|
||||
* Therefore it must not change size or alignment between 32/64 bit
|
||||
* compilation options.
|
||||
*/
|
||||
typedef struct zbookmark {
|
||||
uint64_t zb_objset;
|
||||
uint64_t zb_object;
|
||||
int64_t zb_level;
|
||||
uint64_t zb_blkid;
|
||||
} zbookmark_t;
|
||||
|
||||
typedef struct zio_prop {
|
||||
enum zio_checksum zp_checksum;
|
||||
enum zio_compress zp_compress;
|
||||
dmu_object_type_t zp_type;
|
||||
uint8_t zp_level;
|
||||
uint8_t zp_ndvas;
|
||||
} zio_prop_t;
|
||||
|
||||
typedef struct zio_gang_node {
|
||||
zio_gbh_phys_t *gn_gbh;
|
||||
struct zio_gang_node *gn_child[SPA_GBH_NBLKPTRS];
|
||||
} zio_gang_node_t;
|
||||
|
||||
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
|
||||
zio_gang_node_t *gn, void *data);
|
||||
|
||||
typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
|
||||
|
||||
typedef struct zio_transform {
|
||||
void *zt_orig_data;
|
||||
uint64_t zt_orig_size;
|
||||
uint64_t zt_bufsize;
|
||||
zio_transform_func_t *zt_transform;
|
||||
struct zio_transform *zt_next;
|
||||
} zio_transform_t;
|
||||
|
||||
typedef int zio_pipe_stage_t(zio_t *zio);
|
||||
|
||||
/*
|
||||
* The io_reexecute flags are distinct from io_flags because the child must
|
||||
* be able to propagate them to the parent. The normal io_flags are local
|
||||
* to the zio, not protected by any lock, and not modifiable by children;
|
||||
* the reexecute flags are protected by io_lock, modifiable by children,
|
||||
* and always propagated -- even when ZIO_FLAG_DONT_PROPAGATE is set.
|
||||
*/
|
||||
#define ZIO_REEXECUTE_NOW 0x01
|
||||
#define ZIO_REEXECUTE_SUSPEND 0x02
|
||||
|
||||
struct zio {
|
||||
/* Core information about this I/O */
|
||||
zbookmark_t io_bookmark;
|
||||
zio_prop_t io_prop;
|
||||
zio_type_t io_type;
|
||||
enum zio_child io_child_type;
|
||||
int io_cmd;
|
||||
uint8_t io_priority;
|
||||
uint8_t io_reexecute;
|
||||
uint8_t io_async_root;
|
||||
uint64_t io_txg;
|
||||
spa_t *io_spa;
|
||||
blkptr_t *io_bp;
|
||||
blkptr_t io_bp_copy;
|
||||
zio_t *io_parent;
|
||||
zio_t *io_child;
|
||||
zio_t *io_sibling_prev;
|
||||
zio_t *io_sibling_next;
|
||||
zio_t *io_logical;
|
||||
zio_transform_t *io_transform_stack;
|
||||
|
||||
/* Callback info */
|
||||
zio_done_func_t *io_ready;
|
||||
zio_done_func_t *io_done;
|
||||
void *io_private;
|
||||
blkptr_t io_bp_orig;
|
||||
|
||||
/* Data represented by this I/O */
|
||||
void *io_data;
|
||||
uint64_t io_size;
|
||||
|
||||
/* Stuff for the vdev stack */
|
||||
vdev_t *io_vd;
|
||||
void *io_vsd;
|
||||
zio_done_func_t *io_vsd_free;
|
||||
uint64_t io_offset;
|
||||
uint64_t io_deadline;
|
||||
avl_node_t io_offset_node;
|
||||
avl_node_t io_deadline_node;
|
||||
avl_tree_t *io_vdev_tree;
|
||||
zio_t *io_delegate_list;
|
||||
zio_t *io_delegate_next;
|
||||
|
||||
/* Internal pipeline state */
|
||||
int io_flags;
|
||||
zio_stage_t io_stage;
|
||||
uint32_t io_pipeline;
|
||||
int io_orig_flags;
|
||||
zio_stage_t io_orig_stage;
|
||||
uint32_t io_orig_pipeline;
|
||||
int io_error;
|
||||
int io_child_error[ZIO_CHILD_TYPES];
|
||||
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
|
||||
uint64_t *io_stall;
|
||||
zio_gang_node_t *io_gang_tree;
|
||||
void *io_executor;
|
||||
void *io_waiter;
|
||||
kmutex_t io_lock;
|
||||
kcondvar_t io_cv;
|
||||
|
||||
/* FMA state */
|
||||
uint64_t io_ena;
|
||||
};
|
||||
|
||||
extern zio_t *zio_null(zio_t *pio, spa_t *spa,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_root(spa_t *spa,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
|
||||
uint64_t size, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, const zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
void *data, uint64_t size, zio_prop_t *zp,
|
||||
zio_done_func_t *ready, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, const zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
void *data, uint64_t size, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, zbookmark_t *zb);
|
||||
|
||||
extern void zio_skip_write(zio_t *zio);
|
||||
|
||||
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
|
||||
zio_done_func_t *done, void *private, int priority, int flags);
|
||||
|
||||
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *data, int checksum,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
boolean_t labels);
|
||||
|
||||
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *data, int checksum,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
boolean_t labels);
|
||||
|
||||
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
|
||||
blkptr_t *old_bp, uint64_t txg);
|
||||
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
|
||||
extern void zio_flush(zio_t *zio, vdev_t *vd);
|
||||
|
||||
extern int zio_wait(zio_t *zio);
|
||||
extern void zio_nowait(zio_t *zio);
|
||||
extern void zio_execute(zio_t *zio);
|
||||
extern void zio_interrupt(zio_t *zio);
|
||||
|
||||
extern void *zio_buf_alloc(size_t size);
|
||||
extern void zio_buf_free(void *buf, size_t size);
|
||||
extern void *zio_data_buf_alloc(size_t size);
|
||||
extern void zio_data_buf_free(void *buf, size_t size);
|
||||
|
||||
extern void zio_resubmit_stage_async(void *);
|
||||
|
||||
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
|
||||
uint64_t offset, void *data, uint64_t size, int type, int priority,
|
||||
int flags, zio_done_func_t *done, void *private);
|
||||
|
||||
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
|
||||
void *data, uint64_t size, int type, int priority,
|
||||
int flags, zio_done_func_t *done, void *private);
|
||||
|
||||
extern void zio_vdev_io_bypass(zio_t *zio);
|
||||
extern void zio_vdev_io_reissue(zio_t *zio);
|
||||
extern void zio_vdev_io_redone(zio_t *zio);
|
||||
|
||||
extern void zio_checksum_verified(zio_t *zio);
|
||||
extern int zio_worst_error(int e1, int e2);
|
||||
|
||||
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
|
||||
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
|
||||
|
||||
extern void zio_suspend(spa_t *spa, zio_t *zio);
|
||||
extern void zio_resume(spa_t *spa);
|
||||
extern void zio_resume_wait(spa_t *spa);
|
||||
|
||||
/*
|
||||
* Initial setup and teardown.
|
||||
*/
|
||||
extern void zio_init(void);
|
||||
extern void zio_fini(void);
|
||||
|
||||
/*
|
||||
* Fault injection
|
||||
*/
|
||||
struct zinject_record;
|
||||
extern uint32_t zio_injection_enabled;
|
||||
extern int zio_inject_fault(char *name, int flags, int *id,
|
||||
struct zinject_record *record);
|
||||
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
|
||||
struct zinject_record *record);
|
||||
extern int zio_clear_fault(int id);
|
||||
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, int error);
|
||||
extern int zio_handle_label_injection(zio_t *zio, int error);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZIO_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_CHECKSUM_H
|
||||
#define _SYS_ZIO_CHECKSUM_H
|
||||
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Signature for checksum functions.
|
||||
*/
|
||||
typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
|
||||
|
||||
/*
|
||||
* Information about each checksum function.
|
||||
*/
|
||||
typedef struct zio_checksum_info {
|
||||
zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
|
||||
int ci_correctable; /* number of correctable bits */
|
||||
int ci_zbt; /* uses zio block tail? */
|
||||
char *ci_name; /* descriptive name */
|
||||
} zio_checksum_info_t;
|
||||
|
||||
extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
|
||||
|
||||
/*
|
||||
* Checksum routines.
|
||||
*/
|
||||
extern zio_checksum_t fletcher_2_native;
|
||||
extern zio_checksum_t fletcher_4_native;
|
||||
extern zio_checksum_t fletcher_4_incremental_native;
|
||||
|
||||
extern zio_checksum_t fletcher_2_byteswap;
|
||||
extern zio_checksum_t fletcher_4_byteswap;
|
||||
extern zio_checksum_t fletcher_4_incremental_byteswap;
|
||||
|
||||
extern zio_checksum_t zio_checksum_SHA256;
|
||||
|
||||
/*
 * Checksum entry points operating on a zio.
 *
 * zio_checksum_compute() takes the zio, the checksum selector and a
 * data/size pair; zio_checksum_error() takes only the zio and returns
 * an int (presumably 0 on success or an error code -- TODO confirm).
 */
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
    void *data, uint64_t size);
extern int zio_checksum_error(zio_t *zio);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIO_CHECKSUM_H */
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_COMPRESS_H
|
||||
#define _SYS_ZIO_COMPRESS_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Common signature for all zio compress/decompress functions.
 *
 * Both take a source and destination buffer with their lengths plus an
 * int "level" argument (named to match the lzjb/gzip prototypes declared
 * in this header).  Compressors return size_t; decompressors return int.
 */
typedef size_t zio_compress_func_t(void *src, void *dst,
    size_t s_len, size_t d_len, int level);
typedef int zio_decompress_func_t(void *src, void *dst,
    size_t s_len, size_t d_len, int level);
|
||||
|
||||
/*
|
||||
* Information about each compression function.
|
||||
*/
|
||||
typedef struct zio_compress_info {
|
||||
zio_compress_func_t *ci_compress; /* compression function */
|
||||
zio_decompress_func_t *ci_decompress; /* decompression function */
|
||||
int ci_level; /* level parameter */
|
||||
char *ci_name; /* algorithm name */
|
||||
} zio_compress_info_t;
|
||||
|
||||
/*
 * Table of zio_compress_info_t entries, ZIO_COMPRESS_FUNCTIONS long.
 * Only declared here; the definition lives outside this header.
 */
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
|
||||
|
||||
/*
 * Compression routines.
 *
 * The *_compress prototypes match zio_compress_func_t and the
 * *_decompress prototypes match zio_decompress_func_t.
 */
extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
    int level);
extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
    int level);
extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
    int level);
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
    int level);
|
||||
|
||||
/*
 * Compress and decompress data if necessary.
 *
 * cpfunc is an int selector (presumably an index into
 * zio_compress_table -- TODO confirm).  zio_compress_data() reports
 * through three pointer arguments (destp, destsizep, destbufsizep);
 * zio_decompress_data() is given the destination buffer and its size
 * by the caller.
 */
extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
    void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
    void *dest, uint64_t destsize);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIO_COMPRESS_H */
|
||||
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_IMPL_H
|
||||
#define _ZIO_IMPL_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * I/O Groups: pipeline stage definitions.
 *
 * Stages are numbered consecutively starting at ZIO_STAGE_OPEN (0), and
 * ZIO_STAGES is the total number of stages.  The ZIO_*_STAGES and
 * ZIO_*_PIPELINE masks in this header are built from (1U << stage), so
 * the declaration order here fixes each stage's bit position.
 *
 * The five-character tag after each stage presumably marks the I/O types
 * that use it -- R(ead), W(rite), F(ree), C(laim), I(octl) -- TODO
 * confirm; treat the tags as a guide rather than a definition.
 */
typedef enum zio_stage {
	ZIO_STAGE_OPEN = 0,			/* RWFCI */

	ZIO_STAGE_ISSUE_ASYNC,			/* -W--- */

	ZIO_STAGE_READ_BP_INIT,			/* R---- */
	ZIO_STAGE_WRITE_BP_INIT,		/* -W--- */

	ZIO_STAGE_CHECKSUM_GENERATE,		/* -W--- */

	ZIO_STAGE_GANG_ASSEMBLE,		/* RWFC- */
	ZIO_STAGE_GANG_ISSUE,			/* RWFC- */

	ZIO_STAGE_DVA_ALLOCATE,			/* -W--- */
	ZIO_STAGE_DVA_FREE,			/* --F-- */
	ZIO_STAGE_DVA_CLAIM,			/* ---C- */

	ZIO_STAGE_READY,			/* RWFCI */

	ZIO_STAGE_VDEV_IO_START,		/* RW--I */
	ZIO_STAGE_VDEV_IO_DONE,			/* RW--I */
	ZIO_STAGE_VDEV_IO_ASSESS,		/* RW--I */

	ZIO_STAGE_CHECKSUM_VERIFY,		/* R---- */

	ZIO_STAGE_DONE,				/* RWFCI */
	ZIO_STAGES
} zio_stage_t;
|
||||
|
||||
/*
 * Pipeline masks.
 *
 * Each mask is the bitwise OR of (1U << stage) for the zio_stage_t
 * values it contains; the *_PIPELINE masks are composed from the
 * smaller *_STAGES groups.
 */

/* READY and DONE: included in every pipeline except the vdev child one. */
#define	ZIO_INTERLOCK_STAGES					\
	((1U << ZIO_STAGE_READY) |				\
	(1U << ZIO_STAGE_DONE))

#define	ZIO_INTERLOCK_PIPELINE					\
	ZIO_INTERLOCK_STAGES

/* The three vdev I/O stages: start, done, assess. */
#define	ZIO_VDEV_IO_STAGES					\
	((1U << ZIO_STAGE_VDEV_IO_START) |			\
	(1U << ZIO_STAGE_VDEV_IO_DONE) |			\
	(1U << ZIO_STAGE_VDEV_IO_ASSESS))

/* Vdev I/O stages plus DONE; note that READY is not part of this mask. */
#define	ZIO_VDEV_CHILD_PIPELINE					\
	(ZIO_VDEV_IO_STAGES |					\
	(1U << ZIO_STAGE_DONE))

/* Reads: interlock + vdev I/O + checksum verification. */
#define	ZIO_READ_COMMON_STAGES					\
	(ZIO_INTERLOCK_STAGES |					\
	ZIO_VDEV_IO_STAGES |					\
	(1U << ZIO_STAGE_CHECKSUM_VERIFY))

#define	ZIO_READ_PHYS_PIPELINE					\
	ZIO_READ_COMMON_STAGES

#define	ZIO_READ_PIPELINE					\
	(ZIO_READ_COMMON_STAGES |				\
	(1U << ZIO_STAGE_READ_BP_INIT))

/* Writes: interlock + vdev I/O + async issue + checksum generation. */
#define	ZIO_WRITE_COMMON_STAGES					\
	(ZIO_INTERLOCK_STAGES |					\
	ZIO_VDEV_IO_STAGES |					\
	(1U << ZIO_STAGE_ISSUE_ASYNC) |				\
	(1U << ZIO_STAGE_CHECKSUM_GENERATE))

#define	ZIO_WRITE_PHYS_PIPELINE					\
	ZIO_WRITE_COMMON_STAGES

/* Like ZIO_WRITE_PIPELINE but without the DVA allocation stage. */
#define	ZIO_REWRITE_PIPELINE					\
	(ZIO_WRITE_COMMON_STAGES |				\
	(1U << ZIO_STAGE_WRITE_BP_INIT))

#define	ZIO_WRITE_PIPELINE					\
	(ZIO_WRITE_COMMON_STAGES |				\
	(1U << ZIO_STAGE_WRITE_BP_INIT) |			\
	(1U << ZIO_STAGE_DVA_ALLOCATE))

/* Gang stages are a separate group; no pipeline above includes them. */
#define	ZIO_GANG_STAGES						\
	((1U << ZIO_STAGE_GANG_ASSEMBLE) |			\
	(1U << ZIO_STAGE_GANG_ISSUE))

#define	ZIO_FREE_PIPELINE					\
	(ZIO_INTERLOCK_STAGES |					\
	(1U << ZIO_STAGE_DVA_FREE))

#define	ZIO_CLAIM_PIPELINE					\
	(ZIO_INTERLOCK_STAGES |					\
	(1U << ZIO_STAGE_DVA_CLAIM))

/*
 * NOTE(review): this mask contains VDEV_IO_START and VDEV_IO_ASSESS but
 * not VDEV_IO_DONE, although the stage list tags VDEV_IO_DONE "RW--I".
 * Left exactly as found; confirm that the omission is intentional.
 */
#define	ZIO_IOCTL_PIPELINE					\
	(ZIO_INTERLOCK_STAGES |					\
	(1U << ZIO_STAGE_VDEV_IO_START) |			\
	(1U << ZIO_STAGE_VDEV_IO_ASSESS))

#define	ZIO_CONFIG_LOCK_BLOCKING_STAGES				\
	((1U << ZIO_STAGE_VDEV_IO_START) |			\
	(1U << ZIO_STAGE_DVA_ALLOCATE) |			\
	(1U << ZIO_STAGE_DVA_CLAIM))
|
||||
|
||||
/*
 * Setup/teardown hooks for zio injection; both take no arguments and
 * return nothing.  Their definitions are outside this header.
 */
extern void zio_inject_init(void);
extern void zio_inject_fini(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZIO_IMPL_H */
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZVOL_H
|
||||
#define _SYS_ZVOL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Fixed object numbers (unsigned long long constants) -- presumably the
 * zvol data object and its ZAP object within the objset; TODO confirm.
 */
#define	ZVOL_OBJ		1ULL
#define	ZVOL_ZAP_OBJ		2ULL
|
||||
|
||||
/*
 * Kernel-only zvol interface: these prototypes are visible only when
 * _KERNEL is defined.
 */
#ifdef _KERNEL
/* Size checks, statistics, creation callback and minor-node management. */
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
extern int zvol_create_minor(const char *, major_t);
extern int zvol_remove_minor(const char *);
extern int zvol_set_volsize(const char *, major_t, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);

/* Entry points keyed by dev_t or buf_t (open/close, I/O, ioctl, dump). */
extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
extern int zvol_strategy(buf_t *bp);
extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr);
extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr);
extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr);
extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr);
extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
    int *rvalp);

/* Module-level status query and init/fini hooks. */
extern int zvol_busy(void);
extern void zvol_init(void);
extern void zvol_fini(void);
#endif	/* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZVOL_H */
|
||||
Reference in New Issue
Block a user