Rebase to OpenSolaris b103. In the process we are removing any code which did not originate from the OpenSolaris source. These changes will be reintroduced in topic branches for easier tracking.

This commit is contained in:
Brian Behlendorf
2008-12-03 12:09:06 -08:00
parent b6097ae55a
commit b128c09fbe
339 changed files with 15459 additions and 60397 deletions
-123
View File
@@ -1,123 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ARC_H
#define _SYS_ARC_H
#pragma ident "@(#)arc.h 1.12 08/03/20 SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/spa.h>
/* Only the struct tags are declared here; arc_buf_hdr is never defined in this header. */
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
/* Function type of the `done`/`ready` callbacks taken by arc_read() and arc_write(). */
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
/* Function type stored in arc_buf.b_efunc and accepted by arc_set_callback(). */
typedef int arc_evict_func_t(void *private);
/*
* generic arc_done_func_t's which you can use
*
* These two lines declare functions (not variables) whose signature is
* arc_done_func_t.
*/
arc_done_func_t arc_bcopy_func;
arc_done_func_t arc_getbuf_func;
/*
* Consumer-visible ARC buffer. The header type behind b_hdr is opaque in
* this header.
*/
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next; /* NOTE(review): presumably links buffers that share b_hdr -- confirm */
void *b_data;
arc_evict_func_t *b_efunc; /* NOTE(review): presumably set through arc_set_callback() -- confirm */
void *b_private; /* NOTE(review): presumably the argument handed to b_efunc -- confirm */
};
/*
 * What an ARC buffer holds.  ARC_BUFC_NUMTYPES is not a buffer type: as the
 * last enumerator it equals the number of types listed before it.
 */
enum arc_buf_contents {
	ARC_BUFC_DATA = 0,	/* buffer contains data */
	ARC_BUFC_METADATA,	/* buffer contains metadata */
	ARC_BUFC_NUMTYPES
};
typedef enum arc_buf_contents arc_buf_contents_t;
/*
 * Flag bits passed into calls to the arc.  Each flag is a distinct single
 * bit; bit 0 is not assigned by these definitions.
 */
#define	ARC_WAIT	0x02	/* perform I/O synchronously */
#define	ARC_NOWAIT	0x04	/* perform I/O asynchronously */
#define	ARC_PREFETCH	0x08	/* I/O is a prefetch */
#define	ARC_CACHED	0x10	/* I/O was already in cache */
/*
* ARC entry points.
*
* NOTE(review): the group headings below are inferred from the function
* names and signatures only; none of the definitions are visible from this
* header. Confirm against the implementation before relying on them.
*/
/* Space accounting and raw data-buffer allocation (all sizes are uint64_t). */
void arc_space_consume(uint64_t space);
void arc_space_return(uint64_t space);
void *arc_data_buf_alloc(uint64_t space);
void arc_data_buf_free(void *buf, uint64_t space);
/* Buffer allocation and per-tag references. */
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
arc_buf_contents_t type);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
/* arc_referenced() is only declared when ZFS_DEBUG is defined. */
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
/*
* I/O entry points. Note that arc_read() takes its arc_flags through a
* pointer while arc_free() takes them by value.
* NOTE(review): presumably these carry the ARC_* flag bits defined above,
* and the pointer form lets arc_read() report flags back -- confirm.
*/
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_byteswap_func_t *swap,
arc_done_func_t *done, void *private, int priority, int flags,
uint32_t *arc_flags, zbookmark_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, int checksum, int compress,
int ncopies, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
int flags, zbookmark_t *zb);
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
/* Eviction callback registration and eviction. */
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);
void arc_flush(spa_t *spa);
/* Temporary space reservation. */
void arc_tempreserve_clear(uint64_t reserve);
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
/* Subsystem setup and teardown; neither takes arguments nor returns a value. */
void arc_init(void);
void arc_fini(void);
/*
* Level 2 ARC
*
* NOTE(review): l2arc_add_vdev() takes a [start, end) or [start, end] range
* on the vdev -- which of the two is not visible from this header; confirm.
*/
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
void l2arc_remove_vdev(vdev_t *vd);
void l2arc_init(void);
void l2arc_fini(void);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ARC_H */
-89
View File
@@ -1,89 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_BPLIST_H
#define _SYS_BPLIST_H
#pragma ident "@(#)bplist.h 1.3 06/05/24 SMI"
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * This is the bonus buffer for the dead lists.  The object's contents
 * are an array of bpl_entries blkptr_t's, representing a total of
 * bpl_bytes physical space.
 *
 * The structure is four consecutive uint64_t fields; do not reorder them.
 */
struct bplist_phys {
	uint64_t bpl_entries;	/* number of blkptr_t's in the array */
	uint64_t bpl_bytes;	/* total physical space they represent */
	uint64_t bpl_comp;	/* NOTE(review): presumably compressed size */
	uint64_t bpl_uncomp;	/* NOTE(review): presumably uncompressed size */
};
typedef struct bplist_phys bplist_phys_t;
/*
 * Size in bytes of two uint64_t fields.
 * NOTE(review): presumably the version-0 bplist_phys_t, which would then
 * have held only bpl_entries and bpl_bytes -- confirm.
 */
#define	BPLIST_SIZE_V0	(sizeof (uint64_t) * 2)
/*
* Queue element: one block pointer stored by value plus a link.
* NOTE(review): bpq_next is an untyped void *; presumably it points at the
* next bplist_q_t on bplist_t.bpl_queue -- confirm.
*/
typedef struct bplist_q {
blkptr_t bpq_blk;
void *bpq_next;
} bplist_q_t;
/*
* In-core handle for a block pointer list.
* NOTE(review): the per-field notes below are inferred from field names and
* types; which fields bpl_lock protects is not visible here -- confirm.
*/
typedef struct bplist {
kmutex_t bpl_lock;
objset_t *bpl_mos; /* presumably the `mos` passed to bplist_open() */
uint64_t bpl_object; /* presumably the `object` passed to bplist_open() */
uint8_t bpl_blockshift;
uint8_t bpl_bpshift;
uint8_t bpl_havecomp;
bplist_q_t *bpl_queue; /* list of bplist_q_t elements */
bplist_phys_t *bpl_phys; /* see bplist_phys_t (bonus buffer contents) */
dmu_buf_t *bpl_dbuf;
dmu_buf_t *bpl_cached_dbuf;
} bplist_t;
/*
* Block pointer list interface.
*
* bplist_create() returns a uint64_t and bplist_destroy()/bplist_open()
* take a uint64_t `object` together with the objset `mos`.
* NOTE(review): presumably the value returned by bplist_create() is that
* object number -- confirm.
*
* Routines returning int presumably return 0 or an errno; this header does
* not say -- confirm against the definitions.
*/
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
extern void bplist_close(bplist_t *bpl);
extern boolean_t bplist_empty(bplist_t *bpl);
/* itorp is an in/out iteration cursor owned by the caller; bp receives an entry. */
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
/* The deferred variant takes no transaction, unlike bplist_enqueue(). */
extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx);
extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp);
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
/* Three output pointers; NOTE(review): presumably used/compressed/uncompressed byte totals. */
extern int bplist_space(bplist_t *bpl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_BPLIST_H */
-46
View File
@@ -1,46 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1998 by Sun Microsystems, Inc.
* All rights reserved.
*/
#ifndef _SYS_COMPRESS_H
#define _SYS_COMPRESS_H
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Compression helpers.  Function declarations have external linkage by
 * default, so no storage-class specifier is written.
 *
 * NOTE(review): the parameters are unnamed; which pointer is the source and
 * which the destination, and what the size_t arguments and return values
 * measure, must be confirmed against the definitions.
 */
size_t compress(void *, void *, size_t);
size_t decompress(void *, void *, size_t, size_t);
uint32_t checksum32(void *, size_t);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_COMPRESS_H */
-334
View File
@@ -1,334 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DBUF_H
#define _SYS_DBUF_H
#pragma ident "@(#)dbuf.h 1.10 07/08/26 SMI"
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * All-ones unsigned long long.
 * NOTE(review): presumably the db_blkid given to a bonus buffer -- confirm.
 */
#define	DB_BONUS_BLKID	(~0ULL)
/* NOTE(review): the meaning of IN_DMU_SYNC is not established in this header. */
#define	IN_DMU_SYNC	2
/*
 * define flags for dbuf_read
 *
 * Each flag is a distinct single bit, so they may be OR'ed together.
 */
#define	DB_RF_MUST_SUCCEED	0x01
#define	DB_RF_CANFAIL		0x02
#define	DB_RF_HAVESTRUCT	0x04
#define	DB_RF_NOPREFETCH	0x08
#define	DB_RF_NEVERWAIT		0x10
#define	DB_RF_CACHED		0x20
/*
 * The state transition diagram for dbufs looks like:
 *
 *              +----> READ ----+
 *              |               |
 *              |               V
 *  (alloc)-->UNCACHED       CACHED-->EVICTING-->(free)
 *              |               ^
 *              |               |
 *              +----> FILL ----+
 *
 * Note that the enumerators are declared in the order UNCACHED, FILL, READ,
 * CACHED, EVICTING, which fixes their numeric values.
 */
enum dbuf_states {
	DB_UNCACHED = 0,
	DB_FILL,
	DB_READ,
	DB_CACHED,
	DB_EVICTING
};
typedef enum dbuf_states dbuf_states_t;
/*
* Forward declarations: this header only uses pointers to these structures
* (see db_objset, db_dnode and the dbuf_* prototypes), so their definitions
* are not needed here.
*/
struct objset_impl;
struct dnode;
struct dmu_tx;
/*
 * level = 0 means the user data
 * level = 1 means the single indirect block
 * etc.
 *
 * NOTE(review): this describes indirection levels (presumably db_level in
 * dmu_buf_impl_t), not the macro that follows -- confirm and relocate.
 */

/*
 * Nonzero when both the list_next and list_prev members of *link are NULL.
 * The argument is expanded twice, so it must be free of side effects.
 */
#define	LIST_LINK_INACTIVE(link) \
	(!((link)->list_next != NULL || (link)->list_prev != NULL))
/* Declared early so dbuf_dirty_record can point back at its dbuf. */
struct dmu_buf_impl;

/*
 * Values stored in dr_override_state of a leaf dirty record.
 * NOTE(review): the conditions that move a record between these states
 * are not visible in this header.
 */
enum override_states {
	DR_NOT_OVERRIDDEN = 0,
	DR_IN_DMU_SYNC,
	DR_OVERRIDDEN
};
typedef enum override_states override_states_t;
/*
* Record of one dirtying of a dbuf in one transaction group (dr_txg).
* Records are linked three ways: onto the parent's dirty list
* (dr_dirty_node), to the next record (dr_next) and to the parent's record
* (dr_parent).
*
* The trailing union holds either indirect-block state (dt.di) or leaf
* state (dt.dl).
* NOTE(review): presumably db_level of dr_dbuf selects the live member;
* no discriminator is stored in the record itself -- confirm.
*/
typedef struct dbuf_dirty_record {
/* link on our parent's dirty list */
list_node_t dr_dirty_node;
/* transaction group this data will sync in */
uint64_t dr_txg;
/* zio of outstanding write IO */
zio_t *dr_zio;
/* pointer back to our dbuf */
struct dmu_buf_impl *dr_dbuf;
/* pointer to next dirty record */
struct dbuf_dirty_record *dr_next;
/* pointer to parent dirty record */
struct dbuf_dirty_record *dr_parent;
union dirty_types {
struct dirty_indirect {
/* protect access to list */
kmutex_t dr_mtx;
/* Our list of dirty children */
list_t dr_children;
} di;
struct dirty_leaf {
/*
* dr_data is set when we dirty the buffer
* so that we can retain the pointer even if it
* gets COW'd in a subsequent transaction group.
*/
arc_buf_t *dr_data;
/* NOTE(review): presumably only meaningful when dr_override_state is not DR_NOT_OVERRIDDEN */
blkptr_t dr_overridden_by;
/* one of the override_states_t values */
override_states_t dr_override_state;
} dl;
} dt;
} dbuf_dirty_record_t;
/*
* Private, in-core representation of a DMU buffer. The public dmu_buf_t is
* embedded as the first member (db). The structure is laid out in two
* parts: members that do not change after setup, then members protected by
* db_mtx (db_link is the exception, see its comment).
*/
typedef struct dmu_buf_impl {
/*
* The following members are immutable, with the exception of
* db.db_data, which is protected by db_mtx.
*/
/* the publicly visible structure */
dmu_buf_t db;
/* the objset we belong to */
struct objset_impl *db_objset;
/*
* the dnode we belong to (NULL when evicted)
*/
struct dnode *db_dnode;
/*
* our parent buffer; if the dnode points to us directly,
* db_parent == db_dnode->dn_dbuf
* only accessed by sync thread ???
* (NULL when evicted)
*/
struct dmu_buf_impl *db_parent;
/*
* link for hash table of all dmu_buf_impl_t's
*/
struct dmu_buf_impl *db_hash_next;
/* our block number */
uint64_t db_blkid;
/*
* Pointer to the blkptr_t which points to us. May be NULL if we
* don't have one yet. (NULL when evicted)
*/
blkptr_t *db_blkptr;
/*
* Our indirection level. Data buffers have db_level==0.
* Indirect buffers which point to data buffers have
* db_level==1. etc. Buffers which contain dnodes have
* db_level==0, since the dnodes are stored in a file.
*/
uint8_t db_level;
/* db_mtx protects the members below */
kmutex_t db_mtx;
/*
* Current state of the buffer
*/
dbuf_states_t db_state;
/*
* Refcount accessed by dmu_buf_{hold,rele}.
* If nonzero, the buffer can't be destroyed.
* Protected by db_mtx.
*/
refcount_t db_holds;
/* buffer holding our data */
arc_buf_t *db_buf;
/* NOTE(review): presumably signalled when db_state changes -- confirm */
kcondvar_t db_changed;
/* NOTE(review): presumably the dirty record currently being written out -- confirm */
dbuf_dirty_record_t *db_data_pending;
/* pointer to most recent dirty record for this buffer */
dbuf_dirty_record_t *db_last_dirty;
/*
* Our link on the owner dnode's dn_dbufs list.
* Protected by its dn_dbufs_mtx.
*/
list_node_t db_link;
/* Data which is unique to data (leaf) blocks: */
/* stuff we store for the user (see dmu_buf_set_user) */
void *db_user_ptr;
void **db_user_data_ptr_ptr;
dmu_buf_evict_func_t *db_evict_func;
/* NOTE(review): presumably set by dmu_buf_set_user_ie() -- confirm */
uint8_t db_immediate_evict;
uint8_t db_freed_in_flight;
uint8_t db_dirtycnt;
} dmu_buf_impl_t;
/* Note: the dbuf hash table is exposed only for the mdb module */

/* Number of mutexes in a dbuf hash table; must remain a power of two. */
#define	DBUF_MUTEXES	256

/*
 * Address of the mutex in h->hash_mutexes selected by the low bits of idx.
 * The mask (DBUF_MUTEXES - 1) only works because DBUF_MUTEXES is a power
 * of two.
 */
#define	DBUF_HASH_MUTEX(h, idx) \
	((h)->hash_mutexes + ((idx) & (DBUF_MUTEXES - 1)))
/*
* Hash table of dmu_buf_impl_t's. Entries in one bucket are chained through
* dmu_buf_impl_t.db_hash_next; DBUF_HASH_MUTEX() selects one of the
* DBUF_MUTEXES mutexes for a given index.
* NOTE(review): presumably hash_table_mask is (number of buckets - 1) --
* confirm.
*/
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table;
kmutex_t hash_mutexes[DBUF_MUTEXES];
} dbuf_hash_table_t;
/*
 * dbuf interface.
 *
 * NOTE(review): the group headings are inferred from names and signatures
 * only; confirm against the definitions.
 */

/* Creation and lookup. */
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
void dbuf_create_bonus(struct dnode *dn);

/* Holds; `tag` identifies the holder and is passed again on release. */
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
    void *tag);
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
    void *tag, dmu_buf_impl_t **dbp);

void dbuf_prefetch(struct dnode *dn, uint64_t blkid);

void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
uint64_t dbuf_refcount(dmu_buf_impl_t *db);

void dbuf_rele(dmu_buf_impl_t *db, void *tag);

dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);

/* `flags` takes the DB_RF_* bits defined above ("flags for dbuf_read"). */
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);

/*
 * Dirtying and filling.  dmu_buf_will_fill() is declared exactly once here;
 * a second identical declaration used to follow dbuf_fill_done().
 */
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);

void dbuf_clear(dmu_buf_impl_t *db);
void dbuf_evict(dmu_buf_impl_t *db);

void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);

void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks,
    struct dmu_tx *tx);

void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);

/* Subsystem setup and teardown. */
void dbuf_init(void);
void dbuf_fini(void);
/*
 * Select the ARC buffer type for a dbuf: ARC_BUFC_METADATA when the dbuf
 * is above level 0 or when its dnode's object type has ot_metadata set in
 * dmu_ot[], ARC_BUFC_DATA otherwise.
 *
 * The expansion is one parenthesized expression with no trailing semicolon,
 * so it can appear in initializers, comparisons and argument lists; callers
 * supply their own terminating ';'.  The argument is expanded more than
 * once, so it must be free of side effects.
 */
#define	DBUF_GET_BUFC_TYPE(db) \
	((((db)->db_level > 0) || \
	(dmu_ot[(db)->db_dnode->dn_type].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
/*
* Debug-only helpers. With ZFS_DEBUG defined the two dprintf macros emit a
* message when ZFS_DEBUG_DPRINTF is set in zfs_flags and DBUF_VERIFY() calls
* dbuf_verify(); without ZFS_DEBUG all three expand to nothing.
*/
#ifdef ZFS_DEBUG
/*
* There should be a ## between the string literal and fmt, to make it
* clear that we're joining two strings together, but gcc does not
* support that preprocessor token.
*/
/*
* dprintf_dbuf(dbuf, fmt, ...): prefix a message with
* "obj=<object> lvl=<db_level> blkid=<db_blkid> " and hand it to
* dprintf_ds() for the dbuf's dataset. The object is printed as "mdn" when
* it equals DMU_META_DNODE_OBJECT, otherwise as a decimal number.
*
* fmt must be a string literal (it is pasted after the prefix), and at
* least one variadic argument is required because __VA_ARGS__ follows a
* comma. dbuf is expanded several times.
*/
#define dprintf_dbuf(dbuf, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char __db_buf[32]; \
uint64_t __db_obj = (dbuf)->db.db_object; \
if (__db_obj == DMU_META_DNODE_OBJECT) \
(void) strcpy(__db_buf, "mdn"); \
else \
(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
(u_longlong_t)__db_obj); \
dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
"obj=%s lvl=%u blkid=%lld " fmt, \
__db_buf, (dbuf)->db_level, \
(u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
} \
_NOTE(CONSTCOND) } while (0)
/*
* dprintf_dbuf_bp(db, bp, fmt, ...): like dprintf_dbuf(), with the block
* pointer bp rendered by sprintf_blkptr() and appended as " %s\n". The
* scratch buffer of BP_SPRINTF_LEN bytes is obtained with
* kmem_alloc(KM_SLEEP) and freed before the macro finishes.
*/
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#define DBUF_VERIFY(db) dbuf_verify(db)
#else
/* Non-debug build: the arguments are not evaluated at all. */
#define dprintf_dbuf(db, fmt, ...)
#define dprintf_dbuf_bp(db, bp, fmt, ...)
#define DBUF_VERIFY(db)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DBUF_H */
-620
View File
@@ -1,620 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_H
#define _SYS_DMU_H
#pragma ident "@(#)dmu.h 1.38 08/04/27 SMI"
/*
* This file describes the interface that the DMU provides for its
* consumers.
*
* The DMU also interacts with the SPA. That interface is described in
* dmu_spa.h.
*/
#include <sys/inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Forward declarations.  The interface below only passes pointers to these
 * structures, so their definitions are not required here.  Each tag is
 * declared exactly once.
 */
struct uio;
struct page;
struct vnode;
struct spa;
struct zilog;
struct zio;
struct blkptr;
struct zap_cursor;
struct dsl_dataset;
struct dsl_pool;
struct dnode;
struct drr_begin;
struct drr_end;
struct zbookmark;
struct nvlist;
struct objset_impl;

/* Opaque handle types used throughout the DMU interface. */
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
/*
* DMU object types. The trailing comment on each enumerator names the data
* format of objects of that type.
*
* No enumerator has an explicit value, so they are numbered consecutively
* from DMU_OT_NONE == 0 in declaration order and DMU_OT_NUMTYPES equals the
* number of types; dmu_ot[] is declared with exactly that many elements.
* Inserting or reordering entries therefore renumbers everything after the
* change.
* NOTE(review): presumably these values are stored persistently, in which
* case new types may only be appended before DMU_OT_NUMTYPES -- confirm.
*/
typedef enum dmu_object_type {
DMU_OT_NONE,
/* general: */
DMU_OT_OBJECT_DIRECTORY, /* ZAP */
DMU_OT_OBJECT_ARRAY, /* UINT64 */
DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
DMU_OT_BPLIST, /* UINT64 */
DMU_OT_BPLIST_HDR, /* UINT64 */
/* spa: */
DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
DMU_OT_SPACE_MAP, /* UINT64 */
/* zil: */
DMU_OT_INTENT_LOG, /* UINT64 */
/* dmu: */
DMU_OT_DNODE, /* DNODE */
DMU_OT_OBJSET, /* OBJSET */
/* dsl: */
DMU_OT_DSL_DIR, /* UINT64 */
DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */
DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */
DMU_OT_DSL_PROPS, /* ZAP */
DMU_OT_DSL_DATASET, /* UINT64 */
/* zpl: */
DMU_OT_ZNODE, /* ZNODE */
DMU_OT_OLDACL, /* Old ACL */
DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */
DMU_OT_DIRECTORY_CONTENTS, /* ZAP */
DMU_OT_MASTER_NODE, /* ZAP */
DMU_OT_UNLINKED_SET, /* ZAP */
/* zvol: */
DMU_OT_ZVOL, /* UINT8 */
DMU_OT_ZVOL_PROP, /* ZAP */
/* other; for testing only! */
DMU_OT_PLAIN_OTHER, /* UINT8 */
DMU_OT_UINT64_OTHER, /* UINT64 */
DMU_OT_ZAP_OTHER, /* ZAP */
/* new object types: */
DMU_OT_ERROR_LOG, /* ZAP */
DMU_OT_SPA_HISTORY, /* UINT8 */
DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */
DMU_OT_POOL_PROPS, /* ZAP */
DMU_OT_DSL_PERMS, /* ZAP */
DMU_OT_ACL, /* ACL */
DMU_OT_SYSACL, /* SYSACL */
DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
DMU_OT_NUMTYPES
} dmu_object_type_t;
/*
 * Kinds of object set.  Values are consecutive from DMU_OST_NONE == 0, so
 * DMU_OST_NUMTYPES equals the number of kinds listed before it.
 */
enum dmu_objset_type {
	DMU_OST_NONE = 0,
	DMU_OST_META,
	DMU_OST_ZFS,
	DMU_OST_ZVOL,
	DMU_OST_OTHER,		/* For testing only! */
	DMU_OST_ANY,		/* Be careful! */
	DMU_OST_NUMTYPES
};
typedef enum dmu_objset_type dmu_objset_type_t;
/*
 * Byteswap routines.  Every one takes (void *buf, size_t size) and returns
 * nothing, which is the arc_byteswap_func_t function type.
 * NOTE(review): the element width or format each routine handles is
 * inferred from its name only; confirm against the definitions.
 */

/* Arrays of fixed-width integers. */
void byteswap_uint8_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint64_array(void *buf, size_t size);

/* Structured formats. */
void zap_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
/*
 * Dataset open modes.  A mode word carries an access level in its low bits
 * (extracted by DS_MODE_LEVEL) plus optional modifier flags above them.
 */

/* Access levels; DS_MODE_LEVELS is their count and must be a power of two. */
#define	DS_MODE_NONE		0x0	/* invalid, to aid debugging */
#define	DS_MODE_STANDARD	0x1	/* normal access, no special needs */
#define	DS_MODE_PRIMARY		0x2	/* the "main" access, e.g. a mount */
#define	DS_MODE_EXCLUSIVE	0x3	/* exclusive access, e.g. to destroy */
#define	DS_MODE_LEVELS		0x4

/* Modifier flags. */
#define	DS_MODE_READONLY	0x08
#define	DS_MODE_INCONSISTENT	0x10

/* Accessors; the IS_ forms yield the flag bit itself (nonzero), not 1. */
#define	DS_MODE_LEVEL(x)		((DS_MODE_LEVELS - 1) & (x))
#define	DS_MODE_IS_READONLY(x)		(DS_MODE_READONLY & (x))
#define	DS_MODE_IS_INCONSISTENT(x)	(DS_MODE_INCONSISTENT & (x))

/* NOTE(review): presumably for the `flags` argument of dmu_objset_find(). */
#define	DS_FIND_SNAPSHOTS	0x1
#define	DS_FIND_CHILDREN	0x2
/*
 * Upper bound, in bytes, on what a single operation may access, with
 * metadata counted towards the total.
 */
#define	DMU_MAX_ACCESS	(10 * 1024 * 1024)	/* 10MB */
/*
* Public routines to create, destroy, open, and close objsets.
*
* dmu_objset_open() and dmu_objset_open_ds() return the opened objset
* through *osp.
* NOTE(review): presumably `mode` takes a DS_MODE_* level optionally OR'ed
* with DS_MODE_READONLY / DS_MODE_INCONSISTENT, and the int results are 0
* or an errno -- this header does not say; confirm.
*/
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_evict_dbufs(objset_t *os);
/* `func` is a caller-supplied callback taking (os, arg, cr, tx); `arg` is passed alongside it. */
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
int dmu_snapshots_destroy(char *fsname, char *snapname);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
/*
* `func` is written as a function type; as a parameter it is adjusted to a
* pointer to function taking (char *, void *) and returning int.
* NOTE(review): presumably `flags` takes the DS_FIND_* bits -- confirm.
*/
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
/*
 * Publicly visible description of one DMU buffer: which object it belongs
 * to, where it sits in that object, how large it is and where its data is.
 */
struct dmu_buf {
	uint64_t db_object;	/* object that this buffer is part of */
	uint64_t db_offset;	/* byte offset in this object */
	uint64_t db_size;	/* size of buffer in bytes */
	void *db_data;		/* data in buffer */
};
typedef struct dmu_buf dmu_buf_t;

/*
 * Function type of the eviction callbacks handed to dmu_buf_set_user() and
 * friends: receives the buffer and the user pointer registered with it.
 */
typedef void dmu_buf_evict_func_t(dmu_buf_t *db, void *user_ptr);
/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*
* DMU_POOL_DIRECTORY_OBJECT is the exception in this list: it is a number,
* not a name.
* NOTE(review): presumably it is the object number of that directory
* object -- confirm. The strings are lookup keys; changing their spelling
* changes what is looked up.
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
#define DMU_POOL_ERRLOG_LAST "errlog_last"
#define DMU_POOL_SPARES "spares"
#define DMU_POOL_DEFLATE "deflate"
#define DMU_POOL_HISTORY "history"
#define DMU_POOL_PROPS "pool_props"
#define DMU_POOL_L2CACHE "l2cache"
/*
* Allocate an object from this objset. The range of object numbers
* available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode.
*
* The transaction must be assigned to a txg. The newly allocated
* object will be "held" in the transaction (ie. you can modify the
* newly allocated object in this transaction).
*
* dmu_object_alloc() chooses an object and returns it in *objectp.
*
* dmu_object_claim() allocates a specific object number. If that
* number is already allocated, it fails and returns EEXIST.
*
* Return 0 on success, or ENOSPC or EEXIST as specified above.
*
* NOTE(review): the text above does not match the dmu_object_alloc()
* prototype below, which has no objectp parameter and returns a uint64_t.
* Presumably the chosen object number is the return value and the
* "return 0 / ENOSPC" wording applies only to the int-returning routines --
* confirm. dmu_object_reclaim() is not described by this comment at all.
*/
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* Free an object from this objset.
*
* The object's data will be freed as well (ie. you don't need to call
* dmu_free(object, 0, -1, tx)).
*
* The object need not be held in the transaction.
*
* If there are any holds on this object's buffers (via dmu_buf_hold()),
* or tx holds on the object (via dmu_tx_hold_object()), you can not
* free it; it fails and returns EBUSY.
*
* If the object is not allocated, it fails and returns ENOENT.
*
* Return 0 on success, or EBUSY or ENOENT as specified above.
*
* NOTE(review): no dmu_free() is declared in the visible part of this
* header; presumably the example refers to dmu_free_range() -- confirm.
*/
int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
/*
* Find the next allocated or free object.
*
* The objectp parameter is in-out. It will be updated to be the next
* object which is allocated. Ignore objects which have not been
* modified since txg.
*
* XXX Can only be called on a objset with no dirty data.
*
* Returns 0 on success, or ENOENT if there are no more objects.
*
* NOTE(review): the `hole` parameter is not described above; presumably it
* selects searching for a free object instead of an allocated one, which
* would match "allocated or free" in the first sentence -- confirm.
*/
int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg);
/*
* Set the data blocksize for an object.
*
* The object cannot have any blocks allocated beyond the first. If
* the first block is allocated already, the new size must be greater
* than the current block size. If these conditions are not met,
* ENOTSUP will be returned.
*
* Returns 0 on success, or EBUSY if there are any holds on the object
* contents, or ENOTSUP as described above.
*
* NOTE(review): the `ibs` parameter is not described above; presumably it
* is the indirect block shift -- confirm.
*/
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
int ibs, dmu_tx_t *tx);
/*
* Set the checksum property on a dnode. The new checksum algorithm will
* apply to all newly written blocks; existing blocks will not be affected.
*/
void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
dmu_tx_t *tx);
/*
* Set the compress property on a dnode. The new compression algorithm will
* apply to all newly written blocks; existing blocks will not be affected.
*/
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
dmu_tx_t *tx);
/*
* Decide how many copies of a given block we should make. Can be from
* 1 to SPA_DVAS_PER_BP.
*
* Unlike its neighbours this routine takes a struct objset_impl *, not an
* objset_t *; the first parameter is unnamed in the prototype.
*/
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
dmu_object_type_t ot);
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
* dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
* data. As with any normal buffer, you must call dmu_buf_read() to
* read db_data, dmu_buf_will_dirty() before modifying it, and the
* object must be held in an assigned transaction before calling
* dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
* buffer as well. You must release your hold with dmu_buf_rele().
*
* NOTE(review): dmu_bonus_max() and dmu_set_bonus() are not described
* above, and the int parameter of dmu_set_bonus() is unnamed; presumably
* it is the new bonus length -- confirm.
*/
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
/*
* Obtain the DMU buffer from the specified object which contains the
* specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
* that it will remain in memory. You must release the hold with
* dmu_buf_rele(). You mustn't access the dmu_buf_t after releasing your
* hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
*
* You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
* on the returned buffer before reading or writing the buffer's
* db_data. The comments for those routines describe what particular
* operations are valid after calling them.
*
* The object number must be a valid, allocated object number.
*
* The held buffer is returned through the final dmu_buf_t ** parameter,
* which is unnamed in the prototype. The same `tag` is passed to
* dmu_buf_rele().
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **);
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
void dmu_buf_rele(dmu_buf_t *db, void *tag);
uint64_t dmu_buf_refcount(dmu_buf_t *db);
/*
* dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
* range of an object. A pointer to an array of dmu_buf_t*'s is
* returned (in *dbpp).
*
* dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
* frees the array. The hold on the array of buffers MUST be released
* with dmu_buf_rele_array. You can NOT release the hold on each buffer
* individually with dmu_buf_rele.
*
* The number of buffers in the array is returned through *numbufsp and
* must be passed back as numbufs when releasing.
* NOTE(review): only the _by_bonus variant is declared in the visible part
* of this header; presumably it identifies the object through an already
* held bonus buffer `db` instead of (os, object) -- confirm.
*/
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
/*
* Returns NULL on success, or the existing user ptr if it's already
* been set.
*
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
*
* user_data_ptr_ptr should be NULL, or a pointer to a pointer which
* will be set to db->db_data when you are allowed to access it. Note
* that db->db_data (the pointer) can change when you do dmu_buf_read(),
* dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
* *user_data_ptr_ptr will be set to the new value when it changes.
*
* If non-NULL, pageout func will be called when this buffer is being
* excised from the cache, so that you can clean up the data structure
* pointed to by user_ptr.
*
* dmu_evict_user() will call the pageout func for all buffers in a
* objset with a given pageout func.
*
* "pageout func" above is the pageout_func parameter. Although documented
* as a pointer to a pointer, user_data_ptr_ptr is declared as plain void *.
*/
void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
dmu_buf_evict_func_t *pageout_func);
/*
* set_user_ie is the same as set_user, but request immediate eviction
* when hold count goes to zero.
*/
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
/*
* NOTE(review): dmu_buf_update_user() is not described in this header;
* presumably it replaces the user pointer only when the current one equals
* old_user_ptr -- confirm against the definition.
*/
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
void *user_ptr, void *user_data_ptr_ptr,
dmu_buf_evict_func_t *pageout_func);
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
/*
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
*/
void *dmu_buf_get_user(dmu_buf_t *db);
/*
* Indicate that you are going to modify the buffer's data (db_data).
*
* The transaction (tx) must be assigned to a txg (ie. you've called
* dmu_tx_assign()). The buffer's object must be held in the tx
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*
* NOTE(review): dmu_tx_hold_object() is not declared in the visible part
* of this header; the hold routines declared here are
* dmu_tx_hold_write/free/zap/bonus. Presumably any of them satisfies the
* requirement -- confirm.
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
/*
* You must create a transaction, then hold the objects which you will
* (or might) modify as part of this transaction. Then you must assign
* the transaction to a transaction group. Once the transaction has
* been assigned, you can modify buffers which belong to held objects as
* part of this transaction. You can't modify buffers before the
* transaction has been assigned; you can't modify buffers which don't
* belong to objects which this transaction holds; you can't hold
* objects once the transaction has been assigned. You may hold an
* object which you are going to free (with dmu_object_free()), but you
* don't have to.
*
* You can abort the transaction before it has been assigned.
*
* Note that you may hold buffers (with dmu_buf_hold) at any time,
* regardless of transaction state.
*/
/*
 * Both are the all-ones unsigned long long value.
 * NOTE(review): presumably DMU_NEW_OBJECT is passed as the object number
 * when holding an object that does not exist yet, and DMU_OBJECT_END as a
 * "to the end" length -- neither is stated in this header; confirm.
 */
#define	DMU_NEW_OBJECT	(~0ULL)
#define	DMU_OBJECT_END	(~0ULL)
/*
* Transaction life cycle: dmu_tx_create(), one or more dmu_tx_hold_*()
* calls, dmu_tx_assign(), then either dmu_tx_commit() or (before
* assignment) dmu_tx_abort().
* NOTE(review): the values accepted by txg_how, and when dmu_tx_wait()
* should be called, are not visible in this header -- confirm.
*/
dmu_tx_t *dmu_tx_create(objset_t *os);
/* Note the differing length types: int for hold_write, uint64_t for hold_free. */
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
/*
* Free up the data blocks for a defined range of a file. If size is
* zero, the range from offset to end-of-file is freed.
*
* NOTE(review): the meaning of the int return value is not stated here;
* presumably 0 or an errno -- confirm.
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx);
/*
* Convenience functions.
*
* Canfail routines will return 0 on success, or an errno if there is a
* nonrecoverable I/O error.
*
* Of the routines below, dmu_write() returns void and so cannot report
* failure to its caller; the read and uio/page variants return int.
*/
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
/* Global defined elsewhere. NOTE(review): presumably nonzero disables prefetching -- confirm. */
extern int zfs_prefetch_disable;
/*
* Asynchronously try to read in the data.
*/
void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
uint64_t len);
/*
* Description of one DMU object, filled in through the `doi` argument of
* dmu_object_info(), dmu_object_info_from_dnode() and
* dmu_object_info_from_db().
*/
typedef struct dmu_object_info {
/* All sizes are in bytes. */
uint32_t doi_data_block_size;
uint32_t doi_metadata_block_size;
uint64_t doi_bonus_size;
dmu_object_type_t doi_type;
dmu_object_type_t doi_bonus_type;
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
uint8_t doi_checksum;
uint8_t doi_compress;
/* Explicit padding: the three uint8_t fields above plus these 5 bytes total 8. */
uint8_t doi_pad[5];
/* Values below are number of 512-byte blocks. */
uint64_t doi_physical_blks; /* data + metadata */
uint64_t doi_max_block_offset;
} dmu_object_info_t;
/* Function type shared by all the byteswap routines declared in this header. */
typedef void arc_byteswap_func_t(void *buf, size_t size);
/*
* Per-object-type properties: the byteswap routine for the type, whether
* the type counts as metadata, and a name string.
*/
typedef struct dmu_object_type_info {
arc_byteswap_func_t *ot_byteswap;
boolean_t ot_metadata;
char *ot_name;
} dmu_object_type_info_t;
/* One entry per dmu_object_type_t value; the table itself is defined elsewhere. */
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
/*
* Get information on a DMU object.
*
* Return 0 on success or ENOENT if object is not allocated.
*
* If doi is NULL, just indicates whether the object exists.
*/
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
/*
* Variants that start from a dnode or from a dmu_buf_t instead of an
* (objset, object number) pair. Both return void.
*/
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
/*
* Results are delivered through the blksize and nblk512 pointers.
* NOTE(review): the name nblk512 suggests a count of 512-byte blocks --
* confirm against the implementation.
*/
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
u_longlong_t *nblk512);
/*
* Statistics record for a dataset; dmu_objset_fast_stat() takes a
* pointer to one of these.
*/
typedef struct dmu_objset_stats {
uint64_t dds_num_clones; /* number of clones of this */
uint64_t dds_creation_txg;
uint64_t dds_guid;
dmu_objset_type_t dds_type;
uint8_t dds_is_snapshot; /* NOTE(review): presumably a 0/1 flag -- confirm */
uint8_t dds_inconsistent; /* NOTE(review): presumably a 0/1 flag -- confirm */
char dds_origin[MAXNAMELEN]; /* fixed-size buffer of MAXNAMELEN chars */
} dmu_objset_stats_t;
/*
* Get stats on a dataset.
*
* The stats are delivered through the stat pointer; the function
* itself returns void.
*/
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
/*
* Add entries to the nvlist for all the objset's properties. See
* zfs_prop_table[] and zfs(1m) for details on the properties.
*/
void dmu_objset_stats(objset_t *os, struct nvlist *nv);
/*
* Get the space usage statistics for statvfs().
*
* refdbytes is the amount of space "referenced" by this objset.
* availbytes is the amount of space available to this objset, taking
* into account quotas & reservations, assuming that no other objsets
* use the space first. These values correspond to the 'referenced' and
* 'available' properties, described in the zfs(1m) manpage.
*
* usedobjs and availobjs are the number of objects currently allocated,
* and available.
*
* Each value is delivered through the pointer argument of the same name
* (with a trailing "p"); the function itself returns void.
*/
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
/*
* The fsid_guid is a 56-bit ID that can change to avoid collisions.
* (Contrast with the ds_guid which is a 64-bit ID that will never
* change, so there is a small probability that it will collide.)
*
* The 56-bit value is returned in a uint64_t.
*/
uint64_t dmu_objset_fsid_guid(objset_t *os);
/*
* NOTE(review): returns int rather than boolean_t; presumably non-zero
* means the objset is a snapshot -- confirm.
*/
int dmu_objset_is_snapshot(objset_t *os);
/*
 * Accessors that take an objset and hand back one of the objects or
 * identifiers associated with it.  dmu_objset_name() writes into the
 * caller-supplied buf instead of returning a value.
 */
struct spa *dmu_objset_spa(objset_t *os);
struct zilog *dmu_objset_zil(objset_t *os);
struct dsl_pool *dmu_objset_pool(objset_t *os);
struct dsl_dataset *dmu_objset_ds(objset_t *os);
dmu_objset_type_t dmu_objset_type(objset_t *os);
uint64_t dmu_objset_id(objset_t *os);
void dmu_objset_name(objset_t *os, char *buf);

/*
 * Snapshot and directory name listing/lookup.  All three return an int
 * status and deliver their results through the pointer arguments.
 */
int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
int dmu_snapshot_realname(objset_t *os, char *name, char *real,
    int maxlen, boolean_t *conflict);
int dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp);

/*
 * Opaque per-objset user pointer: set stores it, get returns it.
 */
void dmu_objset_set_user(objset_t *os, void *user_ptr);
void *dmu_objset_get_user(objset_t *os);
/*
* Return the txg number for the given assigned transaction.
*/
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
/*
* Synchronous write.
* If a parent zio is provided this function initiates a write on the
* provided buffer as a child of the parent zio.
* In the absence of a parent zio, the write is completed synchronously.
* At write completion, *bp is filled with the block pointer of the
* written block.
* Note that while the data covered by this function will be on stable
* storage when the write completes this new data does not become a
* permanent part of the file until the associated transaction commits.
*
* done is a dmu_sync_cb_t, i.e. a function taking (dmu_buf_t *, void *).
* NOTE(review): presumably it is called with db and arg once the write
* finishes -- confirm against the implementation.
*/
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
int dmu_sync(struct zio *zio, dmu_buf_t *db,
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
/*
* Find the next hole or data block in file starting at *off
* Return found offset in *off. Return ESRCH for end of file.
*
* The hole argument selects which of the two (hole or data) is
* searched for; *off is both the starting point and the result.
*/
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
uint64_t *off);
/*
* Initial setup and final teardown.
*/
extern void dmu_init(void);
extern void dmu_fini(void);
/*
* Callback type for dmu_traverse_objset(): it receives the objset, the
* caller's arg, a block pointer, and an object/offset/len triple, and
* returns nothing.
*/
typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
uint64_t object, uint64_t offset, int len);
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
/*
* NOTE(review): presumably writes a backup stream for tosnap (relative
* to fromsnap, if any) to vp, tracking the position in *off -- confirm
* against the implementation. Returns an int status.
*/
int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
struct vnode *vp, offset_t *off);
/*
* State handed between the dmu_recv_*() routines, each of which takes a
* pointer to one of these. Callers should treat the contents as
* private (see the note inside the structure).
*/
typedef struct dmu_recv_cookie {
/*
* This structure is opaque!
*
* If logical and real are different, we are recving the stream
* into the "real" temporary clone, and then switching it with
* the "logical" target.
*/
struct dsl_dataset *drc_logical_ds;
struct dsl_dataset *drc_real_ds;
struct drr_begin *drc_drrb;
char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_force;
} dmu_recv_cookie_t;
/*
 * Receive entry points.  dmu_recv_begin() is the one routine here that
 * takes the target names and the struct drr_begin alongside the cookie;
 * the remaining routines operate on the cookie alone (plus the vnode and
 * offset pointer for dmu_recv_stream()).
 */
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
    boolean_t force, objset_t *origin, boolean_t online,
    dmu_recv_cookie_t *drc);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp,
    offset_t *voffp);
int dmu_recv_end(dmu_recv_cookie_t *drc);
void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
/*
* CRC64 table: 256 uint64_t entries, defined elsewhere.
* NOTE(review): presumably one entry per byte value, generated from
* ZFS_CRC64_POLY -- the generator is not part of this header.
*/
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_H */
-237
View File
@@ -1,237 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_IMPL_H
#define _SYS_DMU_IMPL_H
#pragma ident "@(#)dmu_impl.h 1.2 07/02/02 SMI"
#include <sys/txg_impl.h>
#include <sys/zio.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* This is the locking strategy for the DMU. Numbers in parentheses are
* cases that use that lock order, referenced below:
*
* ARC is self-contained
* bplist is self-contained
* refcount is self-contained
* txg is self-contained (hopefully!)
* zst_lock
* zf_rwlock
*
* XXX try to improve evicting path?
*
* dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
* dn_dbufs_mtx > hash_mutexes > db_mtx > leafs
*
* dp_config_rwlock
* must be held before: everything
* protects dd namespace changes
* protects property changes globally
* held from:
* dsl_dir_open/r:
* dsl_dir_create_sync/w:
* dsl_dir_sync_destroy/w:
* dsl_dir_rename_sync/w:
* dsl_prop_changed_notify/r:
*
* os_obj_lock
* must be held before:
* everything except dp_config_rwlock
* protects os_obj_next
* held from:
* dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
*
* dn_struct_rwlock
* must be held before:
* everything except dp_config_rwlock and os_obj_lock
* protects structure of dnode (eg. nlevels)
* db_blkptr can change when syncing out change to nlevels
* dn_maxblkid
* dn_nlevels
* dn_*blksz*
* phys nlevels, maxblkid, physical blkptr_t's (?)
* held from:
* callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
* dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
* dmu_tx_count_free:
* dbuf_read_impl: db_mtx, dmu_zfetch()
* dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
* dbuf_new_size: db_mtx
* dbuf_dirty: db_mtx
* dbuf_findbp: (callers, phys? - the real need)
* dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
* dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
* dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
* dnode_sync/w (increase_indirection): db_mtx (phys)
* dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
* dnode_new_blkid/w: (dn_maxblkid)
* dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
* dnode_next_offset: (phys)
*
* dn_dbufs_mtx
* must be held before:
* db_mtx, hash_mutexes
* protects:
* dn_dbufs
* dn_evicted
* held from:
* dmu_evict_user: db_mtx (dn_dbufs)
* dbuf_free_range: db_mtx (dn_dbufs)
* dbuf_remove_ref: db_mtx, callees:
* dbuf_hash_remove: hash_mutexes, db_mtx
* dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
* dnode_set_blksz: (dn_dbufs)
*
* hash_mutexes (global)
* must be held before:
* db_mtx
* protects dbuf_hash_table (global) and db_hash_next
* held from:
* dbuf_find: db_mtx
* dbuf_hash_insert: db_mtx
* dbuf_hash_remove: db_mtx
*
* db_mtx (meta-leaf)
* must be held before:
* dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
* protects:
* db_state
* db_holds
* db_buf
* db_changed
* db_data_pending
* db_dirtied
* db_link
* db_dirty_node (??)
* db_dirtycnt
* db_d.*
* db.*
* held from:
* dbuf_dirty: dn_mtx, dn_dirty_mtx
* dbuf_dirty->dsl_dir_willuse_space: dd_lock
* dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
* dbuf_undirty: dn_dirty_mtx (db_d)
* dbuf_write_done: dn_dirty_mtx (db_state)
* dbuf_*
* dmu_buf_update_user: none (db_d)
* dmu_evict_user: none (db_d) (maybe can eliminate)
* dbuf_find: none (db_holds)
* dbuf_hash_insert: none (db_holds)
* dmu_buf_read_array_impl: none (db_state, db_changed)
* dmu_sync: none (db_dirty_node, db_d)
* dnode_reallocate: none (db)
*
* dn_mtx (leaf)
* protects:
* dn_dirty_dbufs
* dn_ranges
* phys accounting
* dn_allocated_txg
* dn_free_txg
* dn_assigned_txg
* dd_assigned_tx
* dn_notxholds
* dn_dirtyctx
* dn_dirtyctx_firstset
* (dn_phys copy fields?)
* (dn_phys contents?)
* held from:
* dnode_*
* dbuf_dirty: none
* dbuf_sync: none (phys accounting)
* dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
* dbuf_write_done: none (phys accounting)
* dmu_object_info_from_dnode: none (accounting)
* dmu_tx_commit: none
* dmu_tx_hold_object_impl: none
* dmu_tx_try_assign: dn_notxholds(cv)
* dmu_tx_unassign: none
*
* dd_lock (leaf)
* protects:
* dd_prop_cbs
* dd_sync_*
* dd_used_bytes
* dd_tempreserved
* dd_space_towrite
* dd_myname
* dd_phys accounting?
* held from:
* dsl_dir_*
* dsl_prop_changed_notify: none (dd_prop_cbs)
* dsl_prop_register: none (dd_prop_cbs)
* dsl_prop_unregister: none (dd_prop_cbs)
* dsl_dataset_block_freeable: none (dd_sync_*)
*
* os_lock (leaf)
* protects:
* os_dirty_dnodes
* os_free_dnodes
* os_dnodes
* os_downgraded_dbufs
* dn_dirtyblksz
* dn_dirty_link
* held from:
* dnode_create: none (os_dnodes)
* dnode_destroy: none (os_dnodes)
* dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
* dnode_free: none (dn_dirtyblksz, os_*_dnodes)
*
* ds_lock (leaf)
* protects:
* ds_user_ptr
* ds_user_evict_func
* ds_open_refcount
* ds_snapname
* ds_phys accounting
* held from:
* dsl_dataset_*
*
* dr_mtx (leaf)
* protects:
* dr_children
* held from:
* dbuf_dirty
* dbuf_undirty
* dbuf_sync_indirect
* dnode_new_blkid
*/
/* Forward declarations only; nothing else in this header uses them. */
struct objset;
struct dmu_pool;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_IMPL_H */
-129
View File
@@ -1,129 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_OBJSET_H
#define _SYS_DMU_OBJSET_H
#pragma ident "@(#)dmu_objset.h 1.13 08/04/27 SMI"
#include <sys/spa.h>
#include <sys/arc.h>
#include <sys/txg.h>
#include <sys/zfs_context.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/zil.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations; this header uses these types through pointers only. */
struct dsl_dataset;
struct dmu_tx;
struct objset_impl;
/*
* objset_phys_t holds the meta dnode, the ZIL header and the objset
* type. os_pad is sized as 1024 minus the sizes of the other three
* members, so the members add up to 1024 bytes.
*/
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
sizeof (uint64_t)];
} objset_phys_t;
/*
* The objset handle: a pointer to an objset_impl plus a mode value.
* NOTE(review): os_mode is presumably the mode given to
* dmu_objset_open() -- confirm.
*/
struct objset {
struct objset_impl *os;
int os_mode;
};
/*
* Implementation state of an objset. The section comments inside the
* structure say which lock, if any, covers each group of fields.
*/
typedef struct objset_impl {
/* Immutable: */
struct dsl_dataset *os_dsl_dataset;
spa_t *os_spa;
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
dnode_t *os_meta_dnode;
zilog_t *os_zil;
objset_t os; /* embedded handle (struct objset) */
uint8_t os_checksum; /* can change, under dsl_dir's locks */
uint8_t os_compress; /* can change, under dsl_dir's locks */
uint8_t os_copies; /* can change, under dsl_dir's locks */
uint8_t os_md_checksum;
uint8_t os_md_compress;
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
blkptr_t *os_rootbp;
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
uint64_t os_obj_next;
/* Protected by os_lock */
kmutex_t os_lock;
list_t os_dirty_dnodes[TXG_SIZE];
list_t os_free_dnodes[TXG_SIZE];
list_t os_dnodes;
list_t os_downgraded_dbufs;
/* stuff we store for the user */
/* NOTE(review): os_user_ptr_lock presumably guards os_user_ptr -- confirm */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
} objset_impl_t;
/* Object number that designates the meta dnode. */
#define DMU_META_DNODE_OBJECT 0
/*
* called from zpl
*
* dmu_objset_open() delivers the opened objset through *osp.
* dmu_objset_create() takes a callback, func, whose signature is
* (objset_t *, void *, cred_t *, dmu_tx_t *); NOTE(review): presumably
* it is invoked with the caller's arg during creation -- confirm.
* dmu_objset_find() takes a callback of the form int func(char *, void *).
*/
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_destroy(const char *name);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int flags);
/* Same (void *buf, size_t size) shape as the other byteswap routines. */
void dmu_objset_byteswap(void *buf, size_t size);
int dmu_objset_evict_dbufs(objset_t *os);
/*
* called from dsl
*
* These operate on objset_impl_t rather than on the objset_t handle.
* dmu_objset_open_impl() delivers its result through *osip and returns
* an int status; dmu_objset_create_impl() returns the new objset_impl_t
* directly.
*/
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
objset_impl_t **osip);
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_OBJSET_H */
@@ -1,121 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_TRAVERSE_H
#define _SYS_DMU_TRAVERSE_H
#pragma ident "@(#)dmu_traverse.h 1.4 08/04/01 SMI"
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/arc.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Traversal "advance" flags. Each non-zero flag is a distinct bit.
* ADVANCE_POST is 0, so post-order is simply the absence of
* ADVANCE_PRE and cannot be detected with a bitwise AND.
*/
#define ADVANCE_POST 0 /* post-order traversal */
#define ADVANCE_PRE 0x01 /* pre-order traversal */
#define ADVANCE_PRUNE 0x02 /* prune by prev snapshot birth time */
#define ADVANCE_DATA 0x04 /* read user data blocks */
#define ADVANCE_HOLES 0x08 /* visit holes */
#define ADVANCE_ZIL 0x10 /* visit intent log blocks */
#define ADVANCE_NOLOCK 0x20 /* Don't grab SPA sync lock */
/*
* Bookmark limits. Note that ZB_NO_LEVEL expands to a bare -2 (no
* parentheses).
*/
#define ZB_NO_LEVEL -2
#define ZB_MAXLEVEL 32 /* Next power of 2 >= DN_MAX_LEVELS */
#define ZB_MAXBLKID (1ULL << 62)
#define ZB_MAXOBJSET (1ULL << 62)
#define ZB_MAXOBJECT (1ULL << 62)
/*
* ZB_DEPTH is the first dimension of th_cache[][] in struct
* traverse_handle. NOTE(review): ZB_MOS_CACHE, ZB_MDN_CACHE and
* ZB_DN_CACHE (0, 1, 2) look like the row indices into that array --
* confirm against the implementation.
*/
#define ZB_MOS_CACHE 0
#define ZB_MDN_CACHE 1
#define ZB_DN_CACHE 2
#define ZB_DEPTH 3
/*
* A traversal segment: a txg range (seg_mintxg, seg_maxtxg) and a
* bookmark range (seg_start, seg_end). seg_node is the embedded list
* linkage. NOTE(review): presumably for th_seglist in struct
* traverse_handle -- confirm.
*/
typedef struct zseg {
uint64_t seg_mintxg;
uint64_t seg_maxtxg;
zbookmark_t seg_start;
zbookmark_t seg_end;
list_node_t seg_node;
} zseg_t;
/*
* One cached block for the traversal code: the bookmark and block
* pointer that identify it, a data pointer, a dnode_phys_t pointer and
* an error number. A pointer to one of these is what a blkptr_cb_t
* callback receives. bc_pad1 and bc_pad2 are explicit padding.
*/
typedef struct traverse_blk_cache {
zbookmark_t bc_bookmark;
blkptr_t bc_blkptr;
void *bc_data;
dnode_phys_t *bc_dnode;
int bc_errno;
int bc_pad1;
uint64_t bc_pad2;
} traverse_blk_cache_t;
/*
* Traversal callback type: receives the cached block, the spa and the
* caller's arg, and returns an int.
*/
typedef int (blkptr_cb_t)(traverse_blk_cache_t *bc, spa_t *spa, void *arg);
/*
* State of one traversal. th_cache is ZB_DEPTH rows of ZB_MAXLEVEL
* traverse_blk_cache_t entries each, embedded in the structure.
* NOTE(review): th_func, th_arg, th_advance and th_zio_flags presumably
* hold the corresponding traverse_init() arguments, and the uint64_t
* th_hits .. th_restarts fields look like statistics counters --
* confirm against the implementation.
*/
struct traverse_handle {
spa_t *th_spa;
blkptr_cb_t *th_func;
void *th_arg;
uint16_t th_advance;
uint16_t th_locked;
int th_zio_flags;
list_t th_seglist;
traverse_blk_cache_t th_cache[ZB_DEPTH][ZB_MAXLEVEL];
traverse_blk_cache_t th_zil_cache;
uint64_t th_hits;
uint64_t th_arc_hits;
uint64_t th_reads;
uint64_t th_callbacks;
uint64_t th_syncs;
uint64_t th_restarts;
zbookmark_t th_noread;
zbookmark_t th_lastcb;
};
/*
* Traversal entry points. traverse_handle_t is not typedef'd in this
* header, so it has to come from one of the headers included above.
*
* traverse_dsl_dataset() and traverse_zvol() take the callback and
* advance flags directly. The remaining routines work on a handle
* obtained from traverse_init() and released with traverse_fini();
* the traverse_add_*() routines take a (mintxg, maxtxg) range plus a
* progressively narrower target (pool, objset, dnode).
*/
int traverse_dsl_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int advance, blkptr_cb_t func, void *arg);
int traverse_zvol(objset_t *os, int advance, blkptr_cb_t func, void *arg);
traverse_handle_t *traverse_init(spa_t *spa, blkptr_cb_t *func, void *arg,
int advance, int zio_flags);
void traverse_fini(traverse_handle_t *th);
void traverse_add_dnode(traverse_handle_t *th,
uint64_t mintxg, uint64_t maxtxg, uint64_t objset, uint64_t object);
void traverse_add_objset(traverse_handle_t *th,
uint64_t mintxg, uint64_t maxtxg, uint64_t objset);
void traverse_add_pool(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg);
int traverse_more(traverse_handle_t *th);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_TRAVERSE_H */
-137
View File
@@ -1,137 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
#pragma ident "@(#)dmu_tx.h 1.6 07/10/29 SMI"
#include <sys/inttypes.h>
#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Forward declarations. This header uses these types through pointers
* only; struct dnode_link is not referenced anywhere else in it.
*/
struct dmu_buf_impl;
struct dmu_tx_hold;
struct dnode_link;
struct dsl_pool;
struct dnode;
struct dsl_dir;
/*
* A DMU transaction. The tx_space_* fields at the end exist only when
* ZFS_DEBUG is defined, so sizeof (struct dmu_tx) differs between debug
* and non-debug builds.
*/
struct dmu_tx {
/*
* No synchronization is needed because a tx can only be handled
* by one thread.
*/
list_t tx_holds; /* list of dmu_tx_hold_t */
objset_t *tx_objset;
struct dsl_dir *tx_dir;
struct dsl_pool *tx_pool;
uint64_t tx_txg;
uint64_t tx_lastsnap_txg;
uint64_t tx_lasttried_txg;
txg_handle_t tx_txgh;
void *tx_tempreserve_cookie;
struct dmu_tx_hold *tx_needassign_txh;
uint8_t tx_anyobj;
int tx_err;
#ifdef ZFS_DEBUG
uint64_t tx_space_towrite;
uint64_t tx_space_tofree;
uint64_t tx_space_tooverwrite;
uint64_t tx_space_tounref;
refcount_t tx_space_written;
refcount_t tx_space_freed;
#endif
};
/*
* Kinds of transaction hold. THT_NUMTYPES is last and therefore equals
* the number of hold types that precede it. Within this header the
* enum is used only by the ZFS_DEBUG-only txh_type field of
* dmu_tx_hold_t.
*/
enum dmu_tx_hold_type {
THT_NEWOBJECT,
THT_WRITE,
THT_BONUS,
THT_FREE,
THT_ZAP,
THT_SPACE,
THT_NUMTYPES
};
/*
* One hold within a transaction: the owning tx, the list linkage
* (tx_holds in struct dmu_tx is the list of these), the dnode the hold
* refers to, and four space figures. txh_type and its two arguments
* are recorded only when ZFS_DEBUG is defined.
*/
typedef struct dmu_tx_hold {
dmu_tx_t *txh_tx;
list_node_t txh_node;
struct dnode *txh_dnode;
uint64_t txh_space_towrite;
uint64_t txh_space_tofree;
uint64_t txh_space_tooverwrite;
uint64_t txh_space_tounref;
#ifdef ZFS_DEBUG
enum dmu_tx_hold_type txh_type;
uint64_t txh_arg1;
uint64_t txh_arg2;
#endif
} dmu_tx_hold_t;
/*
* These routines are also declared in dmu.h, and are called by the user.
*
* Note that the objset_t parameter of dmu_tx_create() is named "dd"
* here even though it is an objset, not a dsl_dir.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
*/
extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
/*
* These routines are only called by the DMU.
*
* dsl_dir_t is not typedef'd in this header (only struct dsl_dir is
* forward-declared), so it has to come from an included header.
*/
dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
int dmu_tx_is_syncing(dmu_tx_t *tx);
int dmu_tx_private_ok(dmu_tx_t *tx);
void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
/*
* DMU_TX_DIRTY_BUF(tx, db) calls dmu_tx_dirty_buf() in ZFS_DEBUG builds
* and expands to nothing otherwise, in which case its arguments are not
* evaluated at all.
*/
#ifdef ZFS_DEBUG
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
#else
#define DMU_TX_DIRTY_BUF(tx, db)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DMU_TX_H */
@@ -1,75 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _DFETCH_H
#define _DFETCH_H
#pragma ident "@(#)dmu_zfetch.h 1.2 06/07/17 SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Global uint64_t declared here and defined elsewhere. */
extern uint64_t zfetch_array_rd_sz;
struct dnode; /* so we can reference dnode */
/*
* Prefetch direction. The values are +1 and -1, so the enumerator
* carries the sign of the direction.
*/
typedef enum zfetch_dirn {
ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
} zfetch_dirn_t;
/*
* One prefetch stream. Offsets, lengths, stride and cap are all
* expressed in blocks (see the field comments); zst_lock protects the
* stream.
*/
typedef struct zstream {
uint64_t zst_offset; /* offset of starting block in range */
uint64_t zst_len; /* length of range, in blocks */
zfetch_dirn_t zst_direction; /* direction of prefetch */
uint64_t zst_stride; /* length of stride, in blocks */
uint64_t zst_ph_offset; /* prefetch offset, in blocks */
uint64_t zst_cap; /* prefetch limit (cap), in blocks */
kmutex_t zst_lock; /* protects stream */
clock_t zst_last; /* lbolt of last prefetch */
avl_node_t zst_node; /* embed avl node here */
} zstream_t;
/*
* Per-dnode prefetch state (struct dnode embeds one as dn_zfetch).
* NOTE(review): zf_stream is declared as a list_t, while zstream_t
* embeds an avl_node_t as its linkage -- confirm which container the
* implementation really uses.
*/
typedef struct zfetch {
krwlock_t zf_rwlock; /* protects zfetch structure */
list_t zf_stream; /* zstream_t's; declared as a list_t, not an AVL tree */
struct dnode *zf_dnode; /* dnode that owns this zfetch */
uint32_t zf_stream_cnt; /* # of active streams */
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
} zfetch_t;
/*
* zfetch entry points. The prototypes omit parameter names.
* NOTE(review): the meaning of the two uint64_t arguments and the int
* argument of dmu_zfetch() cannot be told from this header -- confirm
* against the implementation.
*/
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
#ifdef __cplusplus
}
#endif
#endif /* _DFETCH_H */
-270
View File
@@ -1,270 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DNODE_H
#define _SYS_DNODE_H
#pragma ident "@(#)dnode.h 1.12 07/08/26 SMI"
#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/refcount.h>
#include <sys/dmu_zfetch.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Flags.
*
* NOTE(review): presumably the values accepted by the "flag" argument
* of dnode_hold_impl() -- confirm.
*/
#define DNODE_MUST_BE_ALLOCATED 1
#define DNODE_MUST_BE_FREE 2
/*
* Fixed constants.
*
* The *_SHIFT values are log2 of the corresponding size. Note that
* DN_MAX_OFFSET_SHIFT is 64, so it must not be used directly as a
* shift count on a 64-bit value (shifting by the full width is
* undefined in C); it is only used in arithmetic below.
*/
#define DNODE_SHIFT 9 /* 512 bytes */
#define DN_MIN_INDBLKSHIFT 10 /* 1k */
#define DN_MAX_INDBLKSHIFT 14 /* 16k */
#define DNODE_BLOCK_SHIFT 14 /* 16k */
#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
/*
* Derived constants.
*
* DNODE_SIZE is 1 << 9 = 512 bytes. DN_MAX_NBLKPTR is the number of
* block pointers that fit after the 64-byte core; DN_MAX_BONUSLEN is
* what remains after the core and one block pointer.
* NOTE(review): DN_ZERO_BONUSLEN is one more than the largest valid
* bonus length, so it looks like an out-of-band marker -- confirm how
* it is used.
*/
#define DNODE_SIZE (1 << DNODE_SHIFT)
#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
/* The +2 here is a cheesy way to round up */
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
(DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
/*
* Address of the bonus area inside a dnode_phys_t: dn_bonus advanced by
* the space taken by the block pointers beyond the first one
* (dn_nblkptr - 1 of them).
*/
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
/*
* dn_used in bytes: taken as-is when DNODE_FLAG_USED_BYTES is set in
* dn_flags, otherwise scaled up by SPA_MINBLOCKSHIFT.
*/
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
/*
 * EPB(blkshift, typeshift) evaluates to 1 << (blkshift - typeshift),
 * i.e. how many (1 << typeshift)-byte items fit in a (1 << blkshift)-byte
 * block (the name presumably stands for "entries per block").
 *
 * Both arguments are parenthesized so that expression arguments such as
 * "a + b" expand with the intended precedence.  The result has type int,
 * so blkshift - typeshift must stay below the width of int.
 */
#define EPB(blkshift, typeshift) (1 << ((blkshift) - (typeshift)))
/* Forward declarations; this header uses these types through pointers only. */
struct dmu_buf_impl;
struct objset_impl;
struct zio;
/*
* Values of dn_dirtyctx in struct dnode. DN_UNDIRTIED is first and
* therefore 0.
*/
enum dnode_dirtycontext {
DN_UNDIRTIED,
DN_DIRTY_OPEN,
DN_DIRTY_SYNC
};
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
#define DNODE_FLAG_USED_BYTES (1<<0)
/*
* dnode_phys_t layout. The fields ahead of dn_blkptr add up to 64
* bytes (8 one-byte fields, two uint16_t, 4 pad bytes, two uint64_t and
* four uint64_t of padding), which matches DNODE_CORE_SIZE. They are
* followed by one block pointer and DN_MAX_BONUSLEN bytes of bonus
* space. When dn_nblkptr is greater than 1 the extra block pointers
* occupy the start of dn_bonus; DN_BONUS() accounts for that.
*/
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */
uint8_t dn_nblkptr; /* length of dn_blkptr */
uint8_t dn_bonustype; /* type of data in bonus buffer */
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
uint8_t dn_compress; /* ZIO_COMPRESS type */
uint8_t dn_flags; /* DNODE_FLAG_* */
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
uint16_t dn_bonuslen; /* length of dn_bonus */
uint8_t dn_pad2[4];
/* accounting is protected by dn_dirty_mtx */
uint64_t dn_maxblkid; /* largest allocated block ID */
uint64_t dn_used; /* bytes (or sectors) of disk space */
uint64_t dn_pad3[4];
blkptr_t dn_blkptr[1];
uint8_t dn_bonus[DN_MAX_BONUSLEN];
} dnode_phys_t;
/*
* The dnode structure used at run time; dn_phys points at the matching
* dnode_phys_t. The section comments inside say which lock covers
* each group of fields. The dn_next_* arrays, dn_dirty_link,
* dn_dirty_records and dn_ranges all have TXG_SIZE elements.
*/
typedef struct dnode {
/*
* dn_struct_rwlock protects the structure of the dnode,
* including the number of levels of indirection (dn_nlevels),
* dn_maxblkid, and dn_next_*
*/
krwlock_t dn_struct_rwlock;
/*
* Our link on dataset's dd_dnodes list.
* Protected by dd_accounting_mtx.
*/
list_node_t dn_link;
/* immutable: */
struct objset_impl *dn_objset;
uint64_t dn_object;
struct dmu_buf_impl *dn_dbuf;
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
/*
* Copies of stuff in dn_phys. They're valid in the open
* context (eg. even before the dnode is first synced).
* Where necessary, these are protected by dn_struct_rwlock.
*/
dmu_object_type_t dn_type; /* object type */
uint16_t dn_bonuslen; /* bonus length */
uint8_t dn_bonustype; /* bonus type */
uint8_t dn_nblkptr; /* number of blkptrs (immutable) */
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
uint8_t dn_compress; /* ZIO_COMPRESS type */
uint8_t dn_nlevels;
uint8_t dn_indblkshift;
uint8_t dn_datablkshift; /* zero if blksz not power of 2! */
uint16_t dn_datablkszsec; /* in 512b sectors */
uint32_t dn_datablksz; /* in bytes */
uint64_t dn_maxblkid;
uint8_t dn_next_nlevels[TXG_SIZE];
uint8_t dn_next_indblkshift[TXG_SIZE];
uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
/* protected by os_lock: */
list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
/* protected by dn_mtx: */
kmutex_t dn_mtx;
list_t dn_dirty_records[TXG_SIZE];
avl_tree_t dn_ranges[TXG_SIZE];
uint64_t dn_allocated_txg;
uint64_t dn_free_txg;
uint64_t dn_assigned_txg;
kcondvar_t dn_notxholds;
enum dnode_dirtycontext dn_dirtyctx;
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
/* protected by own devices */
refcount_t dn_tx_holds;
refcount_t dn_holds;
kmutex_t dn_dbufs_mtx;
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
/* parent IO for current sync write */
zio_t *dn_zio;
/* holds prefetch structure */
struct zfetch dn_zfetch;
} dnode_t;
/*
* A range of blocks, given as a starting block id and a block count,
* with an embedded AVL node (dn_ranges[] in struct dnode is an array
* of AVL trees; NOTE(review): presumably these are their elements --
* confirm).
*/
typedef struct free_range {
avl_node_t fr_node;
uint64_t fr_blkid;
uint64_t fr_nblks;
} free_range_t;
/*
* dnode interface.
*
* Note that several prototypes below name their struct objset_impl
* parameter "dd"; it is an objset, not a dsl_dir.
*
* dnode_hold() and dnode_hold_impl() deliver the dnode through *dnp and
* return an int status; dnode_hold_impl() additionally takes a flag.
* The "ref" argument of the hold, add_ref and rele routines is an
* opaque void pointer.
*/
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
uint64_t object);
void dnode_special_close(dnode_t *dn);
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
int dnode_hold(struct objset_impl *dd, uint64_t object,
void *ref, dnode_t **dnp);
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
/* Byteswap helpers: one for a single dnode_phys_t, one for a buffer of a given size. */
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
uint64_t dnode_current_max_length(dnode_t *dn);
/* Range routines: free_range takes an offset/length pair, clear_range a block id and count. */
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
uint64_t nblks, dmu_tx_t *tx);
/* Space accounting: both take a signed (int64_t) amount. */
void dnode_diduse_space(dnode_t *dn, int64_t space);
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx);
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
void dnode_init(void);
void dnode_fini(void);
/* *off is passed by pointer; NOTE(review): presumably in/out as for dmu_offset_next() -- confirm */
int dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *off, int minlvl,
uint64_t blkfill, uint64_t txg);
void dnode_evict_dbufs(dnode_t *dn);
#ifdef ZFS_DEBUG
/*
 * dprintf_dnode(dn, fmt, ...) -- debug printf tagged with a dnode's
 * object number.  It does nothing unless ZFS_DEBUG_DPRINTF is set in
 * zfs_flags.  When active, the message goes to dprintf_ds() for the
 * dnode's dataset, prefixed with "obj=<n> ", where <n> is "mdn" when
 * dn_object equals DMU_META_DNODE_OBJECT and the decimal object number
 * otherwise.
 *
 * "obj=%s " and fmt are joined by plain string-literal adjacency, so fmt
 * must be a string literal.  A ## between the two would make the joining
 * explicit, but gcc does not accept that token there.  At least one
 * argument must follow fmt, because __VA_ARGS__ is expanded after a comma.
 *
 * The object number is cast to u_longlong_t, so it is formatted with
 * %llu; the conversion has to match the signedness of its argument.
 *
 * DNODE_VERIFY() and FREE_VERIFY() forward to dnode_verify() and
 * free_verify().  In non-debug builds all three macros expand to nothing
 * and their arguments are not evaluated.
 */
#define dprintf_dnode(dn, fmt, ...) do { \
    if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
        char __db_buf[32]; \
        uint64_t __db_obj = (dn)->dn_object; \
        if (__db_obj == DMU_META_DNODE_OBJECT) \
            (void) strcpy(__db_buf, "mdn"); \
        else \
            (void) snprintf(__db_buf, sizeof (__db_buf), "%llu", \
                (u_longlong_t)__db_obj); \
        dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
            __db_buf, __VA_ARGS__); \
    } \
_NOTE(CONSTCOND) } while (0)
#define DNODE_VERIFY(dn) dnode_verify(dn)
#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
#else
#define dprintf_dnode(dn, fmt, ...)
#define DNODE_VERIFY(dn)
#define FREE_VERIFY(db, start, end, tx)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DNODE_H */
@@ -1,228 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DATASET_H
#define _SYS_DSL_DATASET_H
#pragma ident "@(#)dsl_dataset.h 1.16 08/04/27 SMI"
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/bplist.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
#define DS_FLAG_INCONSISTENT (1ULL<<0)
/*
* NB: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later. It
* will be needed when we implement 'zfs split' (where the split off
* clone should not be promoted).
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
/*
* DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
* calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
* refquota/refreservations).
*/
#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
* name lookups should be performed case-insensitively.
*/
#define DS_FLAG_CI_DATASET (1ULL<<16)
/*
* Persistent per-dataset state, reached through dsl_dataset_t's ds_phys.
* NOTE(review): the _phys suffix and the padding to a fixed 320 bytes
* suggest this is the on-disk layout, so field order and sizes presumably
* must not change -- confirm before editing.
*/
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj;
uint64_t ds_prev_snap_obj;
uint64_t ds_prev_snap_txg;
uint64_t ds_next_snap_obj;
uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj;
uint64_t ds_used_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
uint64_t ds_unique_bytes; /* only relevant to snapshots */
/*
* The ds_fsid_guid is a 56-bit ID that can change to avoid
* collisions. The ds_guid is a 64-bit ID that will never
* change, so there is a small probability that it will collide.
*/
uint64_t ds_fsid_guid;
uint64_t ds_guid;
/* Bitmask of the DS_FLAG_* values defined above this struct. */
uint64_t ds_flags;
blkptr_t ds_bp;
uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */
} dsl_dataset_phys_t;
/*
* In-memory handle for a dataset.  Members are grouped by the locking
* rule that applies to them; each group is introduced by its own comment.
* ds_phys points at the dataset's dsl_dataset_phys_t.
* NOTE(review): ds_dbuf is presumably the buffer that backs ds_phys --
* confirm against the code that fills in this struct.
*/
typedef struct dsl_dataset {
/* Immutable: */
struct dsl_dir *ds_dir;
dsl_dataset_phys_t *ds_phys;
dmu_buf_t *ds_dbuf;
uint64_t ds_object;
uint64_t ds_fsid_guid;
/* only used in syncing context: */
struct dsl_dataset *ds_prev; /* only valid for non-snapshots */
/* has internal locking: */
bplist_t ds_deadlist;
/* protected by lock on pool's dp_dirty_datasets list */
txg_node_t ds_dirty_link;
list_node_t ds_synced_link;
/*
* ds_phys->ds_<accounting> is also protected by ds_lock.
* Protected by ds_lock:
*/
kmutex_t ds_lock;
void *ds_user_ptr;
dsl_dataset_evict_func_t *ds_user_evict_func;
uint64_t ds_open_refcount;
/* no locking; only for making guesses */
uint64_t ds_trysnap_txg;
/* for objset_open() */
kmutex_t ds_opening_lock;
uint64_t ds_reserved; /* cached refreservation */
uint64_t ds_quota; /* cached refquota */
/* Protected by ds_lock; keep at end of struct for better locality */
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
/*
* Non-zero when ds_phys->ds_num_children is non-zero.
* NOTE(review): relies on the convention noted on ds_num_children that
* head datasets keep that field at 0 -- confirm.
*/
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
/* Non-zero when DS_FLAG_UNIQUE_ACCURATE is set in ds_phys->ds_flags. */
#define DS_UNIQUE_IS_ACCURATE(ds) \
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
int dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_open(const char *name, int mode, void *tag,
dsl_dataset_t **dsp);
int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj,
const char *tail, int mode, void *tag, dsl_dataset_t **);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag);
void dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode);
boolean_t dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode);
uint64_t dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds,
const char *lastname, dsl_dataset_t *origin, uint64_t flags,
cred_t *, dmu_tx_t *);
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
int dsl_snapshots_destroy(char *fsname, char *snapname);
dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func);
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
void dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
dmu_tx_t *tx);
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
void dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
void dsl_dataset_create_root(struct dsl_pool *dp, uint64_t *ddobjp,
dmu_tx_t *tx);
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
dmu_tx_t *tx);
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
#ifdef ZFS_DEBUG
/*
* Debug trace for a dataset.  When ZFS_DEBUG_DPRINTF is set in zfs_flags,
* the dataset's name is fetched with dsl_dataset_name() into a temporary
* MAXNAMELEN-byte buffer and dprintf() is called with "ds=<name> "
* prepended to the caller's format.  The buffer is allocated with
* KM_SLEEP and freed again before the macro finishes.
*
* fmt is joined to the prefix by string-literal concatenation, so it must
* be a string literal, and at least one variadic argument is required
* because __VA_ARGS__ follows a comma.
*/
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
dsl_dataset_name(ds, __ds_name); \
dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
kmem_free(__ds_name, MAXNAMELEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
#define dprintf_ds(dd, fmt, ...)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DATASET_H */
@@ -1,73 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DELEG_H
#define _SYS_DSL_DELEG_H
#pragma ident "@(#)dsl_deleg.h 1.4 07/10/25 SMI"
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ZFS_DELEG_PERM_NONE ""
#define ZFS_DELEG_PERM_CREATE "create"
#define ZFS_DELEG_PERM_DESTROY "destroy"
#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
#define ZFS_DELEG_PERM_ROLLBACK "rollback"
#define ZFS_DELEG_PERM_CLONE "clone"
#define ZFS_DELEG_PERM_PROMOTE "promote"
#define ZFS_DELEG_PERM_RENAME "rename"
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_ALLOW "allow"
#define ZFS_DELEG_PERM_USERPROP "userprop"
#define ZFS_DELEG_PERM_VSCAN "vscan"
/*
* Note: the names of properties that are marked delegatable are also
* valid delegated permissions
*/
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
boolean_t dsl_delegation_on(objset_t *os);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DELEG_H */
-146
View File
@@ -1,146 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_DIR_H
#define _SYS_DSL_DIR_H
#pragma ident "@(#)dsl_dir.h 1.10 07/10/29 SMI"
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/refcount.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
/*
* Persistent per-directory state, reached through struct dsl_dir's
* dd_phys.  Twelve uint64_t fields are in use and dd_pad supplies twenty
* more, for 32 * 8 = 256 bytes in total.
* NOTE(review): the _phys suffix and fixed padding suggest this is the
* on-disk layout, so new fields presumably have to be carved out of
* dd_pad rather than inserted -- confirm before editing.
*/
typedef struct dsl_dir_phys {
uint64_t dd_creation_time; /* not actually used */
uint64_t dd_head_dataset_obj;
uint64_t dd_parent_obj;
uint64_t dd_origin_obj;
uint64_t dd_child_dir_zapobj;
/*
* how much space our children are accounting for; for leaf
* datasets, == physical space used by fs + snaps
*/
uint64_t dd_used_bytes;
uint64_t dd_compressed_bytes;
uint64_t dd_uncompressed_bytes;
/* Administrative quota setting */
uint64_t dd_quota;
/* Administrative reservation setting */
uint64_t dd_reserved;
uint64_t dd_props_zapobj;
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
uint64_t dd_pad[20]; /* pad out to 256 bytes for good measure */
} dsl_dir_phys_t;
/*
* In-memory handle for a DSL directory.  Members are grouped by the
* locking rule that applies to them; each group is introduced by its own
* comment.  dd_phys points at the directory's dsl_dir_phys_t, and the
* two per-txg arrays are indexed over TXG_SIZE slots.
*/
struct dsl_dir {
/* These are immutable; no lock needed: */
uint64_t dd_object;
dsl_dir_phys_t *dd_phys;
dmu_buf_t *dd_dbuf;
dsl_pool_t *dd_pool;
/* protected by lock on pool's dp_dirty_dirs list */
txg_node_t dd_dirty_link;
/* protected by dp_config_rwlock */
dsl_dir_t *dd_parent;
/* Protected by dd_lock */
kmutex_t dd_lock;
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
/* Accounting */
/* reflects any changes to dd_phys->dd_used_bytes made this syncing */
int64_t dd_used_bytes;
/* gross estimate of space used by in-flight tx's */
uint64_t dd_tempreserved[TXG_SIZE];
/* amount of space we expect to write; == amount of dirty data */
int64_t dd_space_towrite[TXG_SIZE];
/* protected by dd_lock; keep at end of struct for better locality */
char dd_myname[MAXNAMELEN];
};
void dsl_dir_close(dsl_dir_t *dd, void *tag);
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
const char **tailp);
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **);
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
int dsl_dir_is_private(dsl_dir_t *dd);
uint64_t dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx);
void dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx);
dsl_checkfunc_t dsl_dir_destroy_check;
dsl_syncfunc_t dsl_dir_destroy_sync;
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
dmu_tx_t *tx);
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
void dsl_dir_diduse_space(dsl_dir_t *dd,
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
#ifdef ZFS_DEBUG
/*
 * Debug trace for a dsl_dir.  When ZFS_DEBUG_DPRINTF is set in zfs_flags,
 * the directory's name is fetched with dsl_dir_name() into a temporary
 * buffer and dprintf() is called with "dd=<name> " prepended to the
 * caller's format.
 *
 * The buffer holds MAXNAMELEN bytes plus room for MOS_DIR_NAME and its
 * terminating NUL.  That size is computed once, at compile time, with
 * sizeof (which already counts the NUL), so the sizes passed to
 * kmem_alloc() and kmem_free() cannot drift apart.  This requires
 * MOS_DIR_NAME to stay a string literal.
 *
 * fmt is joined to the prefix by string-literal concatenation, so it must
 * be a string literal, and at least one variadic argument is required
 * because __VA_ARGS__ follows a comma.
 */
#define dprintf_dd(dd, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
		const size_t __dd_len = MAXNAMELEN + sizeof (MOS_DIR_NAME); \
		char *__ds_name = kmem_alloc(__dd_len, KM_SLEEP); \
		dsl_dir_name(dd, __ds_name); \
		dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
		kmem_free(__ds_name, __dd_len); \
	} \
_NOTE(CONSTCOND) } while (0)
#else
#define dprintf_dd(dd, fmt, ...)
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DIR_H */
-92
View File
@@ -1,92 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_POOL_H
#define _SYS_DSL_POOL_H
#pragma ident "@(#)dsl_pool.h 1.5 08/03/20 SMI"
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/txg_impl.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct objset;
struct dsl_dir;
/*
* In-memory state for a DSL pool.  Members are grouped by the locking
* rule that applies to them; each group is introduced by its own comment.
* The two accounting arrays under dp_lock hold one slot per open
* transaction group (TXG_SIZE entries).
*/
typedef struct dsl_pool {
/* Immutable */
spa_t *dp_spa;
struct objset *dp_meta_objset;
struct dsl_dir *dp_root_dir;
struct dsl_dir *dp_mos_dir;
uint64_t dp_root_dir_obj;
/* No lock needed - sync context only */
blkptr_t dp_meta_rootbp;
list_t dp_synced_datasets;
uint64_t dp_write_limit;
/* Uses dp_lock */
kmutex_t dp_lock;
uint64_t dp_space_towrite[TXG_SIZE];
uint64_t dp_tempreserved[TXG_SIZE];
/* Has its own locking */
tx_state_t dp_tx;
txg_list_t dp_dirty_datasets;
txg_list_t dp_dirty_dirs;
txg_list_t dp_sync_tasks;
/*
* Protects administrative changes (properties, namespace)
* It is only held for write in syncing context. Therefore
* syncing context does not need to ever have it for read, since
* nobody else could possibly have it for write.
*/
krwlock_t dp_config_rwlock;
} dsl_pool_t;
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_zil_clean(dsl_pool_t *dp);
int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_memory_pressure(dsl_pool_t *dp);
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_POOL_H */
-81
View File
@@ -1,81 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_PROP_H
#define _SYS_DSL_PROP_H
#pragma ident "@(#)dsl_prop.h 1.6 07/10/29 SMI"
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_dataset;
/* The callback func may not call into the DMU or DSL! */
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
/*
* One registered property-change callback.  Records are chained through
* cbr_node on a dsl_dir's dd_prop_cbs list.
* NOTE(review): the remaining fields mirror the arguments of
* dsl_prop_register() (dataset, property name, callback, callback
* argument); presumably they are stored verbatim -- confirm, in
* particular whether cbr_propname is copied or merely borrowed.
*/
typedef struct dsl_prop_cb_record {
list_node_t cbr_node; /* link on dd_prop_cbs */
struct dsl_dataset *cbr_ds;
const char *cbr_propname;
dsl_prop_changed_cb_t *cbr_func;
void *cbr_arg;
} dsl_prop_cb_record_t;
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_numcb(struct dsl_dataset *ds);
int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_ds_locked(dsl_dir_t *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_set(const char *ddname, const char *propname,
int intsz, int numints, const void *buf);
int dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
int intsz, int numints, const void *buf);
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
cred_t *cr, dmu_tx_t *tx);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,
zfs_prop_t prop, const char *value);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_PROP_H */
@@ -1,83 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DSL_SYNCTASK_H
#define _SYS_DSL_SYNCTASK_H
#pragma ident "@(#)dsl_synctask.h 1.3 07/06/29 SMI"
#include <sys/txg.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_pool;
/*
* Callback types for a sync task.  Both receive the task's two opaque
* arguments and a transaction; the sync callback additionally receives a
* cred_t.  Only the check callback returns a value.
* NOTE(review): the int is presumably 0 on success and an errno value
* otherwise -- confirm against the callers.
*/
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
/*
* A single sync task: a check/sync callback pair plus the two opaque
* arguments handed to them.  dst_node links the task into a list.
* NOTE(review): that list is presumably a group's dstg_tasks, and dst_err
* presumably holds the result of the check callback -- confirm.
*/
typedef struct dsl_sync_task {
list_node_t dst_node;
dsl_checkfunc_t *dst_checkfunc;
dsl_syncfunc_t *dst_syncfunc;
void *dst_arg1;
void *dst_arg2;
int dst_err;
} dsl_sync_task_t;
/*
* A set of sync tasks (dstg_tasks) that belong to one pool (dstg_pool).
* dstg_node is a txg_node_t, the link type used by txg lists.
* NOTE(review): the group is presumably queued on the pool's
* dp_sync_tasks list; dstg_space presumably accumulates the
* blocks_modified estimates passed to dsl_sync_task_create(); and
* dstg_nowaiter presumably marks groups submitted through
* dsl_sync_task_group_nowait() -- confirm all three.
*/
typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
cred_t *dstg_cr;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;
boolean_t dstg_nowaiter;
} dsl_sync_task_group_t;
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *, dsl_syncfunc_t *,
void *arg1, void *arg2, int blocks_modified);
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
int dsl_sync_task_do(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified);
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_SYNCTASK_H */
+10 -3
View File
@@ -19,14 +19,14 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FM_FS_ZFS_H
#define _SYS_FM_FS_ZFS_H
#pragma ident "@(#)zfs.h 1.2 07/06/07 SMI"
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
@@ -45,8 +45,12 @@ extern "C" {
#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum"
#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small"
#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label"
#define FM_EREPORT_ZFS_IO_FAILURE "io_failure"
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_GUID "pool_guid"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT "pool_context"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID "vdev_guid"
@@ -66,7 +70,10 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
#define FM_RESOURCE_OK "ok"
#define FM_EREPORT_FAILMODE_WAIT "wait"
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
#define FM_EREPORT_FAILMODE_PANIC "panic"
#define FM_RESOURCE_REMOVED "removed"
#define FM_RESOURCE_AUTOREPLACE "autoreplace"
+47 -14
View File
@@ -26,8 +26,6 @@
#ifndef _SYS_FS_ZFS_H
#define _SYS_FS_ZFS_H
#pragma ident "@(#)zfs.h 1.44 08/04/09 SMI"
#ifdef __cplusplus
extern "C" {
#endif
@@ -100,12 +98,19 @@ typedef enum {
ZFS_PROP_SHARESMB,
ZFS_PROP_REFQUOTA,
ZFS_PROP_REFRESERVATION,
ZFS_PROP_GUID,
ZFS_PROP_PRIMARYCACHE,
ZFS_PROP_SECONDARYCACHE,
ZFS_PROP_USEDSNAP,
ZFS_PROP_USEDDS,
ZFS_PROP_USEDCHILD,
ZFS_PROP_USEDREFRESERV,
ZFS_NUM_PROPS
} zfs_prop_t;
/*
* Pool properties are identified by these constants and must be added to the
* end of this list to ensure that external conumsers are not affected
* end of this list to ensure that external consumers are not affected
* by the change. If you make any changes to this list, be sure to update
* the property table in usr/src/common/zfs/zpool_prop.c.
*/
@@ -124,6 +129,7 @@ typedef enum {
ZPOOL_PROP_AUTOREPLACE,
ZPOOL_PROP_CACHEFILE,
ZPOOL_PROP_FAILUREMODE,
ZPOOL_PROP_LISTSNAPS,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@@ -145,6 +151,13 @@ typedef enum {
typedef int (*zprop_func)(int, void *);
/*
* Properties to be set on the root file system of a new pool
* are stuffed into their own nvlist, which is then included in
* the properties nvlist with the pool properties.
*/
#define ZPOOL_ROOTFS_PROPS "root-props-nvl"
/*
* Dataset property functions shared between libzfs and kernel.
*/
@@ -158,7 +171,7 @@ zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
int zfs_prop_valid_for_type(int, zfs_type_t);
boolean_t zfs_prop_valid_for_type(int, zfs_type_t);
/*
* Pool property functions shared between libzfs and kernel.
@@ -213,6 +226,13 @@ typedef enum zfs_share_op {
ZFS_UNSHARE_SMB = 3
} zfs_share_op_t;
typedef enum zfs_cache_type {
ZFS_CACHE_NONE = 0,
ZFS_CACHE_METADATA = 1,
ZFS_CACHE_ALL = 2
} zfs_cache_type_t;
/*
* On-disk version number.
*/
@@ -226,14 +246,17 @@ typedef enum zfs_share_op {
#define SPA_VERSION_8 8ULL
#define SPA_VERSION_9 9ULL
#define SPA_VERSION_10 10ULL
#define SPA_VERSION_11 11ULL
#define SPA_VERSION_12 12ULL
#define SPA_VERSION_13 13ULL
#define SPA_VERSION_14 14ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
* format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
* and do the appropriate changes.
*/
#define SPA_VERSION SPA_VERSION_10
#define SPA_VERSION_STRING "10"
#define SPA_VERSION SPA_VERSION_14
#define SPA_VERSION_STRING "14"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -263,6 +286,12 @@ typedef enum zfs_share_op {
#define SPA_VERSION_REFQUOTA SPA_VERSION_9
#define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9
#define SPA_VERSION_L2CACHE SPA_VERSION_10
#define SPA_VERSION_NEXT_CLONES SPA_VERSION_11
#define SPA_VERSION_ORIGIN SPA_VERSION_11
#define SPA_VERSION_DSL_SCRUB SPA_VERSION_11
#define SPA_VERSION_SNAP_PROPS SPA_VERSION_12
#define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13
#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@@ -320,6 +349,7 @@ typedef enum zfs_share_op {
#define ZPOOL_CONFIG_PHYS_PATH "phys_path"
#define ZPOOL_CONFIG_IS_LOG "is_log"
#define ZPOOL_CONFIG_L2CACHE "l2cache"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
/*
@@ -352,11 +382,7 @@ typedef enum zfs_share_op {
* The location of the pool configuration repository, shared between kernel and
* userland.
*/
#define ZPOOL_CACHE_DIR "/etc/zfs"
#define ZPOOL_CACHE_FILE "zpool.cache"
#define ZPOOL_CACHE_TMP ".zpool.cache"
#define ZPOOL_CACHE ZPOOL_CACHE_DIR "/" ZPOOL_CACHE_FILE
#define ZPOOL_CACHE "/etc/zfs/zpool.cache"
/*
* vdev states are ordered from least to most healthy.
@@ -390,7 +416,9 @@ typedef enum vdev_aux {
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
VDEV_AUX_SPARED, /* hot spare used in another pool */
VDEV_AUX_ERR_EXCEEDED /* too many errors */
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
VDEV_AUX_BAD_LOG /* cannot read log chain(s) */
} vdev_aux_t;
/*
@@ -406,7 +434,6 @@ typedef enum pool_state {
POOL_STATE_SPARE, /* Reserved for hot spare use */
POOL_STATE_L2CACHE, /* Level 2 ARC device */
POOL_STATE_UNINITIALIZED, /* Internal spa_t state */
POOL_STATE_IO_FAILURE, /* Internal pool state */
POOL_STATE_UNAVAIL, /* Internal libzfs state */
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
} pool_state_t;
@@ -602,6 +629,11 @@ typedef enum {
#define ZFS_EV_VDEV_PATH "vdev_path"
#define ZFS_EV_VDEV_GUID "vdev_guid"
/*
* Note: This is encoded on-disk, so new events must be added to the
* end, and unused events can not be removed. Be sure to edit
* zpool_main.c: hist_event_table[].
*/
typedef enum history_internal_events {
LOG_NO_EVENT = 0,
LOG_POOL_CREATE,
@@ -640,6 +672,7 @@ typedef enum history_internal_events {
LOG_DS_UPGRADE,
LOG_DS_REFQUOTA,
LOG_DS_REFRESERV,
LOG_POOL_SCRUB_DONE,
LOG_END
} history_internal_events_t;
-63
View File
@@ -1,63 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_LIST_H
#define _SYS_LIST_H
#include <sys/list_impl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct list_node list_node_t;
typedef struct list list_t;
void list_create(list_t *, size_t, size_t);
void list_destroy(list_t *);
void list_insert_after(list_t *, void *, void *);
void list_insert_before(list_t *, void *, void *);
void list_insert_head(list_t *, void *);
void list_insert_tail(list_t *, void *);
void list_remove(list_t *, void *);
void list_move_tail(list_t *, list_t *);
void *list_head(list_t *);
void *list_tail(list_t *);
void *list_next(list_t *, void *);
void *list_prev(list_t *, void *);
int list_link_active(list_node_t *);
int list_is_empty(list_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_LIST_H */
@@ -1,53 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_LIST_IMPL_H
#define _SYS_LIST_IMPL_H
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Link node: pointers to the following and preceding list_node. */
struct list_node {
struct list_node *list_next;
struct list_node *list_prev;
};
/*
 * List header.
 * NOTE(review): list_size / list_offset presumably record the element size
 * and the offset of the link node inside an element -- confirm against the
 * list_create() implementation.
 */
struct list {
size_t list_size;
size_t list_offset;
struct list_node list_head; /* node stored by value inside the header */
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_LIST_IMPL_H */
-70
View File
@@ -1,70 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_METASLAB_H
#define _SYS_METASLAB_H
#pragma ident "@(#)metaslab.h 1.6 07/06/21 SMI"
#include <sys/spa.h>
#include <sys/space_map.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class and group handles.  Only the struct tags are named here, so this
 * header exposes them as incomplete types.
 */
typedef struct metaslab_class metaslab_class_t;
typedef struct metaslab_group metaslab_group_t;
/* Per-metaslab lifecycle and per-txg sync entry points. */
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
uint64_t start, uint64_t size, uint64_t txg);
extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
/*
 * Allocate / free / claim operating on a block pointer.  metaslab_alloc()
 * and metaslab_claim() return an int; NOTE(review): presumably 0 on success
 * and an errno value otherwise -- confirm.
 */
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp,
boolean_t hintbp_avoid);
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
/* Metaslab class management: create/destroy and group membership. */
extern metaslab_class_t *metaslab_class_create(void);
extern void metaslab_class_destroy(metaslab_class_t *mc);
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
/* Metaslab group management; a group is created for a (class, vdev) pair. */
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
vdev_t *vd);
extern void metaslab_group_destroy(metaslab_group_t *mg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_METASLAB_H */
@@ -1,81 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_METASLAB_IMPL_H
#define _SYS_METASLAB_IMPL_H
#pragma ident "@(#)metaslab_impl.h 1.2 06/04/02 SMI"
#include <sys/metaslab.h>
#include <sys/space_map.h>
#include <sys/vdev.h>
#include <sys/txg.h>
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Metaslab class: a group pointer plus a 64-bit counter.
 * NOTE(review): mc_rotor presumably points at the group to try next and
 * mc_allocated counts bytes allocated from the class -- confirm in
 * metaslab.c.
 */
struct metaslab_class {
metaslab_group_t *mc_rotor;
uint64_t mc_allocated;
};
/*
 * Metaslab group: ties a set of metaslabs (kept in an AVL tree) to one
 * class and one vdev, and carries prev/next links to sibling groups.
 * NOTE(review): mg_lock presumably protects mg_metaslab_tree, and
 * mg_aliquot / mg_bias presumably steer how much is allocated from this
 * group before moving on -- confirm in metaslab.c.
 */
struct metaslab_group {
kmutex_t mg_lock;
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
int64_t mg_bias;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;
};
/*
 * Each metaslab's free space is tracked in a space map object in the MOS,
 * which is only updated in syncing context. Each time we sync a txg,
 * we append the allocs and frees from that txg to the space map object.
 * When the txg is done syncing, metaslab_sync_done() updates ms_smo
 * to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
 *
 * ms_allocmap and ms_freemap hold one space map per TXG_SIZE slot.
 */
struct metaslab {
kmutex_t ms_lock; /* metaslab lock */
space_map_obj_t ms_smo; /* synced space map object */
space_map_obj_t ms_smo_syncing; /* syncing space map object */
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
space_map_t ms_map; /* in-core free space map */
uint64_t ms_weight; /* weight vs. others in group */
metaslab_group_t *ms_group; /* metaslab group */
avl_node_t ms_group_node; /* node in metaslab group tree */
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_METASLAB_IMPL_H */
-104
View File
@@ -1,104 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_REFCOUNT_H
#define _SYS_REFCOUNT_H
#pragma ident "@(#)refcount.h 1.3 07/08/02 SMI"
#include <sys/inttypes.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * If the reference is held only by the calling function and not any
 * particular object, use FTAG (which is a string) for the holder_tag.
 * Otherwise, use the object that holds the reference.
 *
 * FTAG expands to the enclosing function's __func__ string with its
 * const qualifier cast away, so it can be passed where a plain pointer
 * tag is expected; the string must never be written through.
 */
#define FTAG ((char *)__func__)
#if defined(DEBUG) || !defined(_KERNEL)
/*
 * One tracked reference (only compiled when DEBUG is defined or _KERNEL
 * is not).
 * NOTE(review): ref_holder is presumably the holder_tag passed to
 * refcount_add*(), ref_number the count taken with it, and ref_removed
 * a record kept once the reference is dropped -- confirm in refcount.c.
 */
typedef struct reference {
list_node_t ref_link;
void *ref_holder;
uint64_t ref_number;
uint8_t *ref_removed;
} reference_t;
/*
 * Tracking variant of refcount_t: a mutex, two lists and two signed
 * 64-bit counters.
 * NOTE(review): rc_list / rc_removed presumably hold reference_t entries
 * for live and released references respectively -- confirm in refcount.c.
 */
typedef struct refcount {
kmutex_t rc_mtx;
list_t rc_list;
list_t rc_removed;
int64_t rc_count;
int64_t rc_removed_count;
} refcount_t;
/* Note: refcount_t must be initialized with refcount_create() */
void refcount_create(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
/* Queries: a zero test (int) and the current count (int64_t). */
int refcount_is_zero(refcount_t *rc);
int64_t refcount_count(refcount_t *rc);
/*
 * Add/remove one or `number` references on behalf of holder_tag.
 * NOTE(review): the int64_t result is presumably the count after the
 * update, matching the atomic_add_64_nv() based macros used when this
 * tracking variant is compiled out -- confirm in refcount.c.
 */
int64_t refcount_add(refcount_t *rc, void *holder_tag);
int64_t refcount_remove(refcount_t *rc, void *holder_tag);
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
/* Subsystem-wide set-up and tear-down. */
void refcount_init(void);
void refcount_fini(void);
#else /* DEBUG */
/*
 * Lightweight variant used when reference tracking is compiled out: the
 * refcount is a bare 64-bit counter and every holder argument is accepted
 * but ignored.
 */
typedef struct refcount {
	uint64_t rc_count;
} refcount_t;

/* Creation and destruction simply reset the counter to zero. */
#define refcount_create(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)

/* Queries read the counter directly. */
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
#define refcount_count(rc) ((rc)->rc_count)

/*
 * Updates go through atomic_add_64_nv(), whose result is the value of the
 * whole macro expression.  `number` is parenthesized before it is used or
 * negated so that an argument such as `a - b` is applied as a single
 * quantity rather than expanding to `-a - b`.
 */
#define refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
#define refcount_add_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, (number))
#define refcount_remove_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, -(number))

/* Nothing to set up or tear down in this variant. */
#define refcount_init()
#define refcount_fini()
#endif /* DEBUG */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_REFCOUNT_H */
-61
View File
@@ -1,61 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_RPRWLOCK_H
#define _SYS_RPRWLOCK_H
#include <sys/inttypes.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lock state: a mutex, the writer thread pointer, a condition variable
 * and a refcount_t.
 * NOTE(review): rw_lock presumably guards the other fields and rw_count
 * presumably counts current holders -- confirm in rprwlock.c.
 */
typedef struct rprwlock {
kmutex_t rw_lock;
kthread_t *rw_writer;
kcondvar_t rw_cv;
refcount_t rw_count;
} rprwlock_t;
/* Set-up and tear-down. */
void rprw_init(rprwlock_t *rwl);
void rprw_destroy(rprwlock_t *rwl);
/*
 * Acquire as reader, as writer, or in the mode selected by `rw`; release
 * with rprw_exit().  Each call takes a caller-supplied tag pointer.
 */
void rprw_enter_read(rprwlock_t *rwl, void *tag);
void rprw_enter_write(rprwlock_t *rwl, void *tag);
void rprw_enter(rprwlock_t *rwl, krw_t rw, void *tag);
void rprw_exit(rprwlock_t *rwl, void *tag);
/* Query whether the lock is held in the given mode. */
boolean_t rprw_held(rprwlock_t *rwl, krw_t rw);
/* Convenience wrappers around rprw_held() for each mode. */
#define RPRW_READ_HELD(x) rprw_held(x, RW_READER)
#define RPRW_WRITE_HELD(x) rprw_held(x, RW_WRITER)
#ifdef __cplusplus
}
#endif
#endif /* _SYS_RPRWLOCK_H */
-80
View File
@@ -1,80 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_RR_RW_LOCK_H
#define _SYS_RR_RW_LOCK_H
#pragma ident "@(#)rrwlock.h 1.1 07/10/24 SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/inttypes.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
/*
* A reader-writer lock implementation that allows re-entrant reads, but
* still gives writers priority on "new" reads.
*
* See rrwlock.c for more details about the implementation.
*
* Fields of the rrwlock_t structure:
* - rr_lock: protects modification and reading of rrwlock_t fields
* - rr_cv: cv for waking up readers or waiting writers
* - rr_writer: thread id of the current writer
* - rr_anon_rcount: number of active anonymous readers
* - rr_linked_rcount: total number of non-anonymous active readers
* - rr_writer_wanted: a writer wants the lock
*/
/*
 * Re-entrant reader-writer lock state.  Readers are counted in two
 * separate refcounts (anonymous and linked); the writer is recorded by
 * thread pointer and a pending writer by the rr_writer_wanted flag.
 */
typedef struct rrwlock {
kmutex_t rr_lock;
kcondvar_t rr_cv;
kthread_t *rr_writer;
refcount_t rr_anon_rcount;
refcount_t rr_linked_rcount;
boolean_t rr_writer_wanted;
} rrwlock_t;
/*
 * 'tag' is used in reference counting tracking. The
 * 'tag' must be the same in a rrw_enter() as in its
 * corresponding rrw_exit().
 */
void rrw_init(rrwlock_t *rrl);
void rrw_destroy(rrwlock_t *rrl);
/* Acquire in the mode selected by `rw`; release with rrw_exit(). */
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
void rrw_exit(rrwlock_t *rrl, void *tag);
/* Query whether the lock is held in the given mode. */
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
/* Convenience wrappers around rrw_held() for each mode. */
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
#ifdef __cplusplus
}
#endif
#endif /* _SYS_RR_RW_LOCK_H */
-538
View File
@@ -1,538 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_H
#define _SYS_SPA_H
#pragma ident "@(#)spa.h 1.31 08/04/09 SMI"
#include <sys/avl.h>
#include <sys/zfs_context.h>
#include <sys/nvpair.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Forward references that lots of things need.
 *
 * Only the struct tags are named here, so other headers can use these
 * types through pointers without pulling in the full definitions.
 * struct dsl_pool is forward-declared without a typedef.
 */
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct traverse_handle traverse_handle_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;
/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * A field is described by (low, len): it occupies len bits of the word x
 * starting at bit position low.
 *
 *	BFnn_DECODE / BFnn_GET	extract the field as an unshifted value
 *	BFnn_ENCODE		mask a value to len bits and shift it to low
 *	BFnn_SET		replace the field in the lvalue x with val by
 *				XOR-ing in the difference between the old and
 *				new field values; other bits are untouched
 *	BFnn_GET_SB		decode, add bias, then shift left by shift
 *	BFnn_SET_SB		store (val >> shift) - bias in the field
 *
 * The mask is built as (1 << len) via P2PHASE(), so len must be smaller
 * than the word width.  Arguments may be evaluated more than once; do not
 * pass expressions with side effects.  Every argument is parenthesized at
 * its point of use so that compound expressions (for example a conditional
 * expression for `low`) are applied as a unit.
 */
#define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len))
#define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len))
#define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low))
#define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low))
#define BF32_GET(x, low, len) BF32_DECODE(x, low, len)
#define BF64_GET(x, low, len) BF64_DECODE(x, low, len)
#define BF32_SET(x, low, len, val) \
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define BF64_SET(x, low, len, val) \
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))
#define BF32_GET_SB(x, low, len, shift, bias) \
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define BF64_GET_SB(x, low, len, shift, bias) \
	((BF64_GET(x, low, len) + (bias)) << (shift))
#define BF32_SET_SB(x, low, len, shift, bias, val) \
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define BF64_SET_SB(x, low, len, shift, bias, val) \
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 */
#define SPA_MINBLOCKSHIFT 9 /* log2 of the smallest block (512 bytes) */
#define SPA_MAXBLOCKSHIFT 17 /* log2 of the largest block (128K) */
#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
/* Number of power-of-two sizes in [MINBLOCKSHIFT, MAXBLOCKSHIFT]: 9. */
#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 *
 * Each value below is a field width in bits; the encoded sizes count
 * 512-byte units.
 */
#define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */
#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */
#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA.
 *
 * Per the DVA_GET_* / DVA_SET_* macros in this header: dva_word[0] holds
 * ASIZE in bits 0-23, GRID in bits 24-31 and the vdev id in bits 32-63;
 * dva_word[1] holds the offset in bits 0-62 and the gang flag in bit 63.
 */
typedef struct dva {
uint64_t dva_word[2];
} dva_t;
/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
 * It is stored as four 64-bit words.
 */
typedef struct zio_cksum {
uint64_t zc_word[4];
} zio_cksum_t;
/*
* Each block is described by its DVAs, time of birth, checksum, etc.
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | vdev3 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | checksum[2] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | checksum[3] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* vdev virtual device ID
* offset offset into virtual device
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
* GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
* E endianness
* type DMU object type
* lvl level of indirection
* birth txg transaction group in which the block was born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
/*
 * Block pointer.  The members add up to sixteen 64-bit words (3 DVAs of
 * two words each, blk_prop, three pad words, birth txg, fill count and a
 * four-word checksum), i.e. 128 bytes.
 */
typedef struct blkptr {
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[3]; /* Extra space for the future */
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
/* Keep these in step with the structure above. */
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
/*
 * Macros to get and set fields in a bp or DVA.
 *
 * Each accessor names a (low bit, length) field of one 64-bit word and
 * delegates to BF64_GET / BF64_SET, or to the _SB forms when the stored
 * value is a size or offset in SPA_MINBLOCKSHIFT (512-byte) units.  The
 * LSIZE and PSIZE fields additionally use a bias of 1, i.e. the field
 * stores (size in 512-byte units) - 1.
 */
/* dva_word[0]: ASIZE (bits 0-23), GRID (bits 24-31), vdev (bits 32-63). */
#define DVA_GET_ASIZE(dva) \
BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
#define DVA_SET_ASIZE(dva, x) \
BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32)
#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x)
/* dva_word[1]: offset (bits 0-62), gang flag (bit 63). */
#define DVA_GET_OFFSET(dva) \
BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
#define DVA_SET_OFFSET(dva, x) \
BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1)
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
/* blk_prop bits 0-15: LSIZE.  A hole always reports a logical size of 0. */
#define BP_GET_LSIZE(bp) \
(BP_IS_HOLE(bp) ? 0 : \
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
#define BP_SET_LSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
/* blk_prop bits 16-31: PSIZE (no hole check, unlike BP_GET_LSIZE). */
#define BP_GET_PSIZE(bp) \
BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_PSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
/* blk_prop bits 32-39: compression function. */
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
/* blk_prop bits 40-47: checksum function. */
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
/* blk_prop bits 48-55: DMU object type. */
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
/* blk_prop bits 56-60: level of indirection. */
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
/*
 * blk_prop bit 63: byte order.  The getter negates the single bit, so it
 * yields either 0 or an all-ones 64-bit value.
 */
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
/* Sum of the allocated sizes of all three DVAs. */
#define BP_GET_ASIZE(bp) \
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
/*
 * Yields BP_GET_PSIZE(bp) when the block is an indirect block (level > 0)
 * or its type's dmu_ot[] entry has ot_metadata set, and BP_GET_LSIZE(bp)
 * otherwise.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so the macro can be used inside larger expressions,
 * conditions and argument lists as well as in plain assignments.
 */
#define BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
/*
 * Number of DVAs in the bp that have a non-zero allocated size; each
 * `!!` collapses an ASIZE to 0 or 1 before the three are summed.
 */
#define BP_GET_NDVAS(bp) \
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
/* Number of DVAs in the bp whose gang bit is set (0 to 3). */
#define BP_COUNT_GANG(bp) \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
DVA_GET_GANG(&(bp)->blk_dva[2]))
/*
 * Two DVAs (passed by pointer) are equal when both of their 64-bit words
 * match.  Evaluates to 1 or 0.
 */
#define DVA_EQUAL(dva1, dva2) \
	(!((dva1)->dva_word[0] != (dva2)->dva_word[0] || \
	(dva1)->dva_word[1] != (dva2)->dva_word[1]))
/*
 * Two checksums (passed by value) are equal when all four words match.
 * The per-word differences are OR-ed together and compared against zero
 * once, so the comparison is a single test.  Evaluates to 1 or 0.
 */
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
	((((zc1).zc_word[0] ^ (zc2).zc_word[0]) | \
	((zc1).zc_word[1] ^ (zc2).zc_word[1]) | \
	((zc1).zc_word[2] ^ (zc2).zc_word[2]) | \
	((zc1).zc_word[3] ^ (zc2).zc_word[3])) == 0)
/* A DVA is considered valid when its allocated size is non-zero. */
#define DVA_IS_VALID(dva) (0 != DVA_GET_ASIZE(dva))
/*
 * Store four 64-bit words into the checksum pointed to by zcp.
 *
 * NOTE(review): this expands to a bare { } block rather than
 * do { } while (0), so `if (c) ZIO_SET_CHECKSUM(...); else ...` will not
 * compile; brace the if body at such call sites.
 */
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
{ \
(zcp)->zc_word[0] = w0; \
(zcp)->zc_word[1] = w1; \
(zcp)->zc_word[2] = w2; \
(zcp)->zc_word[3] = w3; \
}
/* The first DVA of a bp stands in for the block's identity. */
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
/* Gang status is taken from the identity (first) DVA only. */
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
/* A bp with a birth txg of zero is a hole. */
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
/* True for a non-hole bp born strictly before txg. */
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
/*
 * Clear both words of all three DVAs and reset blk_birth to 0.  Zeroing
 * blk_birth means BP_IS_HOLE() is true for the bp afterwards.  Expands
 * to a bare { } block (no trailing semicolon needed).
 */
#define BP_ZERO_DVAS(bp) \
{ \
(bp)->blk_dva[0].dva_word[0] = 0; \
(bp)->blk_dva[0].dva_word[1] = 0; \
(bp)->blk_dva[1].dva_word[0] = 0; \
(bp)->blk_dva[1].dva_word[1] = 0; \
(bp)->blk_dva[2].dva_word[0] = 0; \
(bp)->blk_dva[2].dva_word[1] = 0; \
(bp)->blk_birth = 0; \
}
/*
 * Zero every member of the bp: the DVAs and birth txg via BP_ZERO_DVAS(),
 * then the property word, padding, fill count and checksum.
 * BP_ZERO_DVAS(bp) is written without a trailing semicolon because it
 * already expands to a braced block.
 */
#define BP_ZERO(bp) \
{ \
BP_ZERO_DVAS(bp) \
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
(bp)->blk_pad[2] = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
/*
 * Note: the byteorder is either 0 or -1, both of which are palindromes.
 * This simplifies the endianness handling a bit.
 *
 * "Palindrome" here means the value has the same byte sequence in either
 * byte order.  BP_GET_BYTEORDER() yields 0 or an all-ones 64-bit value,
 * which is what ZFS_HOST_BYTEORDER is compared against below.
 */
#ifdef _BIG_ENDIAN
#define ZFS_HOST_BYTEORDER (0ULL)
#else
#define ZFS_HOST_BYTEORDER (-1ULL)
#endif
/* True when the bp's recorded byte order differs from the host's. */
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
/* Buffer length used with sprintf_blkptr() (see dprintf_bp). */
#define BP_SPRINTF_LEN 320
#include <sys/dmu.h>
/*
 * Yields ARC_BUFC_METADATA when the block is an indirect block
 * (level > 0) or its type's dmu_ot[] entry has ot_metadata set, and
 * ARC_BUFC_DATA otherwise.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so the macro can be used as a function argument or inside a
 * larger expression as well as in a plain assignment.
 */
#define BP_GET_BUFC_TYPE(bp) \
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
/*
 * Routines found in spa.c
 */
/* state manipulation functions */
/*
 * Pool-level operations keyed by pool name.  The int-returning routines
 * presumably return 0 on success and an errno value on failure --
 * NOTE(review): confirm against spa.c.
 */
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_get_stats(const char *pool, nvlist_t **config,
char *altroot, size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
const char *history_str);
/* Root-pool discovery and import helpers. */
extern void spa_check_rootconf(char *devpath, char **the_dev_p,
nvlist_t **the_conf_p, uint64_t *the_txg_p);
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
extern int spa_import_rootpool(char *devpath);
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
extern int spa_destroy(char *pool);
extern int spa_export(char *pool, nvlist_t **oldconfig);
extern int spa_reset(char *pool);
/* Asynchronous requests; `flag` takes the SPA_ASYNC_* values below. */
extern void spa_async_request(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
/* Async request bits; each is a distinct power of two, so they can be OR-ed. */
#define SPA_ASYNC_REMOVE 0x01
#define SPA_ASYNC_RESILVER_DONE 0x02
#define SPA_ASYNC_SCRUB 0x04
#define SPA_ASYNC_RESILVER 0x08
#define SPA_ASYNC_CONFIG_UPDATE 0x10
/* device manipulation */
/* Target vdevs are identified by guid; new vdevs are described by an nvlist. */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
int replacing);
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
extern void spa_spare_remove(vdev_t *vd);
/* NOTE(review): `pool` looks like an out-parameter for the owning pool's guid -- confirm. */
extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool);
extern void spa_spare_activate(vdev_t *vd);
/* L2ARC state (which is global across all pools) */
extern void spa_l2cache_add(vdev_t *vd);
extern void spa_l2cache_remove(vdev_t *vd);
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
extern void spa_l2cache_activate(vdev_t *vd);
extern void spa_l2cache_drop(spa_t *spa);
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
/* scrubbing */
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
extern void spa_scrub_suspend(spa_t *spa);
extern void spa_scrub_resume(spa_t *spa);
extern void spa_scrub_restart(spa_t *spa, uint64_t txg);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
/*
 * SPA configuration functions in spa_config.c
 */
/* Values for the `what` argument of spa_config_update*(). */
#define SPA_CONFIG_UPDATE_POOL 0
#define SPA_CONFIG_UPDATE_VDEVS 1
extern void spa_config_sync(void);
extern void spa_config_check(const char *, const char *);
extern void spa_config_load(void);
extern nvlist_t *spa_all_configs(uint64_t *);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
/* Returns a newly generated nvlist; NOTE(review): ownership presumably passes to the caller -- confirm. */
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern void spa_config_update(spa_t *spa, int what);
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/*
 * Miscellaneous SPA routines in spa_misc.c
 */
/* Namespace manipulation */
/* Look up, add, remove and iterate over spa_t entries by name. */
extern spa_t *spa_lookup(const char *name);
extern spa_t *spa_add(const char *name, const char *altroot);
extern void spa_remove(spa_t *spa);
extern spa_t *spa_next(spa_t *prev);
/* Refcount functions */
/* Both take a caller-supplied tag pointer. */
extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
/* Pool configuration lock */
/* Entered in the mode given by `rw`; enter and exit both take a tag. */
extern void spa_config_enter(spa_t *spa, krw_t rw, void *tag);
extern void spa_config_exit(spa_t *spa, void *tag);
extern boolean_t spa_config_held(spa_t *spa, krw_t rw);
/* Pool vdev add/remove lock */
/* NOTE(review): the uint64_t returned by spa_vdev_enter() is presumably the txg to hand to spa_vdev_exit() -- confirm. */
extern uint64_t spa_vdev_enter(spa_t *spa);
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
/* Accessor functions */
/*
 * Read-mostly accessors on a spa_t (spa_set_rootblkptr() is the one
 * setter; spa_busy() takes no pool argument).  spa_version() was
 * previously declared twice in this group; the redundant second
 * declaration has been dropped.
 */
extern krwlock_t *spa_traverse_rwlock(spa_t *spa);
extern int spa_traverse_wanted(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
/* Copies the alternate root into the caller's buffer of the given size. */
extern void spa_altroot(spa_t *, char *, size_t);
extern int spa_sync_pass(spa_t *spa);
extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern int spa_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
/* Space accounting. */
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);
extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern int spa_max_replication(spa_t *spa);
extern int spa_busy(void);
extern uint8_t spa_get_failmode(spa_t *spa);
/* Miscellaneous support routines */
extern int spa_rename(const char *oldname, const char *newname);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
/* String helpers; NOTE(review): spa_strfree() presumably releases what spa_strdup() returned -- confirm. */
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
/* Formats a bp into buf, which is `len` bytes long (see BP_SPRINTF_LEN). */
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
extern void spa_evict_all(void);
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid);
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
extern boolean_t spa_has_slogs(spa_t *spa);
/* history logging */
/* Kind of history record being logged. */
typedef enum history_log_type {
LOG_CMD_POOL_CREATE,
LOG_CMD_NORMAL,
LOG_INTERNAL
} history_log_type_t;
/*
 * Bundle of arguments describing one history record: the message string,
 * its log type, the internal event id and a zone name buffer of
 * MAXPATHLEN bytes.
 */
typedef struct history_arg {
const char *ha_history_str;
history_log_type_t ha_log_type;
history_internal_events_t ha_event;
char ha_zone[MAXPATHLEN];
} history_arg_t;
/* NOTE(review): presumably indexed by history_internal_events_t -- confirm. */
extern char *spa_his_ievent_table[];
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
/* offset and len_read are passed by pointer, so both can be updated. */
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
extern int spa_history_log(spa_t *spa, const char *his_buf,
history_log_type_t what);
/* printf-style: `fmt` is followed by a variable argument list. */
void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
/* error handling */
/* Forward declarations so the prototypes below can take struct pointers. */
struct zbookmark;
struct zio;
extern void spa_log_error(spa_t *spa, struct zio *zio);
/* Event posting, identified by a class string plus pool / vdev / zio context. */
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t stateoroffset, uint64_t length);
extern void zfs_post_ok(spa_t *spa, vdev_t *vd);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
/* Persistent error log access and maintenance. */
extern uint64_t spa_get_errlog_size(spa_t *spa);
/* NOTE(review): uaddr looks like a user-space destination and *count an in/out entry count -- confirm. */
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);
/* Initialization and termination */
extern void spa_init(int flags);
extern void spa_fini(void);
/*
 * Declared with an explicit (void) parameter list, like the other
 * no-argument routines in this header, so that the compiler checks calls
 * against a real prototype instead of an unspecified argument list.
 */
extern void spa_boot_init(void);
/* properties */
/* Properties are exchanged as nvlists; spa_prop_get() returns one via *nvp. */
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
/* asynchronous event notification */
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
/*
 * dprintf_bp(bp, fmt, ...): debug-print a message followed by a textual
 * rendering of the block pointer.
 *
 * With ZFS_DEBUG defined, and only when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags, it allocates a BP_SPRINTF_LEN byte scratch buffer, formats
 * the bp into it with sprintf_blkptr(), appends it to the caller's
 * message via dprintf(), and frees the buffer.  Because __VA_ARGS__ is
 * followed by a comma in the expansion, at least one argument must be
 * supplied after fmt.  Without ZFS_DEBUG the macro expands to nothing.
 */
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
#define dprintf_bp(bp, fmt, ...)
#endif
extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_H */
-46
View File
@@ -1,46 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_BOOT_H
#define _SYS_SPA_BOOT_H
#pragma ident "@(#)spa_boot.h 1.1 08/04/09 SMI"
#include <sys/nvpair.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Boot filesystem name helpers.  spa_get_bootfs() takes no arguments;
 * it is declared with (void) so that it is a real prototype rather
 * than an unchecked "()" declaration.
 * NOTE(review): presumably the string returned by spa_get_bootfs() is
 * released with spa_free_bootfs() -- confirm against the
 * implementation.
 */
extern char *spa_get_bootfs(void);
extern void spa_free_bootfs(char *bootfs);
/*
 * NOTE(review): bestdev_p and bestconf_p are pointer-to-pointer and so
 * look like out-parameters, and the int return is presumably an error
 * code -- confirm against the implementation before relying on either.
 */
extern int spa_get_rootconf(char *devpath, char **bestdev_p,
nvlist_t **bestconf_p);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_BOOT_H */
-178
View File
@@ -1,178 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPA_IMPL_H
#define _SYS_SPA_IMPL_H
#pragma ident "@(#)spa_impl.h 1.17 07/11/27 SMI"
#include <sys/spa.h>
#include <sys/vdev.h>
#include <sys/metaslab.h>
#include <sys/dmu.h>
#include <sys/dsl_pool.h>
#include <sys/uberblock_impl.h>
#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/refcount.h>
#include <sys/bplist.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * One error record.  se_avl is AVL linkage, so entries are meant to be
 * kept in an avl_tree_t.
 * NOTE(review): presumably these populate the spa_errlist_last and
 * spa_errlist_scrub trees in struct spa -- confirm.
 */
typedef struct spa_error_entry {
zbookmark_t se_bookmark;
char *se_name;
avl_node_t se_avl;
} spa_error_entry_t;
/*
 * History log bookkeeping: five 64-bit offsets and counters.
 * NOTE(review): the _phys suffix suggests this mirrors an on-disk
 * layout, in which case field order and width must not change --
 * confirm before editing.
 */
typedef struct spa_history_phys {
uint64_t sh_pool_create_len; /* ending offset of zpool create */
uint64_t sh_phys_max_off; /* physical EOF */
uint64_t sh_bof; /* logical BOF */
uint64_t sh_eof; /* logical EOF */
uint64_t sh_records_lost; /* num of records overwritten */
} spa_history_phys_t;
/*
 * State for one list of auxiliary devices.  struct spa embeds two of
 * these: spa_spares (hot spares) and spa_l2cache (L2ARC cache devices).
 * Note that sav_count is a signed int while sav_npending is a uint_t.
 */
struct spa_aux_vdev {
uint64_t sav_object; /* MOS object for device list */
nvlist_t *sav_config; /* cached device config */
vdev_t **sav_vdevs; /* devices */
int sav_count; /* number devices */
boolean_t sav_sync; /* sync the device list */
nvlist_t **sav_pending; /* pending device additions */
uint_t sav_npending; /* # pending devices */
};
/*
 * Configuration lock state, embedded in struct spa as spa_config_lock.
 * It contains a refcount_t, whose size depends on compilation options.
 * NOTE(review): field roles below are inferred from the names only --
 * confirm against the lock implementation.
 */
typedef struct spa_config_lock {
kmutex_t scl_lock; /* presumably guards the fields below */
kthread_t *scl_writer; /* presumably the current writer thread */
uint16_t scl_write_wanted; /* presumably pending writer requests */
kcondvar_t scl_cv; /* presumably where waiters block */
refcount_t scl_count; /* presumably outstanding holds */
} spa_config_lock_t;
/*
 * In-core state for one storage pool.  Each field carries its own
 * one-line description.  Field positions matter: the MDB module relies
 * on them, which is why the two size-varying members are kept at the
 * very end (see the note in front of them).
 */
struct spa {
/*
* Fields protected by spa_namespace_lock.
*/
char *spa_name; /* pool name */
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
uint64_t spa_config_txg; /* txg of last config change */
kmutex_t spa_config_cache_lock; /* for spa_config RW_READER */
int spa_sync_pass; /* iterate-to-convergence */
int spa_state; /* pool state */
int spa_inject_ref; /* injection references */
uint8_t spa_traverse_wanted; /* traverse lock wanted */
uint8_t spa_sync_on; /* sync threads are running */
spa_load_state_t spa_load_state; /* current load operation */
taskq_t *spa_zio_issue_taskq[ZIO_TYPES];
taskq_t *spa_zio_intr_taskq[ZIO_TYPES];
dsl_pool_t *spa_dsl_pool;
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
uint64_t spa_freeze_txg; /* freeze pool at this txg */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
uint64_t spa_load_guid; /* initial guid for spa_load */
list_t spa_dirty_list; /* vdevs with dirty labels */
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
bplist_t spa_sync_bplist; /* deferred-free bplist */
krwlock_t spa_traverse_lock; /* traverse vs. spa_sync() */
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
kthread_t *spa_scrub_thread; /* scrub/resilver thread */
traverse_handle_t *spa_scrub_th; /* scrub traverse handle */
uint64_t spa_scrub_restart_txg; /* need to restart */
uint64_t spa_scrub_mintxg; /* min txg we'll scrub */
uint64_t spa_scrub_maxtxg; /* max txg we'll scrub */
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
uint64_t spa_scrub_errors; /* scrub I/O error count */
int spa_scrub_suspended; /* tell scrubber to suspend */
kcondvar_t spa_scrub_cv; /* scrub thread state change */
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
uint8_t spa_scrub_stop; /* tell scrubber to stop */
uint8_t spa_scrub_active; /* active or suspended? */
uint8_t spa_scrub_type; /* type of scrub we're doing */
uint8_t spa_scrub_finished; /* indicator to rotate logs */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */
kcondvar_t spa_async_cv; /* wait for thread_exit() */
uint16_t spa_async_tasks; /* async task mask */
char *spa_root; /* alternate root directory */
kmutex_t spa_uberblock_lock; /* vdev_uberblock_load_done() */
uint64_t spa_ena; /* spa-wide ereport ENA */
boolean_t spa_last_open_failed; /* true if last open failed */
kmutex_t spa_errlog_lock; /* error log lock */
uint64_t spa_errlog_last; /* last error log object */
uint64_t spa_errlog_scrub; /* scrub error log object */
kmutex_t spa_errlist_lock; /* error list/ereport lock */
avl_tree_t spa_errlist_last; /* last error list */
avl_tree_t spa_errlist_scrub; /* scrub error list */
uint64_t spa_deflate; /* should we deflate? */
uint64_t spa_history; /* history object */
kmutex_t spa_history_lock; /* history lock */
vdev_t *spa_pending_vdev; /* pending vdev additions */
kmutex_t spa_props_lock; /* property lock */
uint64_t spa_pool_props_object; /* object for properties */
uint64_t spa_bootfs; /* default boot filesystem */
boolean_t spa_delegation; /* delegation on/off */
char *spa_config_dir; /* cache file directory */
char *spa_config_file; /* cache file name */
list_t spa_zio_list; /* zio error list */
kcondvar_t spa_zio_cv; /* resume I/O pipeline */
kmutex_t spa_zio_lock; /* zio error lock */
uint8_t spa_failmode; /* failure mode for the pool */
/*
* spa_refcount & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
* In order for the MDB module to function correctly, the other
* fields must remain in the same location.
*/
spa_config_lock_t spa_config_lock; /* configuration changes */
refcount_t spa_refcount; /* number of opens */
};
extern const char *spa_config_dir;
extern kmutex_t spa_namespace_lock;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_IMPL_H */
-162
View File
@@ -1,162 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SPACE_MAP_H
#define _SYS_SPACE_MAP_H
#pragma ident "@(#)space_map.h 1.2 06/04/02 SMI"
#include <sys/avl.h>
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct space_map_ops space_map_ops_t;
/*
 * In-core space map.  Segments are kept in the sm_root AVL tree and
 * sm_space tracks their combined size.  The protecting mutex is not
 * embedded: sm_lock is a pointer to a lock owned elsewhere, supplied
 * as the 'lp' argument of space_map_create().
 */
typedef struct space_map {
avl_tree_t sm_root; /* AVL tree of map segments */
uint64_t sm_space; /* sum of all segments in the map */
uint64_t sm_start; /* start of map */
uint64_t sm_size; /* size of map */
uint8_t sm_shift; /* unit shift */
uint8_t sm_pad[3]; /* unused */
uint8_t sm_loaded; /* map loaded? */
uint8_t sm_loading; /* map loading? */
kcondvar_t sm_load_cv; /* map load completion */
space_map_ops_t *sm_ops; /* space map block picker ops vector */
void *sm_ppd; /* picker-private data */
kmutex_t *sm_lock; /* pointer to lock that protects map */
} space_map_t;
/*
 * One segment, described as the half-open range [ss_start, ss_end):
 * ss_end is not part of the segment.  ss_node is its AVL linkage.
 */
typedef struct space_seg {
avl_node_t ss_node; /* AVL node */
uint64_t ss_start; /* starting offset of this segment */
uint64_t ss_end; /* ending offset (non-inclusive) */
} space_seg_t;
/*
 * Handle for a space map's on-disk object; passed to space_map_load(),
 * space_map_sync() and space_map_truncate().
 */
typedef struct space_map_obj {
uint64_t smo_object; /* on-disk space map object */
uint64_t smo_objsize; /* size of the object */
uint64_t smo_alloc; /* space allocated from the map */
} space_map_obj_t;
/*
 * Block-picker operations vector, referenced from space_map_t.sm_ops.
 * Every hook receives the map it operates on.
 * NOTE(review): smop_alloc presumably returns the offset it picked for
 * 'size' bytes; the failure value is not visible here -- confirm.
 */
struct space_map_ops {
void (*smop_load)(space_map_t *sm);
void (*smop_unload)(space_map_t *sm);
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
};
/*
* debug entry
*
* 1 3 10 50
* ,---+--------+------------+---------------------------------.
* | 1 | action | syncpass | txg (lower bits) |
* `---+--------+------------+---------------------------------'
* 63 62 60 59 50 49 0
*
*
*
* non-debug entry
*
* 1 47 1 15
* ,-----------------------------------------------------------.
* | 0 | offset (sm_shift units) | type | run |
* `-----------------------------------------------------------'
* 63 62 16 15 14 0
*/
/*
 * Field accessors for the 64-bit space map entries.
 *
 * The run length is stored biased by one: SM_RUN_ENCODE subtracts 1
 * before packing and SM_RUN_DECODE adds it back, so SM_RUN_MAX is the
 * decoded value of an all-ones entry.
 * NOTE(review): assumes the BF64_* helpers take (value, low bit, bit
 * count), i.e. run = 15 bits at bit 0, type = bit 15, offset = 47 bits
 * at bit 16, debug flag = bit 63 -- confirm against their definition.
 */
/* All this stuff takes and returns bytes */
#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
/* Values carried in the one-bit type field. */
#define SM_ALLOC 0x0
#define SM_FREE 0x1
/*
* The data for a given space map can be kept on blocks of any size.
* Larger blocks entail fewer i/o operations, but they also cause the
* DMU to keep more data in-core, and also to waste more i/o bandwidth
* when only a few blocks have changed since the last transaction group.
* This could use a lot more research, but for now, set the freelist
* block size to 4k (2^12).
*/
#define SPACE_MAP_BLOCKSHIFT 12
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
uint8_t shift, kmutex_t *lp);
extern void space_map_destroy(space_map_t *sm);
extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_walk(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);
extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_union(space_map_t *smd, space_map_t *sms);
extern void space_map_load_wait(space_map_t *sm);
extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
uint8_t maptype, space_map_obj_t *smo, objset_t *os);
extern void space_map_unload(space_map_t *sm);
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
extern void space_map_truncate(space_map_obj_t *smo,
objset_t *os, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPACE_MAP_H */
-127
View File
@@ -1,127 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_TXG_H
#define _SYS_TXG_H
#pragma ident "@(#)txg.h 1.2 08/03/20 SMI"
#include <sys/spa.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Transaction group sizing.  TXG_SIZE is the power of two just above
 * TXG_CONCURRENT_STATES, which lets TXG_MASK (TXG_SIZE - 1) reduce a
 * txg number to an array slot with a single AND.
 */
#define TXG_CONCURRENT_STATES 3 /* open, quiescing, syncing */
#define TXG_SIZE 4 /* next power of 2 */
#define TXG_MASK (TXG_SIZE - 1) /* mask for size */
#define TXG_INITIAL TXG_SIZE /* initial txg */
/*
 * TXG_IDX takes no argument: it expands to an expression that reads a
 * variable named 'txg' from the scope where it is used, so it only
 * compiles where such a variable exists.
 */
#define TXG_IDX (txg & TXG_MASK)
#define TXG_WAIT 1ULL
#define TXG_NOWAIT 2ULL
typedef struct tx_cpu tx_cpu_t;
/*
 * Handle filled in through the txghp argument of txg_hold_open() and
 * later passed to txg_rele_to_quiesce() / txg_rele_to_sync().
 * NOTE(review): th_cpu / th_txg presumably record the per-CPU slot and
 * the txg that was held -- confirm.
 */
typedef struct txg_handle {
tx_cpu_t *th_cpu;
uint64_t th_txg;
} txg_handle_t;
/*
 * Per-txg list linkage: one next pointer and one membership byte for
 * each of the TXG_SIZE slots.
 * NOTE(review): slots are presumably indexed by (txg & TXG_MASK) --
 * confirm against the txg_list_*() implementation.
 */
typedef struct txg_node {
struct txg_node *tn_next[TXG_SIZE];
uint8_t tn_member[TXG_SIZE];
} txg_node_t;
/*
 * Per-txg list head: one head pointer per TXG_SIZE slot plus a lock.
 * NOTE(review): tl_offset is presumably the byte offset of the
 * txg_node_t inside the listed object, as supplied to
 * txg_list_create() -- confirm.
 */
typedef struct txg_list {
kmutex_t tl_lock;
size_t tl_offset;
txg_node_t *tl_head[TXG_SIZE];
} txg_list_t;
struct dsl_pool;
extern void txg_init(struct dsl_pool *dp, uint64_t txg);
extern void txg_fini(struct dsl_pool *dp);
extern void txg_sync_start(struct dsl_pool *dp);
extern void txg_sync_stop(struct dsl_pool *dp);
extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
extern void txg_rele_to_quiesce(txg_handle_t *txghp);
extern void txg_rele_to_sync(txg_handle_t *txghp);
extern void txg_suspend(struct dsl_pool *dp);
extern void txg_resume(struct dsl_pool *dp);
/*
* Delay the caller by the specified number of ticks or until
* the txg closes (whichever comes first). This is intended
* to be used to throttle writers when the system nears its
* capacity.
*/
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
/*
* Wait until the given transaction group has finished syncing.
* Try to make this happen as soon as possible (eg. kick off any
* necessary syncs immediately). If txg==0, wait for the currently open
* txg to finish syncing.
*/
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
/*
* Wait until the given transaction group, or one after it, is
* the open transaction group. Try to make this happen as soon
* as possible (eg. kick off any necessary syncs immediately).
* If txg == 0, wait for the next open txg.
*/
extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
/*
* Returns TRUE if we are "backed up" waiting for the syncing
* transaction to complete; otherwise returns FALSE.
*/
extern int txg_stalled(struct dsl_pool *dp);
/*
* Per-txg object lists.
*/
#define TXG_CLEAN(txg) ((txg) - 1)
extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_TXG_H */
-76
View File
@@ -1,76 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_TXG_IMPL_H
#define _SYS_TXG_IMPL_H
#pragma ident "@(#)txg_impl.h 1.2 08/03/20 SMI"
#include <sys/spa.h>
#include <sys/txg.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * State referenced through tx_state_t.tx_cpu: a lock plus one
 * condition variable and one counter per TXG_SIZE slot.
 * NOTE(review): tc_pad is 16 bytes of explicit padding, presumably to
 * keep neighbouring tx_cpu entries off the same cache line -- confirm
 * before resizing.
 */
struct tx_cpu {
kmutex_t tc_lock;
kcondvar_t tc_cv[TXG_SIZE];
uint64_t tc_count[TXG_SIZE];
char tc_pad[16];
};
/*
 * Transaction group state: the ids of the open, quiesced, syncing and
 * last-synced txgs, the targets that waiters have requested, the
 * condition variables used for signalling, and handles for the sync,
 * quiesce and timelimit threads.  tx_sync_lock protects this
 * structure.
 */
typedef struct tx_state {
tx_cpu_t *tx_cpu; /* protects right to enter txg */
kmutex_t tx_sync_lock; /* protects tx_state_t */
krwlock_t tx_suspend;
uint64_t tx_open_txg; /* currently open txg id */
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
uint64_t tx_syncing_txg; /* currently syncing txg id */
uint64_t tx_synced_txg; /* last synced txg id */
uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
kcondvar_t tx_sync_more_cv;
kcondvar_t tx_sync_done_cv;
kcondvar_t tx_quiesce_more_cv;
kcondvar_t tx_quiesce_done_cv;
kcondvar_t tx_timeout_cv;
kcondvar_t tx_exit_cv; /* wait for all threads to exit */
uint8_t tx_threads; /* number of threads */
uint8_t tx_exiting; /* set when we're exiting */
kthread_t *tx_sync_thread;
kthread_t *tx_quiesce_thread;
kthread_t *tx_timelimit_thread;
} tx_state_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_TXG_IMPL_H */
@@ -1,50 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UBERBLOCK_H
#define _SYS_UBERBLOCK_H
#pragma ident "@(#)uberblock.h 1.1 05/10/30 SMI"
#include <sys/spa.h>
#include <sys/vdev.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct uberblock uberblock_t;
extern int uberblock_verify(uberblock_t *ub);
extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_UBERBLOCK_H */
@@ -1,63 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UBERBLOCK_IMPL_H
#define _SYS_UBERBLOCK_IMPL_H
#pragma ident "@(#)uberblock_impl.h 1.4 07/06/29 SMI"
#include <sys/uberblock.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* The uberblock version is incremented whenever an incompatible on-disk
* format change is made to the SPA, DMU, or ZAP.
*
* Note: the first two fields should never be moved. When a storage pool
* is opened, the uberblock must be read off the disk before the version
* can be checked. If the ub_version field is moved, we may not detect
* version mismatch. If the ub_magic field is moved, applications that
* expect the magic number in the first word won't work.
*/
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
#define UBERBLOCK_SHIFT 10 /* up to 1K */
/*
 * The uberblock.  ub_magic and ub_version must stay the first two
 * fields: the version can only be checked after the block has been
 * read from disk, and applications expect the magic number in the
 * first word.
 */
struct uberblock {
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
uint64_t ub_version; /* SPA_VERSION */
uint64_t ub_txg; /* txg of last sync */
uint64_t ub_guid_sum; /* sum of all vdev guids */
uint64_t ub_timestamp; /* UTC time of last sync */
blkptr_t ub_rootbp; /* MOS objset_phys_t */
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_UBERBLOCK_IMPL_H */
-59
View File
@@ -1,59 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_UNIQUE_H
#define _SYS_UNIQUE_H
#pragma ident "@(#)unique.h 1.2 07/08/02 SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/* The number of significant bits in each unique value. */
#define UNIQUE_BITS 56
void unique_init(void);
void unique_fini(void);
/*
* Return a new unique value (which will not be uniquified against until
* it is unique_insert()-ed).
*/
uint64_t unique_create(void);
/* Return a unique value, which equals the one passed in if possible. */
uint64_t unique_insert(uint64_t value);
/* Indicate that this value no longer needs to be uniquified against. */
void unique_remove(uint64_t value);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_UNIQUE_H */
-138
View File
@@ -1,138 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_H
#define _SYS_VDEV_H
#pragma ident "@(#)vdev.h 1.16 07/12/12 SMI"
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/space_map.h>
#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
#endif
extern boolean_t zfs_nocacheflush;
/*
* Fault injection modes.
*/
#define VDEV_FAULT_NONE 0
#define VDEV_FAULT_RANDOM 1
#define VDEV_FAULT_COUNT 2
extern int vdev_open(vdev_t *);
extern int vdev_validate(vdev_t *);
extern void vdev_close(vdev_t *);
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
extern void vdev_init(vdev_t *, uint64_t txg);
extern void vdev_reopen(vdev_t *);
extern int vdev_validate_aux(vdev_t *vd);
extern int vdev_probe(vdev_t *);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
int scrub_done);
extern const char *vdev_description(vdev_t *vd);
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
extern void vdev_clear_stats(vdev_t *vd);
extern void vdev_stat_update(zio_t *zio);
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
boolean_t complete);
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
extern void vdev_propagate_state(vdev_t *vd);
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
vdev_aux_t aux);
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
int64_t alloc_delta, boolean_t update_root);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
extern int vdev_fault(spa_t *spa, uint64_t guid);
extern int vdev_degrade(spa_t *spa, uint64_t guid);
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
vdev_state_t *);
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
extern void vdev_clear(spa_t *spa, vdev_t *vd, boolean_t reopen_wanted);
extern int vdev_error_inject(vdev_t *vd, zio_t *zio);
extern int vdev_is_dead(vdev_t *vd);
extern int vdev_readable(vdev_t *vd);
extern int vdev_writeable(vdev_t *vd);
extern void vdev_cache_init(vdev_t *vd);
extern void vdev_cache_fini(vdev_t *vd);
extern int vdev_cache_read(zio_t *zio);
extern void vdev_cache_write(zio_t *zio);
extern void vdev_cache_purge(vdev_t *vd);
extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
/*
* Label routines
*/
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
/*
 * Why a vdev label is being written; this is the type of the 'reason'
 * argument of vdev_label_init().  The numeric values are the ones the
 * compiler assigns implicitly (0 through 4), spelled out here.
 */
typedef enum {
	VDEV_LABEL_CREATE = 0,	/* a new device is being created or added */
	VDEV_LABEL_REPLACE = 1,	/* an existing device is being replaced */
	VDEV_LABEL_SPARE = 2,	/* a new hot spare is being added */
	VDEV_LABEL_REMOVE = 3,	/* an existing device is being removed */
	VDEV_LABEL_L2CACHE = 4	/* an L2ARC cache device is being added */
} vdev_labeltype_t;
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_VDEV_H */
@@ -1,46 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_FILE_H
#define _SYS_VDEV_FILE_H
#pragma ident "@(#)vdev_file.h 1.1 05/10/30 SMI"
#include <sys/vdev.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct vdev_file {
vnode_t *vf_vnode;
} vdev_file_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_VDEV_FILE_H */
-306
View File
@@ -1,306 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_VDEV_IMPL_H
#define _SYS_VDEV_IMPL_H
#pragma ident "@(#)vdev_impl.h 1.19 07/11/27 SMI"
#include <sys/avl.h>
#include <sys/dmu.h>
#include <sys/metaslab.h>
#include <sys/nvpair.h>
#include <sys/space_map.h>
#include <sys/vdev.h>
#include <sys/dkio.h>
#include <sys/uberblock_impl.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Virtual device descriptors.
*
* All storage pool operations go through the virtual device framework,
* which provides data replication and I/O scheduling.
*/
/*
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
/*
 * Virtual device operations
 *
 * These typedefs name function types, not pointer types; vdev_ops_t
 * stores a pointer to one of each.
 * NOTE(review): the 'size' and 'ashift' pointers taken by the open
 * hook look like out-parameters, and the int returns are presumably
 * error codes -- confirm against an implementation.
 */
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
typedef void vdev_close_func_t(vdev_t *vd);
typedef int vdev_probe_func_t(vdev_t *vd);
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef int vdev_io_start_func_t(zio_t *zio);
typedef int vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
/*
 * Operations vector for one vdev type: seven function pointers, a type
 * name held in a fixed 16-byte buffer, and a leaf flag.  A vdev reaches
 * its vector through the vdev_ops pointer in struct vdev.
 */
typedef struct vdev_ops {
vdev_open_func_t *vdev_op_open;
vdev_close_func_t *vdev_op_close;
vdev_probe_func_t *vdev_op_probe;
vdev_asize_func_t *vdev_op_asize;
vdev_io_start_func_t *vdev_op_io_start;
vdev_io_done_func_t *vdev_op_io_done;
vdev_state_change_func_t *vdev_op_state_change;
char vdev_op_type[16];
boolean_t vdev_op_leaf;
} vdev_ops_t;
/*
* Virtual device properties
*/
/*
* One entry of the per-vdev block cache (struct vdev_cache). Each entry
* carries two AVL linkage nodes; presumably these link it into the
* cache's vc_offset_tree and vc_lastused_tree respectively.
*/
struct vdev_cache_entry {
char *ve_data;
uint64_t ve_offset;
uint64_t ve_lastused;
avl_node_t ve_offset_node;
avl_node_t ve_lastused_node;
uint32_t ve_hits;
uint16_t ve_missed_update;
zio_t *ve_fill_io;
};
/*
* Per-vdev physical block cache; embedded in struct vdev as vdev_cache.
* NOTE(review): vc_lock presumably protects both trees - confirm in
* vdev_cache.c.
*/
struct vdev_cache {
avl_tree_t vc_offset_tree;
avl_tree_t vc_lastused_tree;
kmutex_t vc_lock;
};
/*
* Per-vdev I/O queue state; embedded in struct vdev as vdev_queue (the
* "I/O deadline schedule queue").
* NOTE(review): vq_lock presumably protects the four trees - confirm in
* vdev_queue.c.
*/
struct vdev_queue {
avl_tree_t vq_deadline_tree;
avl_tree_t vq_read_tree;
avl_tree_t vq_write_tree;
avl_tree_t vq_pending_tree;
kmutex_t vq_lock;
};
/*
* Virtual device descriptor
*/
/*
* The members are grouped into state common to all vdevs, state used by
* top-level vdevs, and state used by leaf vdevs. Member order matters:
* see the comment above the trailing kmutex_t members.
*/
struct vdev {
/*
* Common to all vdev types.
*/
uint64_t vdev_id; /* child number in vdev parent */
uint64_t vdev_guid; /* unique ID for this vdev */
uint64_t vdev_guid_sum; /* self guid + all child guids */
uint64_t vdev_asize; /* allocatable device capacity */
uint64_t vdev_ashift; /* block alignment shift */
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
uint64_t vdev_prevstate; /* used when reopening a vdev */
vdev_ops_t *vdev_ops; /* vdev operations */
spa_t *vdev_spa; /* spa for this vdev */
void *vdev_tsd; /* type-specific data */
vdev_t *vdev_top; /* top-level vdev */
vdev_t *vdev_parent; /* parent vdev */
vdev_t **vdev_child; /* array of children */
uint64_t vdev_children; /* number of children */
space_map_t vdev_dtl_map; /* dirty time log in-core state */
space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
vdev_stat_t vdev_stat; /* virtual device statistics */
/*
* Top-level vdev state.
*/
uint64_t vdev_ms_array; /* metaslab array object */
uint64_t vdev_ms_shift; /* metaslab size shift */
uint64_t vdev_ms_count; /* number of metaslabs */
metaslab_group_t *vdev_mg; /* metaslab group */
metaslab_t **vdev_ms; /* metaslab array */
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
boolean_t vdev_remove_wanted; /* async remove wanted? */
list_node_t vdev_dirty_node; /* config dirty list */
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
uint64_t vdev_islog; /* is an intent log device */
/*
* Leaf vdev state.
*/
uint64_t vdev_psize; /* physical device capacity */
space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
uint64_t vdev_wholedisk; /* true if this is a whole disk */
uint64_t vdev_offline; /* persistent offline state */
uint64_t vdev_faulted; /* persistent faulted state */
uint64_t vdev_degraded; /* persistent degraded state */
uint64_t vdev_removed; /* persistent removed state */
uint64_t vdev_nparity; /* number of parity devices for raidz */
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
char *vdev_physpath; /* vdev device path (if any) */
uint64_t vdev_fault_arg; /* fault injection parameter */
int vdev_fault_mask; /* zio types to fault */
uint8_t vdev_fault_mode; /* fault injection mode */
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
uint8_t vdev_detached; /* device detached? */
uint64_t vdev_isspare; /* was a hot spare */
uint64_t vdev_isl2cache; /* was a l2cache device */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
uint64_t vdev_not_present; /* not present during import */
hrtime_t vdev_last_try; /* last reopen time */
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
uint64_t vdev_unspare; /* unspare when resilvering done */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
boolean_t vdev_is_failing; /* device errors seen */
/*
* For DTrace to work in userland (libzpool) context, these fields must
* remain at the end of the structure. DTrace will use the kernel's
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
* larger in userland, the offsets of any fields placed after them
* would be incorrect.
*/
/*
* NOTE(review): there is no vdev_dtl_resilver member in this structure;
* the comment on vdev_dtl_lock presumably refers to vdev_dtl_scrub.
*/
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
kmutex_t vdev_stat_lock; /* vdev_stat */
};
/*
 * Sizes, in bytes, of the four regions of a vdev_label_t:
 * 8K pad + 8K boot header + 112K vdev_phys_t + 128K uberblock ring.
 */
#define VDEV_SKIP_SIZE          (8 << 10)
#define VDEV_BOOT_HEADER_SIZE   (8 << 10)
#define VDEV_PHYS_SIZE          (112 << 10)
#define VDEV_UBERBLOCK_RING     (128 << 10)

/*
 * Uberblock slot geometry. The slot shift is the larger of the top-level
 * vdev's ashift and UBERBLOCK_SHIFT; the count, offset and size of the
 * slots in the ring are derived from it. Note that 'vd' is evaluated more
 * than once by these macros.
 */
#define VDEV_UBERBLOCK_SHIFT(vd) \
    (MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT))
#define VDEV_UBERBLOCK_COUNT(vd) \
    (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
    (offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)]))
#define VDEV_UBERBLOCK_SIZE(vd) \
    (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
/* ZFS boot block */
#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
#define VDEV_BOOT_VERSION 1 /* version number */
/*
* Boot header region of the label. vb_pad is sized so that the four
* 64-bit fields plus the padding total VDEV_BOOT_HEADER_SIZE bytes.
*/
typedef struct vdev_boot_header {
uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
uint64_t vb_version; /* VDEV_BOOT_VERSION */
uint64_t vb_offset; /* start offset (bytes) */
uint64_t vb_size; /* size (bytes) */
char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
} vdev_boot_header_t;
/*
* The vdev_phys region of the label: an nvlist buffer followed by a
* zio_block_tail_t. vp_nvlist is sized so that the two members together
* occupy VDEV_PHYS_SIZE bytes.
*/
typedef struct vdev_phys {
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
zio_block_tail_t vp_zbt;
} vdev_phys_t;
/*
* Layout of one vdev label. VDEV_UBERBLOCK_OFFSET() computes offsets into
* vl_uberblock with offsetof(), so member order and sizes must not change.
*/
typedef struct vdev_label {
char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
vdev_boot_header_t vl_boot_header; /* 8K */
vdev_phys_t vl_vdev_phys; /* 112K */
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
} vdev_label_t; /* 256K total */
/*
* vdev_dirty() flags
*/
#define VDD_METASLAB    (1 << 0)    /* 0x01 */
#define VDD_DTL         (1 << 1)    /* 0x02 */
/*
* Size and offset of embedded boot loader region on each label.
* The total size of the first two labels plus the boot area is 4MB.
*/
#define VDEV_BOOT_OFFSET        (2 * sizeof (vdev_label_t))
#define VDEV_BOOT_SIZE          (7ULL << 19)    /* 3.5M */

/*
 * Size of label regions at the start and end of each leaf device.
 * The start region is the two leading labels followed by the boot area;
 * the end region is the two trailing labels.
 */
#define VDEV_LABEL_START_SIZE   (VDEV_BOOT_OFFSET + VDEV_BOOT_SIZE)
#define VDEV_LABEL_END_SIZE     (2 * sizeof (vdev_label_t))
#define VDEV_LABELS             4

/*
 * NOTE(review): presumably the values accepted by the 'alloctype'
 * argument of vdev_alloc() - confirm against vdev.c.
 */
#define VDEV_ALLOC_LOAD         0
#define VDEV_ALLOC_ADD          1
#define VDEV_ALLOC_SPARE        2
#define VDEV_ALLOC_L2CACHE      3
/*
* Allocate or free a vdev
*/
extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
vdev_t *parent, uint_t id, int alloctype);
extern void vdev_free(vdev_t *vd);
/*
* Add or remove children and parents
*/
extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd);
extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd);
extern void vdev_compact_children(vdev_t *pvd);
extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops);
extern void vdev_remove_parent(vdev_t *cvd);
/*
* vdev load and sync
*/
extern void vdev_load(vdev_t *vd);
extern void vdev_sync(vdev_t *vd, uint64_t txg);
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
/*
* Available vdev types.
*/
extern vdev_ops_t vdev_root_ops;
extern vdev_ops_t vdev_mirror_ops;
extern vdev_ops_t vdev_replacing_ops;
extern vdev_ops_t vdev_raidz_ops;
extern vdev_ops_t vdev_disk_ops;
extern vdev_ops_t vdev_file_ops;
extern vdev_ops_t vdev_missing_ops;
extern vdev_ops_t vdev_spare_ops;
/*
* Common size functions
*/
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
extern uint64_t vdev_get_rsize(vdev_t *vd);
/*
* zdb uses this tunable, so it must be declared here to make lint happy.
*/
extern int zfs_vdev_cache_size;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_VDEV_IMPL_H */
-410
View File
@@ -1,410 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_H
#define _SYS_ZAP_H
#pragma ident "@(#)zap.h 1.6 07/10/25 SMI"
/*
* ZAP - ZFS Attribute Processor
*
* The ZAP is a module which sits on top of the DMU (Data Management
* Unit) and implements a higher-level storage primitive using DMU
* objects. Its primary consumer is the ZPL (ZFS Posix Layer).
*
* A "zapobj" is a DMU object which the ZAP uses to store attributes.
* Users should use only zap routines to access a zapobj - they should
* not access the DMU object directly using DMU routines.
*
* The attributes stored in a zapobj are name-value pairs. The name is
* a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including the
* terminating NUL). The value is an array of integers, which may be
* 1, 2, 4, or 8 bytes long. The total space used by the array (number
* of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
* Note that an 8-byte integer value can be used to store the location
* (object number) of another dmu object (which may be itself a zapobj).
* Note that you can use a zero-length attribute to store a single bit
* of information - the attribute is present or not.
*
* The ZAP routines are thread-safe. However, you must observe the
* DMU's restriction that a transaction may not be operated on
* concurrently.
*
* Any of the routines that return an int may return an I/O error (EIO
* or ECHECKSUM).
*
*
* Implementation / Performance Notes:
*
* The ZAP is intended to operate most efficiently on attributes with
* short (49 bytes or less) names and single 8-byte values, for which
* the microzap will be used. The ZAP should be efficient enough so
* that the user does not need to cache these attributes.
*
* The ZAP's locking scheme makes its routines thread-safe. Operations
* on different zapobjs will be processed concurrently. Operations on
* the same zapobj which only read data will be processed concurrently.
* Operations on the same zapobj which modify data will be processed
* concurrently when there are many attributes in the zapobj (because
* the ZAP uses per-block locking - more than 128 * (number of cpus)
* small attributes will suffice).
*/
/*
* We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
* strings) for the names of attributes, rather than a byte string
* bounded by an explicit length. If some day we want to support names
* in character sets which have embedded zeros (eg. UTF-16, UTF-32),
* we'll have to add routines for using length-bounded strings.
*/
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Maximum attribute name length in bytes, including the terminator. */
#define ZAP_MAXNAMELEN 256
/* Maximum total bytes in a value array (number of integers * integer size). */
#define ZAP_MAXVALUELEN 1024
/*
* The matchtype specifies which entry will be accessed.
* MT_EXACT: only find an exact match (non-normalized)
* MT_FIRST: find the "first" normalized (case and Unicode
* form) match; the designated "first" match will not change as long
* as the set of entries with this normalization doesn't change
* MT_BEST: if there is an exact match, find that, otherwise find the
* first normalized match
*/
/*
 * Lookup match policy. The numeric values are spelled out so that they do
 * not silently change if an enumerator is ever inserted.
 */
typedef enum matchtype {
    MT_EXACT = 0,   /* exact (non-normalized) match only */
    MT_BEST  = 1,   /* exact match if any, else first normalized match */
    MT_FIRST = 2    /* "first" normalized match */
} matchtype_t;
/*
* Create a new zapobj with no attributes and return its object number.
* MT_EXACT will cause the zap object to only support MT_EXACT lookups,
* otherwise any matchtype can be used for lookups.
*
* normflags specifies what normalization will be done. values are:
* 0: no normalization (legacy on-disk format, supports MT_EXACT matching
* only)
* U8_TEXTPREP_TOLOWER: case normalization will be performed.
* MT_FIRST/MT_BEST matching will find entries that match without
* regard to case (eg. looking for "foo" can find an entry "Foo").
* Eventually, other flags will permit unicode normalization as well.
*/
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
* object number.
*/
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
/*
* The zapobj passed in must be a valid ZAP object for all of the
* following routines.
*/
/*
* Destroy this zapobj and all its attributes.
*
* Frees the object number using dmu_object_free.
*/
int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
/*
* Manipulate attributes.
*
* 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
*/
/*
* Retrieve the contents of the attribute with the given name.
*
* If the requested attribute does not exist, the call will fail and
* return ENOENT.
*
* If 'integer_size' is smaller than the attribute's integer size, the
* call will fail and return EINVAL.
*
* If 'integer_size' is equal to or larger than the attribute's integer
* size, the call will succeed and return 0. When converting to a
* larger integer size, the integers will be treated as unsigned (ie. no
* sign-extension will be performed).
*
* 'num_integers' is the length (in integers) of 'buf'.
*
* If the attribute is longer than the buffer, as many integers as will
* fit will be transferred to 'buf'. If the entire attribute was not
* transferred, the call will return EOVERFLOW.
*
* If rn_len is nonzero, realname will be set to the name of the found
* entry (which may be different from the requested name if matchtype is
* not MT_EXACT).
*
* If normalization_conflictp is not NULL, it will be set if there is
* another name with the same case/unicode normalized form.
*/
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf,
matchtype_t mt, char *realname, int rn_len,
boolean_t *normalization_conflictp);
/*
* Create an attribute with the given name and value.
*
* If an attribute with the given name already exists, the call will
* fail and return EEXIST.
*/
int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
/*
* Set the attribute with the given name to the given value. If an
* attribute with the given name does not exist, it will be created. If
* an attribute with the given name already exists, the previous value
* will be overwritten. The integer_size may be different from the
* existing attribute's integer size, in which case the attribute's
* integer size will be updated to the new value.
*/
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
/*
* Get the length (in integers) and the integer size of the specified
* attribute.
*
* If the requested attribute does not exist, the call will fail and
* return ENOENT.
*/
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
uint64_t *integer_size, uint64_t *num_integers);
/*
* Remove the specified attribute.
*
* If the specified attribute does not exist, the call will fail and
* return ENOENT.
*/
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
matchtype_t mt, dmu_tx_t *tx);
/*
* Returns (in *count) the number of attributes in the specified zap
* object.
*/
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
/*
* Returns (in name) the name of the entry whose (value & mask)
* (za_first_integer) is value, or ENOENT if not found. The string
* pointed to by name must be at least 256 bytes long. If mask==0, the
* match must be exact (ie, same as mask=-1ULL).
*/
int zap_value_search(objset_t *os, uint64_t zapobj,
uint64_t value, uint64_t mask, char *name);
struct zap;
struct zap_leaf;
/*
* Iteration state for walking the attributes of a zapobj. Set it up with
* zap_cursor_init() or zap_cursor_init_serialized() and release it with
* zap_cursor_fini(); callers must not touch the members directly.
*/
typedef struct zap_cursor {
/* This structure is opaque! */
objset_t *zc_objset;
struct zap *zc_zap;
struct zap_leaf *zc_leaf;
uint64_t zc_zapobj;
uint64_t zc_hash;
uint32_t zc_cd;
} zap_cursor_t;
/*
* Description of one attribute, as filled in by zap_cursor_retrieve().
*/
typedef struct {
int za_integer_length; /* NOTE(review): presumably bytes per integer */
/*
* za_normalization_conflict will be set if there are additional
* entries with this normalized form (eg, "foo" and "Foo").
*/
boolean_t za_normalization_conflict;
uint64_t za_num_integers;
uint64_t za_first_integer; /* no sign extension for <8byte ints */
char za_name[MAXNAMELEN];
} zap_attribute_t;
/*
* The interface for listing all the attributes of a zapobj can be
* thought of as a cursor moving down a list of the attributes one by
* one. The cookie returned by the zap_cursor_serialize routine is
* persistent across system calls (and across reboot, even).
*/
/*
* Initialize a zap cursor, pointing to the "first" attribute of the
* zapobj. You must _fini the cursor when you are done with it.
*/
void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj);
void zap_cursor_fini(zap_cursor_t *zc);
/*
* Get the attribute currently pointed to by the cursor. Returns
* ENOENT if at the end of the attributes.
*/
int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
/*
* Advance the cursor to the next attribute.
*/
void zap_cursor_advance(zap_cursor_t *zc);
/*
* Get a persistent cookie pointing to the current position of the zap
* cursor. The low 4 bits in the cookie are always zero, and thus can
* be used to differentiate a serialized cookie from a different type
* of value. The cookie will be less than 2^32 as long as there are
* fewer than 2^22 (4.2 million) entries in the zap object.
*/
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
/*
* Initialize a zap cursor pointing to the position recorded by
* zap_cursor_serialize (in the "serialized" argument). You can also
* use a "serialized" argument of 0 to start at the beginning of the
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
* zap_cursor_init(...).)
*/
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
uint64_t zapobj, uint64_t serialized);
/* Number of buckets in each histogram array of zap_stats_t. */
#define ZAP_HISTOGRAM_SIZE 10
/*
* Statistics about one ZAP object, filled in by zap_get_stats().
*/
typedef struct zap_stats {
/*
* Size of the pointer table (in number of entries).
* This is always a power of 2, or zero if it's a microzap.
* In general, it should be considerably greater than zs_num_leafs.
*/
uint64_t zs_ptrtbl_len;
uint64_t zs_blocksize; /* size of zap blocks */
/*
* The number of blocks used. Note that some blocks may be
* wasted because old ptrtbl's and large name/value blocks are
* not reused. (Although their space is reclaimed, we don't
* reuse those offsets in the object.)
*/
uint64_t zs_num_blocks;
/*
* Pointer table values from zap_ptrtbl in the zap_phys_t
*/
uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */
uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */
uint64_t zs_ptrtbl_zt_blk; /* starting block number */
uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */
uint64_t zs_ptrtbl_zt_shift; /* bits to index it */
/*
* Values of the other members of the zap_phys_t
*/
uint64_t zs_block_type; /* ZBT_HEADER */
uint64_t zs_magic; /* ZAP_MAGIC */
uint64_t zs_num_leafs; /* The number of leaf blocks */
uint64_t zs_num_entries; /* The number of zap entries */
uint64_t zs_salt; /* salt to stir into hash function */
/*
* Histograms. For all histograms, the last index
* (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
* than what can be represented. For example
* zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
* of leafs with 45 or more entries.
*/
/*
* zs_leafs_with_2n_pointers[n] is the number of leafs with
* 2^n pointers to it.
*/
uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
/*
* zs_blocks_with_n5_entries[n] is the number of leafs with
* [n*5, (n+1)*5) entries. In the current implementation, there
* can be at most 55 entries in any block, but there may be
* fewer if the name or value is large, or the block is not
* completely full.
*/
uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
/*
* zs_blocks_n_tenths_full[n] is the number of leafs whose
* fullness is in the range [n/10, (n+1)/10).
*/
uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
/*
* zs_entries_using_n_chunks[n] is the number of entries which
* consume n 24-byte chunks. (Note, large names/values only use
* one chunk, but contribute to zs_num_blocks_large.)
* NOTE(review): this structure has no zs_num_blocks_large member;
* the reference above looks stale.
*/
uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
/*
* zs_buckets_with_n_entries[n] is the number of buckets (each
* leaf has 64 buckets) with n entries.
* zs_buckets_with_n_entries[1] should be very close to
* zs_num_entries.
*/
uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
} zap_stats_t;
/*
* Get statistics about a ZAP object. Note: you need to be aware of the
* internal implementation of the ZAP to correctly interpret some of the
* statistics. This interface shouldn't be relied on unless you really
* know what you're doing.
*/
int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZAP_H */
-218
View File
@@ -1,218 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_IMPL_H
#define _SYS_ZAP_IMPL_H
#pragma ident "@(#)zap_impl.h 1.9 07/10/30 SMI"
#include <sys/zap.h>
#include <sys/zfs_context.h>
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
extern int fzap_default_block_shift;
/* Value carried in zap_phys_t.zap_magic (see the member's comment). */
#define ZAP_MAGIC 0x2F52AB2ABULL

/* Block-size shift of a fat zap, read from its in-core zap_t. */
#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)

/*
 * Largest collision-differentiator value (all 32 bits set). The whole
 * expansion is parenthesized so that it behaves as a single operand in
 * any context; a bare cast expression does not (for example
 * "sizeof ZAP_MAXCD" would otherwise fail to parse).
 */
#define ZAP_MAXCD ((uint32_t)(-1))
#define ZAP_HASHBITS 28

/*
 * A microzap entry (mzap_ent_phys_t) is MZAP_ENT_LEN bytes: an 8-byte
 * value, a 4-byte cd and a 2-byte pad, with the remainder left for the
 * name.
 */
#define MZAP_ENT_LEN 64
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
/*
* One microzap entry: 8 + 4 + 2 bytes of fixed fields followed by
* MZAP_NAME_LEN bytes of name, MZAP_ENT_LEN bytes in total.
*/
typedef struct mzap_ent_phys {
uint64_t mze_value;
uint32_t mze_cd;
uint16_t mze_pad; /* in case we want to chain them someday */
char mze_name[MZAP_NAME_LEN];
} mzap_ent_phys_t;
/*
* Microzap block: eight 64-bit header words (64 bytes) followed by an
* array of entries. mz_chunk is declared with one element, but the
* real element count depends on the block size.
*/
typedef struct mzap_phys {
uint64_t mz_block_type; /* ZBT_MICRO */
uint64_t mz_salt;
uint64_t mz_normflags;
uint64_t mz_pad[5];
mzap_ent_phys_t mz_chunk[1];
/* actually variable size depending on block size */
} mzap_phys_t;
/*
* In-core microzap entry: a copy of the physical entry plus an AVL node.
* NOTE(review): presumably linked into zap_t's zap_m.zap_avl - confirm in
* zap_micro.c.
*/
typedef struct mzap_ent {
avl_node_t mze_node;
int mze_chunkid;
uint64_t mze_hash;
mzap_ent_phys_t mze_phys;
} mzap_ent_t;
/*
* The (fat) zap is stored in one object. It is an array of
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
*
* ptrtbl fits in first block:
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
*
* ptrtbl too big for first block:
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
*
*/
struct dmu_buf;
struct zap_leaf;
/*
* Block type tags. Each is bit 63 plus a small code; they are the values
* the *_block_type members of the physical structures are commented as
* holding.
*/
#define ZBT_LEAF ((1ULL << 63) + 0)
#define ZBT_HEADER ((1ULL << 63) + 1)
#define ZBT_MICRO ((1ULL << 63) + 3)
/* any other values are ptrtbl blocks */
/*
* the embedded pointer table takes up half a block:
* block size / entry size (2^3) / 2
*/
#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
/*
* The embedded pointer table starts half-way through the block. Since
* the pointer table itself is half the block, it starts at (64-bit)
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
*
* The macro expands to an array element (an lvalue of type uint64_t), so
* it can be both read and assigned. 'zap' is evaluated more than once.
*/
#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
((uint64_t *)(zap)->zap_f.zap_phys) \
[(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
/*
* TAKE NOTE:
* If zap_phys_t is modified, zap_byteswap() must be modified.
*/
/*
* Header of a fat zap. All members are 64-bit words; the nested
* zap_table_phys describes the pointer table.
*/
typedef struct zap_phys {
uint64_t zap_block_type; /* ZBT_HEADER */
uint64_t zap_magic; /* ZAP_MAGIC */
struct zap_table_phys {
uint64_t zt_blk; /* starting block number */
uint64_t zt_numblks; /* number of blocks */
uint64_t zt_shift; /* bits to index it */
uint64_t zt_nextblk; /* next (larger) copy start block */
uint64_t zt_blks_copied; /* number source blocks copied */
} zap_ptrtbl;
uint64_t zap_freeblk; /* the next free block */
uint64_t zap_num_leafs; /* number of leafs */
uint64_t zap_num_entries; /* number of entries */
uint64_t zap_salt; /* salt to stir into hash function */
uint64_t zap_normflags; /* flags for u8_textprep_str() */
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
* half of the block. It is accessed using the
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
*/
} zap_phys_t;
/* File-scope name for the table descriptor nested in zap_phys_t. */
typedef struct zap_table_phys zap_table_phys_t;
/*
* In-core state for one zap object. The zap_u union holds either the
* fat-zap state (zap_fat, reached through the zap_f shorthand) or the
* microzap state (zap_micro, reached through zap_m).
* NOTE(review): zap_ismicro presumably says which arm is valid - confirm.
*/
typedef struct zap {
objset_t *zap_objset;
uint64_t zap_object;
struct dmu_buf *zap_dbuf;
krwlock_t zap_rwlock;
boolean_t zap_ismicro;
int zap_normflags;
uint64_t zap_salt;
union {
struct {
zap_phys_t *zap_phys;
/*
* zap_num_entries_mtx protects
* zap_num_entries
*/
kmutex_t zap_num_entries_mtx;
int zap_block_shift;
} zap_fat;
struct {
mzap_phys_t *zap_phys;
int16_t zap_num_entries;
int16_t zap_num_chunks;
int16_t zap_alloc_next;
avl_tree_t zap_avl;
} zap_micro;
} zap_u;
} zap_t;
/*
* A name being looked up in a zap, together with its hash and match
* type. Allocated by zap_name_alloc() and released by zap_name_free().
* NOTE(review): zn_name_norm presumably points at zn_normbuf when the
* name has been normalized - confirm in zap_micro.c.
*/
typedef struct zap_name {
zap_t *zn_zap;
const char *zn_name_orij; /* sic: "orij" is the identifier's spelling */
uint64_t zn_hash;
matchtype_t zn_matchtype;
const char *zn_name_norm;
char zn_normbuf[ZAP_MAXNAMELEN];
} zap_name_t;
/* Shorthands for the two arms of the zap_u union in zap_t. */
#define zap_f zap_u.zap_fat
#define zap_m zap_u.zap_micro
boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap);
void zap_evict(dmu_buf_t *db, void *vmzap);
zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
/*
 * Top 'n' bits of a 64-bit hash, used as an index. n == 0 is handled
 * separately (yielding 0) because shifting a 64-bit value by 64 is
 * undefined. 'n' is evaluated more than once.
 */
#define ZAP_HASH_IDX(hash, n) \
    (((n) != 0) ? ((hash) >> (64 - (n))) : 0)
void fzap_byteswap(void *buf, size_t size);
int fzap_count(zap_t *zap, uint64_t *count);
int fzap_lookup(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers, void *buf,
char *realname, int rn_len, boolean_t *normalization_conflictp);
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
int fzap_update(zap_name_t *zn,
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
int fzap_length(zap_name_t *zn,
uint64_t *integer_size, uint64_t *num_integers);
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
void zap_put_leaf(struct zap_leaf *l);
int fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
const void *val, uint32_t cd, dmu_tx_t *tx);
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZAP_IMPL_H */
-244
View File
@@ -1,244 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZAP_LEAF_H
#define _SYS_ZAP_LEAF_H
#pragma ident "@(#)zap_leaf.h 1.5 07/11/16 SMI"
#ifdef __cplusplus
extern "C" {
#endif
struct zap;
#define ZAP_LEAF_MAGIC 0x2AB1EAF

/* chunk size = 24 bytes */
#define ZAP_LEAF_CHUNKSIZE 24

/*
 * The leaf hash table has block size / 2^5 (32) number of entries,
 * which should be more than enough for the maximum number of entries,
 * which is less than block size / CHUNKSIZE (24) / minimum number of
 * chunks per entry (3).
 */
#define ZAP_LEAF_HASH_SHIFT(l) \
    ((l)->l_bs - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(l) \
    (1 << ZAP_LEAF_HASH_SHIFT(l))

/*
 * The amount of space available for chunks is:
 * block size (1<<l->l_bs) - hash entry size (2) * number of hash
 * entries - header space (2*chunksize)
 */
#define ZAP_LEAF_NUMCHUNKS(l) \
    (((1 << (l)->l_bs) - 2 * ZAP_LEAF_HASH_NUMENTRIES(l)) / \
    ZAP_LEAF_CHUNKSIZE - 2)

/*
 * The amount of space within the chunk available for the array is:
 * chunk size - space for type (1) - space for next pointer (2)
 */
#define ZAP_LEAF_ARRAY_BYTES \
    (ZAP_LEAF_CHUNKSIZE - 3)

/* Number of array chunks needed to hold 'bytes' bytes (rounds up). */
#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
    (((bytes) + ZAP_LEAF_ARRAY_BYTES - 1) / ZAP_LEAF_ARRAY_BYTES)

/*
 * Low water mark: when there are only this many chunks free, start
 * growing the ptrtbl. Ideally, this should be larger than a
 * "reasonably-sized" entry. 20 chunks is more than enough for the
 * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
 * while still being only around 3% for 16k blocks.
 */
#define ZAP_LEAF_LOW_WATER (20)
/*
* The chunks start immediately after the hash table. The end of the
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
* chunk_t.
*/
/*
* ZAP_LEAF_CHUNK(l, idx) expands to the idx'th zap_leaf_chunk_t (an
* lvalue); ZAP_LEAF_ENTRY(l, idx) is the address of that chunk's l_entry
* member. 'l' is evaluated more than once.
*/
#define ZAP_LEAF_CHUNK(l, idx) \
((zap_leaf_chunk_t *) \
((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
/*
 * Type tag kept in the first byte of every leaf chunk (le_type, la_type,
 * lf_type). Listed in ascending numeric order; the values themselves are
 * unchanged.
 */
typedef enum zap_chunk_type {
    ZAP_CHUNK_TYPE_MAX = 250,
    ZAP_CHUNK_ARRAY    = 251,
    ZAP_CHUNK_ENTRY    = 252,
    ZAP_CHUNK_FREE     = 253
} zap_chunk_type_t;
/* Bit for lh_flags in the leaf header. */
#define ZLF_ENTRIES_CDSORTED (1<<0)
/*
* TAKE NOTE:
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
*/
typedef struct zap_leaf_phys {
struct zap_leaf_header {
uint64_t lh_block_type; /* ZBT_LEAF */
uint64_t lh_pad1;
uint64_t lh_prefix; /* hash prefix of this leaf */
uint32_t lh_magic; /* ZAP_LEAF_MAGIC */
uint16_t lh_nfree; /* number free chunks */
uint16_t lh_nentries; /* number of entries */
uint16_t lh_prefix_len; /* num bits used to id this */
/* above is accessible to zap, below is zap_leaf private */
uint16_t lh_freelist; /* chunk head of free list */
uint8_t lh_flags; /* ZLF_* flags */
uint8_t lh_pad2[11]; /* pads the header out to 48 bytes */
} l_hdr; /* 2 24-byte chunks */
/*
* The header is followed by a hash table with
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
* zap_leaf_chunk structures. These structures are accessed
* with the ZAP_LEAF_CHUNK() macro.
*/
uint16_t l_hash[1];
} zap_leaf_phys_t;
/*
* One leaf chunk. Each of the three variants is ZAP_LEAF_CHUNKSIZE (24)
* bytes and starts with a one-byte type tag; the array and free variants
* end with a 16-bit "next" link.
*/
typedef union zap_leaf_chunk {
struct zap_leaf_entry {
uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
uint8_t le_int_size; /* size of ints */
uint16_t le_next; /* next entry in hash chain */
uint16_t le_name_chunk; /* first chunk of the name */
uint16_t le_name_length; /* bytes in name, incl null */
uint16_t le_value_chunk; /* first chunk of the value */
uint16_t le_value_length; /* value length in ints */
uint32_t le_cd; /* collision differentiator */
uint64_t le_hash; /* hash value of the name */
} l_entry;
struct zap_leaf_array {
uint8_t la_type; /* always ZAP_CHUNK_ARRAY */
uint8_t la_array[ZAP_LEAF_ARRAY_BYTES];
uint16_t la_next; /* next blk or CHAIN_END */
} l_array;
struct zap_leaf_free {
uint8_t lf_type; /* always ZAP_CHUNK_FREE */
uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES];
uint16_t lf_next; /* next in free list, or CHAIN_END */
} l_free;
} zap_leaf_chunk_t;
/*
 * Handle for one leaf block.  l_phys points at the leaf's zap_leaf_phys_t
 * contents; l_dbuf is presumably the dmu buffer that backs it (confirm in
 * zap.c).
 */
typedef struct zap_leaf {
krwlock_t l_rwlock; /* only used on head of chain */
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */
dmu_buf_t *l_dbuf;
zap_leaf_phys_t *l_phys;
} zap_leaf_t;
/*
 * Handle describing one entry within a leaf.  It is the out-parameter of
 * the lookup and create routines declared in this header and the input to
 * the zap_entry_*() read/update/remove routines.
 */
typedef struct zap_entry_handle {
/* below is set by zap_leaf.c and is public to zap.c */
uint64_t zeh_num_integers;
uint64_t zeh_hash;
uint32_t zeh_cd;
uint8_t zeh_integer_size;
/* below is private to zap_leaf.c */
uint16_t zeh_fakechunk;
uint16_t *zeh_chunkp;
zap_leaf_t *zeh_leaf;
} zap_entry_handle_t;
/*
* Return a handle to the named entry, or ENOENT if not found. The hash
* value must equal zap_hash(name).
*/
extern int zap_leaf_lookup(zap_leaf_t *l,
zap_name_t *zn, zap_entry_handle_t *zeh);
/*
* Return a handle to the entry with this hash+cd, or the entry with the
* next closest hash+cd.
*/
extern int zap_leaf_lookup_closest(zap_leaf_t *l,
uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
/*
* Read the first num_integers in the attribute. Integer size
* conversion will be done without sign extension. Return EINVAL if
* integer_size is too small. Return EOVERFLOW if there are more than
* num_integers in the attribute.
*/
extern int zap_entry_read(const zap_entry_handle_t *zeh,
uint8_t integer_size, uint64_t num_integers, void *buf);
extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
uint16_t buflen, char *buf);
/*
* Replace the value of an existing entry.
*
* zap_entry_update may fail if it runs out of space (ENOSPC).
*/
extern int zap_entry_update(zap_entry_handle_t *zeh,
uint8_t integer_size, uint64_t num_integers, const void *buf);
/*
* Remove an entry.
*/
extern void zap_entry_remove(zap_entry_handle_t *zeh);
/*
* Create an entry. An equal entry must not exist, and this entry must
* belong in this leaf (according to its hash value). Fills in the
* entry handle on success. Returns 0 on success or ENOSPC on failure.
*/
extern int zap_entry_create(zap_leaf_t *l,
const char *name, uint64_t h, uint32_t cd,
uint8_t integer_size, uint64_t num_integers, const void *buf,
zap_entry_handle_t *zeh);
/*
* Return true if there are additional entries with the same normalized
* form.
*/
extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
zap_name_t *zn, const char *name, zap_t *zap);
/*
* Other stuff.
*/
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZAP_LEAF_H */
-215
View File
@@ -1,215 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_ACL_H
#define _SYS_FS_ZFS_ACL_H
#pragma ident "@(#)zfs_acl.h 1.9 08/04/08 SMI"
#ifdef _KERNEL
#include <sys/isa_defs.h>
#include <sys/types32.h>
#endif
#include <sys/acl.h>
#include <sys/dmu.h>
#include <sys/zfs_fuid.h>
#ifdef __cplusplus
extern "C" {
#endif
struct znode_phys;
#define ACE_SLOT_CNT 6
#define ZFS_ACL_VERSION_INITIAL 0ULL
#define ZFS_ACL_VERSION_FUID 1ULL
#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID
/*
* ZFS ACLs are stored in various forms.
* Files created with ACL version ZFS_ACL_VERSION_INITIAL
* will all be created with fixed length ACEs of type
* zfs_oldace_t.
*
* Files with ACL version ZFS_ACL_VERSION_FUID will be created
* with various sized ACEs. The abstraction entries will utilize
* zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t
* and some specialized CIFS ACEs will use zfs_object_ace_t.
*/
/*
* All ACEs have a common hdr. For
* owner@, group@, and everyone@ this is all
* that's needed.
*/
typedef struct zfs_ace_hdr {
	uint16_t	z_type;		/* type of entry */
	uint16_t	z_flags;	/* flags */
	uint32_t	z_access_mask;	/* access mask */
} zfs_ace_hdr_t;

/* Abstract entries (owner@, group@, everyone@) are the header alone. */
typedef zfs_ace_hdr_t zfs_ace_abstract_t;
/*
* Standard ACE
*/
typedef struct zfs_ace {
zfs_ace_hdr_t z_hdr;
uint64_t z_fuid;
} zfs_ace_t;
/*
* The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE
* and will only be set/retrieved in a CIFS context.
*/
typedef struct zfs_object_ace {
zfs_ace_t z_ace;
uint8_t z_object_type[16]; /* object type */
uint8_t z_inherit_type[16]; /* inherited object type */
} zfs_object_ace_t;
/*
 * Fixed-length ACE used by ACLs of version ZFS_ACL_VERSION_INITIAL.
 */
typedef struct zfs_oldace {
	uint32_t	z_fuid;		/* "who" */
	uint32_t	z_access_mask;	/* access mask */
	uint16_t	z_flags;	/* flags, i.e. inheritance */
	uint16_t	z_type;		/* type of entry allow/deny */
} zfs_oldace_t;
typedef struct zfs_acl_phys_v0 {
uint64_t z_acl_extern_obj; /* ext acl pieces */
uint32_t z_acl_count; /* Number of ACEs */
uint16_t z_acl_version; /* acl version */
uint16_t z_acl_pad; /* pad */
zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
} zfs_acl_phys_v0_t;
/*
 * Bytes of ACE storage embedded in the ACL header: room for ACE_SLOT_CNT
 * old-style (zfs_oldace_t) entries.
 */
#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
/*
 * ACL header whose embedded ACE area is a raw byte array of ZFS_ACE_SPACE
 * bytes, i.e. the same size as the z_ace_data[ACE_SLOT_CNT] array in
 * zfs_acl_phys_v0_t.  Unlike the v0 header it records the ACL size in bytes
 * as well as the ACE count.
 * NOTE(review): presumably the ZFS_ACL_VERSION_FUID counterpart of
 * zfs_acl_phys_v0_t -- confirm in zfs_acl.c.
 */
typedef struct zfs_acl_phys {
uint64_t z_acl_extern_obj; /* ext acl pieces */
uint32_t z_acl_size; /* Number of bytes in ACL */
uint16_t z_acl_version; /* acl version */
uint16_t z_acl_count; /* ace count */
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
} zfs_acl_phys_t;
/*
 * Table of ACE accessor callbacks (held by zfs_acl_t as z_ops).  Callbacks
 * that operate on an entry receive it as an untyped pointer.
 */
typedef struct acl_ops {
	/* access mask: get / set */
	uint32_t	(*ace_mask_get)(void *acep);
	void		(*ace_mask_set)(void *acep, uint32_t mask);

	/* flags: get / set */
	uint16_t	(*ace_flags_get)(void *acep);
	void		(*ace_flags_set)(void *acep, uint16_t flags);

	/* type: get / set */
	uint16_t	(*ace_type_get)(void *acep);
	void		(*ace_type_set)(void *acep, uint16_t type);

	/* who/fuid: get / set */
	uint64_t	(*ace_who_get)(void *acep);
	void		(*ace_who_set)(void *acep, uint64_t who);

	/* how big is this ace */
	size_t		(*ace_size)(void *acep);
	/* sizeof abstract entry */
	size_t		(*ace_abstract_size)(void);
	/* off of access mask in ace */
	int		(*ace_mask_off)(void);
	/* ptr to data if any */
	int		(*ace_data)(void *acep, void **datap);
} acl_ops_t;
/*
* A zfs_acl_t structure is composed of a list of zfs_acl_node_t's.
* Each node will have one or more ACEs associated with it. You will
* only have multiple nodes during a chmod operation. Normally only
* one node is required.
*/
typedef struct zfs_acl_node {
list_node_t z_next; /* Next chunk of ACEs */
void *z_acldata; /* pointer into actual ACE(s) */
void *z_allocdata; /* pointer to kmem allocated memory */
size_t z_allocsize; /* Size of blob in bytes */
size_t z_size; /* length of ACL data */
int z_ace_count; /* number of ACEs in this acl node */
int z_ace_idx; /* ace iterator positioned on */
} zfs_acl_node_t;
typedef struct zfs_acl {
int z_acl_count; /* Number of ACEs */
size_t z_acl_bytes; /* Number of bytes in ACL */
uint_t z_version; /* version of ACL */
void *z_next_ace; /* pointer to next ACE */
int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
list_t z_acl; /* chunks of ACE data */
acl_ops_t z_ops; /* ACL operations */
boolean_t z_has_fuids; /* FUIDs present in ACL? */
} zfs_acl_t;
#define ACL_DATA_ALLOCED 0x1
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
/*
* Property values for acl_mode and acl_inherit.
*
* acl_mode can take discard, noallow, groupmask and passthrough,
* whereas acl_inherit has secure instead of groupmask.
*/
#define ZFS_ACL_DISCARD 0
#define ZFS_ACL_NOALLOW 1
#define ZFS_ACL_GROUPMASK 2
#define ZFS_ACL_PASSTHROUGH 3
#define ZFS_ACL_RESTRICTED 4
struct znode;
struct zfsvfs;
struct zfs_fuid_info;
#ifdef _KERNEL
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
void zfs_acl_rele(void *);
void zfs_oldace_byteswap(ace_t *, int);
void zfs_ace_byteswap(void *, size_t, boolean_t);
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
extern int zfs_acl_access(struct znode *, int, cred_t *);
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
struct znode *, struct znode *, cred_t *cr);
void zfs_acl_free(zfs_acl_t *);
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
struct zfs_fuid_info **, dmu_tx_t *);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_ACL_H */
@@ -1,73 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_CONTEXT_H
#define _SYS_ZFS_CONTEXT_H
#pragma ident "@(#)zfs_context.h 1.3 07/10/24 SMI"
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/note.h>
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/buf.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/kobj.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/random.h>
#include <sys/byteorder.h>
#include <sys/systm.h>
#include <sys/list.h>
#include <sys/uio.h>
#include <sys/dirent.h>
#include <sys/time.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/uio.h>
#include <sys/zfs_debug.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/fm/util.h>
#define CPU_SEQID (CPU->cpu_seqid)
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_CONTEXT_H */
@@ -1,538 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_CONTEXT_H
#define _SYS_ZFS_CONTEXT_H
#ifdef __cplusplus
extern "C" {
#endif
/*
 * NOTE(review): these look like the include guards of the corresponding
 * <sys/...> kernel headers, defined up front so that those headers expand
 * to nothing and the userland replacements declared later in this file are
 * used instead -- confirm against the system headers.
 */
#define _SYS_MUTEX_H
#define _SYS_RWLOCK_H
#define _SYS_CONDVAR_H
#define _SYS_SYSTM_H
#define _SYS_DEBUG_H
#define _SYS_T_LOCK_H
#define _SYS_VNODE_H
#define _SYS_VFS_H
#define _SYS_SUNDDI_H
#define _SYS_CALLB_H
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdarg.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <synch.h>
#include <thread.h>
#include <assert.h>
#include <alloca.h>
#include <umem.h>
#include <limits.h>
#include <atomic.h>
#include <dirent.h>
#include <time.h>
#include <sys/note.h>
#include <sys/types.h>
#include <sys/cred.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/resource.h>
#include <sys/byteorder.h>
#include <sys/list.h>
#include <sys/uio.h>
#include <sys/zfs_debug.h>
#include <sys/sdt.h>
#include <sys/kstat.h>
#include <sys/u8_textprep.h>
#include <sys/sysevent/eventdefs.h>
/*
* Debugging
*/
/*
* Note that we are not using the debugging levels.
*/
#define CE_CONT 0 /* continuation */
#define CE_NOTE 1 /* notice */
#define CE_WARN 2 /* warning */
#define CE_PANIC 3 /* panic */
#define CE_IGNORE 4 /* print nothing */
/*
* ZFS debugging
*/
#ifdef ZFS_DEBUG
extern void dprintf_setup(int *argc, char **argv);
#endif /* ZFS_DEBUG */
extern void cmn_err(int, const char *, ...);
extern void vcmn_err(int, const char *, __va_list);
extern void panic(const char *, ...);
extern void vpanic(const char *, __va_list);
#define fm_panic panic
/* This definition is copied from assert.h. */
#if defined(__STDC__)
#if __STDC_VERSION__ - 0 >= 199901L
#define verify(EX) (void)((EX) || \
(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
#else
#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
#endif /* __STDC_VERSION__ - 0 >= 199901L */
#else
#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
#endif /* __STDC__ */
#define VERIFY verify
#define ASSERT assert
extern void __assert(const char *, const char *, int);
/*
 * VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE): check that (LEFT OP RIGHT) holds after
 * converting both operands to TYPE.  Each operand is evaluated exactly once
 * into a const temporary.  On failure the expression text and both values
 * (printed in hex via u_longlong_t) are formatted into a 256-byte alloca()
 * buffer and handed to __assert() together with the file and line.
 *
 * Under lint a trivial stand-in is used that ignores the operator and type.
 */
#ifdef lint
#define VERIFY3_IMPL(x, y, z, t) if (x == z) ((void)0)
#else
/* BEGIN CSTYLED */
#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
const TYPE __left = (TYPE)(LEFT); \
const TYPE __right = (TYPE)(RIGHT); \
if (!(__left OP __right)) { \
char *__buf = alloca(256); \
(void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
#LEFT, #OP, #RIGHT, \
(u_longlong_t)__left, #OP, (u_longlong_t)__right); \
__assert(__buf, __FILE__, __LINE__); \
} \
_NOTE(CONSTCOND) } while (0)
/* END CSTYLED */
#endif /* lint */
/*
 * Typed front ends for VERIFY3_IMPL: the S forms compare as int64_t, the U
 * forms as uint64_t and the P forms as uintptr_t.  The VERIFY3* forms are
 * always active.  The ASSERT3* forms expand to ((void)0) when NDEBUG is
 * defined, so their operands are not evaluated in that configuration.
 */
#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
#ifdef NDEBUG
#define ASSERT3S(x, y, z) ((void)0)
#define ASSERT3U(x, y, z) ((void)0)
#define ASSERT3P(x, y, z) ((void)0)
#else
#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
#endif
/*
* DTrace SDT probes have different signatures in userland than they do in
* kernel. If they're being used in kernel code, re-define them out of
* existence for their counterparts in libzpool.
*/
#ifdef DTRACE_PROBE1
#undef DTRACE_PROBE1
#define DTRACE_PROBE1(a, b, c) ((void)0)
#endif /* DTRACE_PROBE1 */
#ifdef DTRACE_PROBE2
#undef DTRACE_PROBE2
#define DTRACE_PROBE2(a, b, c, d, e) ((void)0)
#endif /* DTRACE_PROBE2 */
#ifdef DTRACE_PROBE3
#undef DTRACE_PROBE3
#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0)
#endif /* DTRACE_PROBE3 */
#ifdef DTRACE_PROBE4
#undef DTRACE_PROBE4
#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0)
#endif /* DTRACE_PROBE4 */
/*
* Threads
*/
/* Current-thread identity: the thr_self() id cast to a pointer. */
#define curthread ((void *)(uintptr_t)thr_self())
typedef struct kthread kthread_t;
/*
 * thread_create() keeps the kernel argument list but forwards only func and
 * arg to zk_thread_create(); stk, stksize, len, pp, state and pri are
 * discarded.
 */
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
zk_thread_create(func, arg)
#define thread_exit() thr_exit(NULL)
extern kthread_t *zk_thread_create(void (*func)(), void *arg);
/* Signal checks always evaluate to FALSE; their arguments are ignored. */
#define issig(why) (FALSE)
#define ISSIG(thr, why) (FALSE)
/*
* Mutexes
*/
/*
 * kmutex_t as defined here wraps a mutex_t (m_lock) together with an owner
 * pointer and an "initialized" flag.
 */
typedef struct kmutex {
void *m_owner;
boolean_t initialized;
mutex_t m_lock;
} kmutex_t;
#define MUTEX_DEFAULT USYNC_THREAD
/* Replace any earlier MUTEX_HELD with one that queries the wrapped m_lock. */
#undef MUTEX_HELD
#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock)
/*
 * Argh -- we have to get cheesy here because the kernel and userland
 * have different signatures for the same routine.
 *
 * mutex_init() and mutex_destroy() are therefore macros that cast their
 * first argument to kmutex_t * and call zmutex_init()/zmutex_destroy();
 * the remaining mutex_init() arguments (b, c, d) are discarded.
 */
extern int _mutex_init(mutex_t *mp, int type, void *arg);
extern int _mutex_destroy(mutex_t *mp);
#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp))
#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp))
extern void zmutex_init(kmutex_t *mp);
extern void zmutex_destroy(kmutex_t *mp);
extern void mutex_enter(kmutex_t *mp);
extern void mutex_exit(kmutex_t *mp);
extern int mutex_tryenter(kmutex_t *mp);
extern void *mutex_owner(kmutex_t *mp);
/*
* RW locks
*/
/*
 * krwlock_t as defined here wraps an rwlock_t (rw_lock) together with an
 * owner pointer and an "initialized" flag.
 */
typedef struct krwlock {
void *rw_owner;
boolean_t initialized;
rwlock_t rw_lock;
} krwlock_t;
typedef int krw_t;
#define RW_READER 0
#define RW_WRITER 1
#define RW_DEFAULT USYNC_THREAD
/* Replace any earlier *_HELD macros with ones that query the wrapped lock. */
#undef RW_READ_HELD
#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock)
#undef RW_WRITE_HELD
#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock)
extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
extern void rw_destroy(krwlock_t *rwlp);
extern void rw_enter(krwlock_t *rwlp, krw_t rw);
extern int rw_tryenter(krwlock_t *rwlp, krw_t rw);
extern int rw_tryupgrade(krwlock_t *rwlp);
extern void rw_exit(krwlock_t *rwlp);
/*
 * rw_downgrade() expands to an empty statement: no downgrade is performed
 * and the lock is left exactly as it was held.
 */
#define rw_downgrade(rwlp) do { } while (0)
/* Credential accessors. */
extern uid_t crgetuid(cred_t *cr);
extern gid_t crgetgid(cred_t *cr);
extern int crgetngroups(cred_t *cr);
extern gid_t *crgetgroups(cred_t *cr);
/*
* Condition variables
*/
typedef cond_t kcondvar_t;
#define CV_DEFAULT USYNC_THREAD
extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
extern void cv_destroy(kcondvar_t *cv);
extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);
/*
* kstat creation, installation and deletion
*/
extern kstat_t *kstat_create(char *, int,
char *, char *, uchar_t, ulong_t, uchar_t);
extern void kstat_install(kstat_t *);
extern void kstat_delete(kstat_t *);
/*
* Kernel memory
*/
/* Kernel allocation flags expressed as the corresponding umem flags. */
#define KM_SLEEP UMEM_NOFAIL
#define KM_NOSLEEP UMEM_DEFAULT
#define KMC_NODEBUG UMC_NODEBUG
/* The kmem_*() entry points forward their arguments unchanged to umem_*(). */
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
#define kmem_free(_b, _s) umem_free(_b, _s)
#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
#define kmem_cache_destroy(_c) umem_cache_destroy(_c)
#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
/* kmem_debugging() is always 0; kmem_cache_reap_now() expands to nothing. */
#define kmem_debugging() 0
#define kmem_cache_reap_now(c)
typedef umem_cache_t kmem_cache_t;
/*
* Task queues
*/
typedef struct taskq taskq_t;
typedef uintptr_t taskqid_t;
typedef void (task_func_t)(void *);
#define TASKQ_PREPOPULATE 0x0001
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, void *);
#define XVA_MAPSIZE 3
#define XVA_MAGIC 0x78766174
/*
* vnodes
*/
typedef struct vnode {
uint64_t v_size;
int v_fd;
char *v_path;
} vnode_t;
typedef struct xoptattr {
timestruc_t xoa_createtime; /* Create time of file */
uint8_t xoa_archive;
uint8_t xoa_system;
uint8_t xoa_readonly;
uint8_t xoa_hidden;
uint8_t xoa_nounlink;
uint8_t xoa_immutable;
uint8_t xoa_appendonly;
uint8_t xoa_nodump;
uint8_t xoa_settable;
uint8_t xoa_opaque;
uint8_t xoa_av_quarantined;
uint8_t xoa_av_modified;
} xoptattr_t;
typedef struct vattr {
uint_t va_mask; /* bit-mask of attributes */
u_offset_t va_size; /* file size in bytes */
} vattr_t;
typedef struct xvattr {
vattr_t xva_vattr; /* Embedded vattr structure */
uint32_t xva_magic; /* Magic Number */
uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */
uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */
uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */
uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */
xoptattr_t xva_xoptattrs; /* Optional attributes */
} xvattr_t;
typedef struct vsecattr {
uint_t vsa_mask; /* See below */
int vsa_aclcnt; /* ACL entry count */
void *vsa_aclentp; /* pointer to ACL entries */
int vsa_dfaclcnt; /* default ACL entry count */
void *vsa_dfaclentp; /* pointer to default ACL entries */
size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */
} vsecattr_t;
#define AT_TYPE 0x00001
#define AT_MODE 0x00002
#define AT_UID 0x00004
#define AT_GID 0x00008
#define AT_FSID 0x00010
#define AT_NODEID 0x00020
#define AT_NLINK 0x00040
#define AT_SIZE 0x00080
#define AT_ATIME 0x00100
#define AT_MTIME 0x00200
#define AT_CTIME 0x00400
#define AT_RDEV 0x00800
#define AT_BLKSIZE 0x01000
#define AT_NBLOCKS 0x02000
#define AT_SEQ 0x08000
#define AT_XVATTR 0x10000
#define CRCREAT 0
/*
 * Minimal vnode operations for the vnode_t defined in this file:
 *  - VOP_CLOSE and VOP_PUTPAGE do nothing and evaluate to 0;
 *  - VOP_GETATTR fills in only va_size (copied from v_size) and evaluates
 *    to 0;
 *  - VOP_FSYNC calls fsync() on the vnode's descriptor;
 *  - VN_RELE closes the vnode via vn_close().
 */
#define VOP_CLOSE(vp, f, c, o, cr, ct) 0
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0)
#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
#define VN_RELE(vp) vn_close(vp)
extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
int x2, int x3);
extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
int x2, int x3, vnode_t *vp, int fd);
extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
extern void vn_close(vnode_t *vp);
/*
 * vn_remove() and vn_rename() map straight onto remove() and rename(),
 * dropping their extra arguments; vn_is_readonly() is always B_FALSE.
 */
#define vn_remove(path, x1, x2) remove(path)
#define vn_rename(from, to, seg) rename((from), (to))
#define vn_is_readonly(vp) B_FALSE
extern vnode_t *rootdir;
#include <sys/file.h> /* for FREAD, FWRITE, etc */
/*
* Random stuff
*/
/*
 * lbolt/lbolt64 are gethrtime() >> 23, i.e. one tick per 2^23 gethrtime()
 * units.  With gethrtime() in nanoseconds (TODO confirm against
 * gethrtime(3C)) that is roughly 8.4 ms per tick, which is where hz = 119
 * comes from (10^9 / 2^23 ~= 119.2).
 */
#define lbolt (gethrtime() >> 23)
#define lbolt64 (gethrtime() >> 23)
#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
extern void delay(clock_t ticks);
#define gethrestime_sec() time(NULL)
#define max_ncpus 64
#define minclsyspri 60
#define maxclsyspri 99
/*
 * Masks thr_self() with (max_ncpus - 1); the result falls in
 * [0, max_ncpus) only because max_ncpus is a power of two.
 */
#define CPU_SEQID (thr_self() & (max_ncpus - 1))
/* No credentials here: both evaluate to NULL. */
#define kcred NULL
#define CRED() NULL
extern uint64_t physmem;
extern int highbit(ulong_t i);
extern int random_get_bytes(uint8_t *ptr, size_t len);
extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
extern void kernel_init(int);
extern void kernel_fini(void);
struct spa;
extern void nicenum(uint64_t num, char *buf);
extern void show_pool_stats(struct spa *);
/*
 * Stand-ins for the CPR callback macros.  Only the lock pointer is
 * recorded by CALLB_CPR_INIT; its func and name arguments are ignored.
 * SAFE_BEGIN and SAFE_END merely assert that the recorded lock is held
 * (SAFE_END ignores its lockp argument); EXIT asserts the same and then
 * drops the lock with mutex_exit().
 *
 * NOTE(review): each macro expands to a bare { } block rather than
 * do { } while (0), so "if (x) CALLB_CPR_EXIT(cp); else ..." will not
 * compile.
 */
typedef struct callb_cpr {
kmutex_t *cc_lockp;
} callb_cpr_t;
#define CALLB_CPR_INIT(cp, lockp, func, name) { \
(cp)->cc_lockp = lockp; \
}
#define CALLB_CPR_SAFE_BEGIN(cp) { \
ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
}
#define CALLB_CPR_SAFE_END(cp, lockp) { \
ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
}
#define CALLB_CPR_EXIT(cp) { \
ASSERT(MUTEX_HELD((cp)->cc_lockp)); \
mutex_exit((cp)->cc_lockp); \
}
#define zone_dataset_visible(x, y) (1)
#define INGLOBALZONE(z) (1)
/*
* Hostname information
*/
extern char hw_serial[];
extern int ddi_strtoul(const char *str, char **nptr, int base,
unsigned long *result);
/* ZFS Boot Related stuff. */
struct _buf {
intptr_t _fd;
};
struct bootstat {
uint64_t st_size;
};
typedef struct ace_object {
uid_t a_who;
uint32_t a_access_mask;
uint16_t a_flags;
uint16_t a_type;
uint8_t a_obj_type[16];
uint8_t a_inherit_obj_type[16];
} ace_object_t;
#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06
#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07
#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08
extern struct _buf *kobj_open_file(char *name);
extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
unsigned off);
extern void kobj_close_file(struct _buf *file);
extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern zoneid_t getzoneid(void);
/* SID stuff */
typedef struct ksiddomain {
uint_t kd_ref;
uint_t kd_len;
char *kd_name;
} ksiddomain_t;
ksiddomain_t *ksid_lookupdomain(const char *);
void ksiddomain_rele(ksiddomain_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_CONTEXT_H */
@@ -1,74 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_CTLDIR_H
#define _ZFS_CTLDIR_H
#pragma ident "@(#)zfs_ctldir.h 1.4 08/02/22 SMI"
#include <sys/pathname.h>
#include <sys/vnode.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Name of the control directory. */
#define ZFS_CTLDIR_NAME ".zfs"
/*
 * True when zdp is the root object of its filesystem (z_id equals the
 * zfsvfs z_root) and that filesystem has a control directory
 * (z_ctldir != NULL).
 */
#define zfs_has_ctldir(zdp) \
((zdp)->z_id == (zdp)->z_zfsvfs->z_root && \
((zdp)->z_zfsvfs->z_ctldir != NULL))
/*
 * True when zfs_has_ctldir(zdp) holds and the filesystem's z_show_ctldir
 * is non-zero.
 */
#define zfs_show_ctldir(zdp) \
(zfs_has_ctldir(zdp) && \
((zdp)->z_zfsvfs->z_show_ctldir))
void zfsctl_create(zfsvfs_t *);
void zfsctl_destroy(zfsvfs_t *);
vnode_t *zfsctl_root(znode_t *);
void zfsctl_init(void);
void zfsctl_fini(void);
int zfsctl_rename_snapshot(const char *from, const char *to);
int zfsctl_destroy_snapshot(const char *snapname, int force);
int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
int *direntflags, pathname_t *realpnp);
int zfsctl_make_fid(zfsvfs_t *zfsvfsp, uint64_t object, uint32_t gen,
fid_t *fidp);
int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
#define ZFSCTL_INO_ROOT 0x1
#define ZFSCTL_INO_SNAPDIR 0x2
#ifdef __cplusplus
}
#endif
#endif /* _ZFS_CTLDIR_H */
@@ -1,75 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_DEBUG_H
#define _SYS_ZFS_DEBUG_H
#pragma ident "@(#)zfs_debug.h 1.3 07/02/25 SMI"
#ifdef __cplusplus
extern "C" {
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/*
 * ZFS debugging
 *
 * ZFS_DEBUG is defined for DEBUG builds and for every non-kernel build.
 */
#if defined(DEBUG) || !defined(_KERNEL)
#define ZFS_DEBUG
#endif

extern int zfs_flags;

/* Bits tested in zfs_flags. */
#define ZFS_DEBUG_DPRINTF 0x0001
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
#define ZFS_DEBUG_SNAPNAMES 0x0008
#define ZFS_DEBUG_MODIFY 0x0010

#ifdef ZFS_DEBUG
extern void __dprintf(const char *file, const char *func,
    int line, const char *fmt, ...);
/*
 * dprintf(fmt, ...): hand the message to __dprintf(), tagged with the
 * caller's file, function and line, but only while ZFS_DEBUG_DPRINTF is set
 * in zfs_flags.  The arguments are not evaluated when the flag is clear.
 *
 * This is a single void expression (like the non-debug variant below)
 * rather than a bare "if", so it cannot capture a following "else":
 *	if (cond) dprintf(...); else other();
 */
#define dprintf(...) \
	((zfs_flags & ZFS_DEBUG_DPRINTF) ? \
	__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__) : (void)0)
#else
#define dprintf(...) ((void)0)
#endif /* ZFS_DEBUG */
extern void zfs_panic_recover(const char *fmt, ...);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_DEBUG_H */
-76
View File
@@ -1,76 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_DIR_H
#define _SYS_FS_ZFS_DIR_H
#pragma ident "@(#)zfs_dir.h 1.5 07/11/09 SMI"
#include <sys/pathname.h>
#include <sys/dmu.h>
#include <sys/zfs_znode.h>
#ifdef __cplusplus
extern "C" {
#endif
/* zfs_dirent_lock() flags */
#define ZNEW 0x0001 /* entry should not exist */
#define ZEXISTS 0x0002 /* entry should exist */
#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
#define ZXATTR 0x0008 /* we want the xattr dir */
#define ZRENAMING 0x0010 /* znode is being renamed */
#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
#define IS_XATTR 0x02 /* create an extended attribute node */
#define IS_REPLAY 0x04 /* we are replaying intent log */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);
extern void zfs_dirent_unlock(zfs_dirlock_t *);
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
boolean_t *);
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
pathname_t *);
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
extern void zfs_rmnode(znode_t *);
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int);
extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_DIR_H */
-125
View File
@@ -1,125 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_FUID_H
#define _SYS_FS_ZFS_FUID_H
#pragma ident "@(#)zfs_fuid.h 1.4 08/01/31 SMI"
#ifdef _KERNEL
#include <sys/kidmap.h>
#include <sys/sid.h>
#include <sys/dmu.h>
#include <sys/zfs_vfsops.h>
#endif
#include <sys/avl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Kind of identity a FUID is being created or mapped for. */
typedef enum {
	ZFS_OWNER = 0,
	ZFS_GROUP = 1,
	ZFS_ACE_USER = 2,
	ZFS_ACE_GROUP = 3
} zfs_fuid_type_t;
/*
 * Estimate space needed for one more fuid table entry.
 * For now assume its current size plus (SPA_MINBLOCKSIZE << 1), which
 * the original authors describe as 1K.
 */
#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))

/*
 * A FUID packs an index into the upper 32 bits and a rid into the lower
 * 32 bits.  Every macro argument is parenthesized so that compound
 * expressions (e.g. "a | b") are shifted/masked as a whole, and
 * FUID_ENCODE widens the index to 64 bits before shifting so that a
 * 32-bit index is not shifted by its full width (undefined behavior).
 */
#define FUID_INDEX(x) ((x) >> 32)
#define FUID_RID(x) ((x) & 0xffffffff)
#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
/*
 * FUIDs cause problems for the intent log:
 * we need to replay the creation of the FUID,
 * but we can't count on the idmapper to be around,
 * and during replay the FUID index may be different than
 * before. Also, if an ACL has 100 ACEs and 12 different
 * domains we don't want to log 100 domain strings, but rather
 * just the unique 12.
 */
/*
 * The FUIDs in the log will index into the
 * domain string table and the bottom half will be the rid.
 * Used for mapping ephemeral uid/gid during ACL setting to FUIDs.
 */
typedef struct zfs_fuid {
list_node_t z_next; /* list linkage */
uint64_t z_id; /* uid/gid being converted to fuid */
uint64_t z_domidx; /* index in AVL domain table */
uint64_t z_logfuid; /* index for domain in log */
} zfs_fuid_t;
/* One node in the list of unique domains. */
typedef struct zfs_fuid_domain {
list_node_t z_next; /* list linkage */
uint64_t z_domidx; /* AVL tree idx */
const char *z_domain; /* domain string */
} zfs_fuid_domain_t;
/*
 * FUID information necessary for logging create, setattr, and setacl.
 * NOTE(review): z_fuids / z_domains presumably hold zfs_fuid_t and
 * zfs_fuid_domain_t nodes respectively (both carry a list_node_t) -
 * confirm against the list_create() calls.
 */
typedef struct zfs_fuid_info {
list_t z_fuids;
list_t z_domains;
uint64_t z_fuid_owner;
uint64_t z_fuid_group;
char **z_domain_table; /* Used during replay */
uint32_t z_fuid_cnt; /* How many fuids in z_fuids */
uint32_t z_domain_cnt; /* How many domains */
size_t z_domain_str_sz; /* len of domain strings z_domain list */
} zfs_fuid_info_t;
#ifdef _KERNEL
struct znode;

/*
 * Kernel-only FUID interfaces.  Only the prototypes are visible here;
 * the definitions live elsewhere in the ZFS sources.
 */
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
extern void zfs_fuid_destroy(zfsvfs_t *);
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
    dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
    dmu_tx_t *, zfs_fuid_info_t **);
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
    uid_t *gid);
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
/*
 * Declared with a full prototype rather than an empty parameter list so
 * that calls are type-checked (and so the declaration does not turn
 * into "(void)" under C23).
 * NOTE(review): parameter type taken from the upstream definition,
 * zfs_fuid_info_free(zfs_fuid_info_t *fuidp) - confirm against zfs_fuid.c.
 */
extern void zfs_fuid_info_free(zfs_fuid_info_t *);
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
#endif

/* Declared for both kernel and non-kernel builds. */
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_FUID_H */
-71
View File
@@ -1,71 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_I18N_H
#define _SYS_ZFS_I18N_H
#include <sys/sunddi.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * z_case behaviors
 * The two ZFS_CI_* flags below describe the extent of case
 * insensitivity.
 * NOTE(review): the original comment also mentions a third value that
 * "describes matching behavior when mixed sensitivity is allowed", but
 * no such flag is defined here - confirm whether it was removed.
 */
#define ZFS_CI_ONLY 0x01 /* all lookups case-insensitive */
#define ZFS_CI_MIXD 0x02 /* some lookups case-insensitive */
/*
 * ZFS_UTF8_ONLY
 * If set, the file system should reject non-utf8 characters in names.
 */
#define ZFS_UTF8_ONLY 0x04
/* Case-sensitivity modes. */
enum zfs_case {
	ZFS_CASE_SENSITIVE = 0,
	ZFS_CASE_INSENSITIVE = 1,
	ZFS_CASE_MIXED = 2
};
/* Normalization forms; the values follow declaration order. */
enum zfs_normal {
	ZFS_NORMALIZE_NONE = 0,
	ZFS_NORMALIZE_D = 1,
	ZFS_NORMALIZE_KC = 2,
	ZFS_NORMALIZE_C = 3,
	ZFS_NORMALIZE_KD = 4
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_I18N_H */
-196
View File
@@ -1,196 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZFS_IOCTL_H
#define _SYS_ZFS_IOCTL_H
#pragma ident "@(#)zfs_ioctl.h 1.19 08/04/27 SMI"
#include <sys/cred.h>
#include <sys/dmu.h>
#include <sys/zio.h>
#include <sys/dsl_deleg.h>
#ifdef _KERNEL
#include <sys/nvpair.h>
#endif /* _KERNEL */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Property values for snapdir
 */
#define ZFS_SNAPDIR_HIDDEN 0
#define ZFS_SNAPDIR_VISIBLE 1
/*
 * Version and magic values for the DMU backup stream.
 * NOTE(review): presumably compared against drr_version / drr_magic in
 * struct drr_begin below - confirm against the send/receive code.
 */
#define DMU_BACKUP_STREAM_VERSION (1ULL)
#define DMU_BACKUP_HEADER_VERSION (2ULL)
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
/* NOTE(review): presumably bit flags for drr_begin.drr_flags - confirm. */
#define DRR_FLAG_CLONE (1<<0)
#define DRR_FLAG_CI_DATA (1<<1)
/*
 * Replay record: a type tag (drr_type), a payload length, and a union
 * holding one per-type body.
 * NOTE(review): the original comment here read "zfs ioctl command
 * structure", which appears to describe zfs_cmd_t further down rather
 * than this record - confirm.
 */
typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END,
} drr_type;
uint32_t drr_payloadlen;
union {
struct drr_begin {
uint64_t drr_magic;
uint64_t drr_version;
uint64_t drr_creation_time;
dmu_objset_type_t drr_type;
uint32_t drr_flags;
uint64_t drr_toguid;
uint64_t drr_fromguid;
char drr_toname[MAXNAMELEN];
} drr_begin;
struct drr_end {
zio_cksum_t drr_checksum;
} drr_end;
struct drr_object {
uint64_t drr_object;
dmu_object_type_t drr_type;
dmu_object_type_t drr_bonustype;
uint32_t drr_blksz;
uint32_t drr_bonuslen;
uint8_t drr_checksum;
uint8_t drr_compress;
uint8_t drr_pad[6];
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
uint64_t drr_firstobj;
uint64_t drr_numobjs;
} drr_freeobjects;
struct drr_write {
uint64_t drr_object;
dmu_object_type_t drr_type;
uint32_t drr_pad;
uint64_t drr_offset;
uint64_t drr_length;
/* content follows */
} drr_write;
struct drr_free {
uint64_t drr_object;
uint64_t drr_offset;
uint64_t drr_length;
} drr_free;
} drr_u;
} dmu_replay_record_t;
/*
 * Injection record; embedded in zfs_cmd_t as zc_inject_record.
 * NOTE(review): field meanings beyond their names are not visible in
 * this header - confirm against the zinject consumer.
 */
typedef struct zinject_record {
uint64_t zi_objset;
uint64_t zi_object;
uint64_t zi_start;
uint64_t zi_end;
uint64_t zi_guid;
uint32_t zi_level;
uint32_t zi_error;
uint64_t zi_type;
uint32_t zi_freq;
uint32_t zi_pad; /* pad out to 64 bit alignment */
} zinject_record_t;
/* NOTE(review): presumably flag bits accompanying an injection request - confirm. */
#define ZINJECT_NULL 0x1
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
/* Share request; embedded in zfs_cmd_t as zc_share. */
typedef struct zfs_share {
uint64_t z_exportdata;
uint64_t z_sharedata;
uint64_t z_sharetype; /* 0 = share, 1 = unshare */
uint64_t z_sharemax; /* max length of share string */
} zfs_share_t;
/*
 * Name-lookup case handling for a ZFS file system:
 *   - the usual POSIX-compliant behavior, where lookups are
 *     case-sensitive;
 *   - all lookups case-insensitive;
 *   - mixed, where only lookups that set an FIGNORECASE flag are
 *     case-insensitive.
 */
typedef enum zfs_case {
	ZFS_CASE_SENSITIVE = 0,
	ZFS_CASE_INSENSITIVE = 1,
	ZFS_CASE_MIXED = 2
} zfs_case_t;
/*
 * Command argument block.
 * NOTE(review): presumably the structure exchanged through the ZFS
 * ioctl interface (this is zfs_ioctl.h) - confirm against the ioctl
 * handler.
 * Fields commented "really (char *)" carry pointers in uint64_t slots.
 */
typedef struct zfs_cmd {
char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_nvlist_src_size;
uint64_t zc_nvlist_dst; /* really (char *) */
uint64_t zc_nvlist_dst_size;
uint64_t zc_cookie;
uint64_t zc_objset_type;
uint64_t zc_perm_action;
uint64_t zc_history; /* really (char *) */
uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
zfs_share_t zc_share;
dmu_objset_stats_t zc_objset_stats;
struct drr_begin zc_begin_record;
zinject_record_t zc_inject_record;
} zfs_cmd_t;
/*
 * Minor-number bounds: ZFS_MIN_MINOR is defined as one past
 * ZVOL_MAX_MINOR.
 */
#define ZVOL_MAX_MINOR (1 << 16)
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
#ifdef _KERNEL
/* Pair of property nvlists; field meanings are not shown in this header. */
typedef struct zfs_creat {
nvlist_t *zct_zplprops;
nvlist_t *zct_props;
} zfs_creat_t;
extern dev_info_t *zfs_dip;
/*
 * Kernel-only entry points; only the prototypes are visible here.
 */
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
extern int zfs_secpolicy_rename_perms(const char *from,
const char *to, cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
extern int zfs_unmount_snap(char *, void *);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFS_IOCTL_H */
@@ -1,89 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_RLOCK_H
#define _SYS_FS_ZFS_RLOCK_H
#pragma ident "@(#)zfs_rlock.h 1.2 06/06/19 SMI"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _KERNEL
#include <sys/zfs_znode.h>
/* Range lock types. */
typedef enum {
	RL_READER = 0,
	RL_WRITER = 1,
	RL_APPEND = 2
} rl_type_t;
/*
 * One file range lock.  It is linked into an AVL tree through r_node
 * and is the structure returned by zfs_range_lock().
 */
typedef struct rl {
znode_t *r_zp; /* znode this lock applies to */
avl_node_t r_node; /* avl node link */
uint64_t r_off; /* file range offset */
uint64_t r_len; /* file range length */
uint_t r_cnt; /* range reference count in tree */
rl_type_t r_type; /* range type */
kcondvar_t r_wr_cv; /* cv for waiting writers */
kcondvar_t r_rd_cv; /* cv for waiting readers */
uint8_t r_proxy; /* acting for original range */
uint8_t r_write_wanted; /* writer wants to lock this range */
uint8_t r_read_wanted; /* reader wants to lock this range */
} rl_t;
/*
 * Lock a range (offset, length) as either shared (READER)
 * or exclusive (WRITER or APPEND). APPEND is a special type that
 * is converted to WRITER, with the locked range starting at the
 * end of the file. zfs_range_lock() returns the range lock structure.
 */
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
/*
 * Unlock range and destroy range lock structure.
 */
void zfs_range_unlock(rl_t *rl);
/*
 * Reduce range locked as RL_WRITER from whole file to specified range.
 * Asserts the whole file was previously locked.
 */
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
/*
 * AVL comparison function used to compare range locks.
 */
int zfs_range_compare(const void *arg1, const void *arg2);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_RLOCK_H */
-140
View File
@@ -1,140 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_VFSOPS_H
#define _SYS_FS_ZFS_VFSOPS_H
#pragma ident "@(#)zfs_vfsops.h 1.11 08/02/22 SMI"
#include <sys/isa_defs.h>
#include <sys/types32.h>
#include <sys/list.h>
#include <sys/vfs.h>
#include <sys/zil.h>
#include <sys/rrwlock.h>
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Per-filesystem ZFS state.  z_vfs points at the generic vfs_t.
 */
typedef struct zfsvfs zfsvfs_t;
struct zfsvfs {
vfs_t *z_vfs; /* generic fs struct */
zfsvfs_t *z_parent; /* parent fs */
objset_t *z_os; /* objset reference */
uint64_t z_root; /* id of root znode */
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
uint64_t z_max_blksz; /* maximum block size for files */
uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
uint64_t z_fuid_obj; /* fuid table object number */
uint64_t z_fuid_size; /* fuid table size */
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
krwlock_t z_fuid_lock; /* fuid lock */
boolean_t z_fuid_loaded; /* fuid tables are loaded */
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
zilog_t *z_log; /* intent log pointer */
uint_t z_acl_mode; /* acl chmod/mode behavior */
uint_t z_acl_inherit; /* acl inheritance behavior */
zfs_case_t z_case; /* case-sense */
boolean_t z_utf8; /* utf8-only */
int z_norm; /* normalization flags */
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_unmounted; /* unmounted */
rrwlock_t z_teardown_lock;
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
vnode_t *z_ctldir; /* .zfs directory pointer */
boolean_t z_show_ctldir; /* expose .zfs in the root dir */
boolean_t z_issnap; /* true if this is a snapshot */
boolean_t z_vscan; /* virus scan on/off */
boolean_t z_use_fuids; /* version allows fuids */
/* NOTE(review): this is a kmutex_t, so "as WRITER" below looks stale - confirm. */
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
uint64_t z_version; /* ZPL version */
/*
 * Number of hold mutexes.  ZFS_OBJ_HASH() in zfs_znode.h masks with
 * (ZFS_OBJ_MTX_SZ - 1), so this must remain a power of two.
 */
#define ZFS_OBJ_MTX_SZ 64
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
};
/*
 * Normal filesystems (those not under .zfs/snapshot) have a total
 * file ID size limited to 12 bytes (including the length field) due to
 * NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical
 * reasons, this same limit is being imposed by the Solaris NFSv3 implementation
 * (although the NFSv3 protocol actually permits a maximum of 64 bytes). It
 * is not possible to expand beyond 12 bytes without abandoning support
 * of NFSv2.
 *
 * For normal filesystems, we partition up the available space as follows:
 * 2 bytes fid length (required)
 * 6 bytes object number (48 bits)
 * 4 bytes generation number (32 bits)
 *
 * We reserve only 48 bits for the object number, as this is the limit
 * currently defined and imposed by the DMU.
 */
typedef struct zfid_short {
uint16_t zf_len; /* fid length */
uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */
uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */
} zfid_short_t;
/*
 * Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
 * (including the length field). This makes files under .zfs/snapshot
 * accessible by NFSv3 and NFSv4, but not NFSv2.
 *
 * For files under .zfs/snapshot, we partition up the available space
 * as follows:
 * 2 bytes fid length (required)
 * 6 bytes object number (48 bits)
 * 4 bytes generation number (32 bits)
 * 6 bytes objset id (48 bits)
 * 4 bytes currently just zero (32 bits)
 *
 * We reserve only 48 bits for the object number and objset id, as these are
 * the limits currently defined and imposed by the DMU.
 */
typedef struct zfid_long {
zfid_short_t z_fid; /* length, object number and generation */
uint8_t zf_setid[6]; /* objset id, 48 bits (byte order: TODO confirm) */
uint8_t zf_setgen[4]; /* currently just zero (see layout above) */
} zfid_long_t;
/*
 * FID lengths excluding the leading uint16_t length field.
 */
#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
extern uint_t zfs_fsyncer_key;
/*
 * Suspend/resume a mounted filesystem.  Only the prototypes are visible
 * here.
 * NOTE(review): presumably osname/mode written by zfs_suspend_fs() are
 * handed back to zfs_resume_fs() - confirm against the definitions.
 */
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_VFSOPS_H */
-353
View File
@@ -1,353 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
#define _SYS_FS_ZFS_ZNODE_H
#pragma ident "@(#)zfs_znode.h 1.25 07/12/07 SMI"
#ifdef _KERNEL
#include <sys/isa_defs.h>
#include <sys/types32.h>
#include <sys/attr.h>
#include <sys/list.h>
#include <sys/dmu.h>
#include <sys/zfs_vfsops.h>
#include <sys/rrwlock.h>
#endif
#include <sys/zfs_acl.h>
#include <sys/zil.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Additional file level attributes, that are stored
 * in the upper half of zp_flags.
 *
 * The constants carry an explicit ULL suffix so that they are unsigned
 * 64-bit values on every data model, matching the uint64_t zp_flags
 * field they are combined with, instead of taking whatever type the
 * compiler picks for an unsuffixed literal.
 */
#define ZFS_READONLY       0x0000000100000000ULL
#define ZFS_HIDDEN         0x0000000200000000ULL
#define ZFS_SYSTEM         0x0000000400000000ULL
#define ZFS_ARCHIVE        0x0000000800000000ULL
#define ZFS_IMMUTABLE      0x0000001000000000ULL
#define ZFS_NOUNLINK       0x0000002000000000ULL
#define ZFS_APPENDONLY     0x0000004000000000ULL
#define ZFS_NODUMP         0x0000008000000000ULL
#define ZFS_OPAQUE         0x0000010000000000ULL
#define ZFS_AV_QUARANTINED 0x0000020000000000ULL
#define ZFS_AV_MODIFIED    0x0000040000000000ULL
/*
 * Set (value != 0) or clear (value == 0) the attr bits in
 * zp->z_phys->zp_flags.
 *
 * zp and attr are parenthesized so that compound arguments expand
 * correctly; in particular "~(attr)" complements the whole mask when
 * attr is an expression such as "A | B".  The brace-block form is kept
 * unchanged for existing callers.
 */
#define ZFS_ATTR_SET(zp, attr, value) \
{ \
	if (value) \
		(zp)->z_phys->zp_flags |= (attr); \
	else \
		(zp)->z_phys->zp_flags &= ~(attr); \
}
/*
 * Special zfs pflags; one bit per flag.
 */
#define ZFS_XATTR            (1 << 0) /* is an extended attribute */
#define ZFS_INHERIT_ACE      (1 << 1) /* ace has inheritable ACEs */
#define ZFS_ACL_TRIVIAL      (1 << 2) /* files ACL is trivial */
#define ZFS_ACL_OBJ_ACE      (1 << 3) /* ACL has CMPLX Object ACE */
#define ZFS_ACL_PROTECTED    (1 << 4) /* ACL protected */
#define ZFS_ACL_DEFAULTED    (1 << 5) /* ACL should be defaulted */
#define ZFS_ACL_AUTO_INHERIT (1 << 6) /* ACL should be inherited */
#define ZFS_BONUS_SCANSTAMP  (1 << 7) /* Scanstamp in bonus area */
/*
 * Is ID ephemeral?  True when x is greater than MAXUID.
 * The argument is parenthesized so that expressions with lower
 * precedence than ">" (e.g. a conditional expression) compare as a whole.
 */
#define IS_EPHEMERAL(x) ((x) > MAXUID)

/*
 * Should we use FUIDs?  True only when both the given ZPL version and
 * the SPA version of the pool backing os are at least the respective
 * *_VERSION_FUID.
 */
#define USE_FUIDS(version, os) ((version) >= ZPL_VERSION_FUID && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
/* Object number of the master node. */
#define MASTER_NODE_OBJ 1
/*
 * Special attributes for master node.
 * These are name strings; do not change their spelling.
 */
#define ZFS_FSID "FSID"
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
#define ZFS_ROOT_OBJ "ROOT"
#define ZPL_VERSION_STR "VERSION"
#define ZFS_FUID_TABLES "FUID"
/* Largest ZFS block size; an alias for the SPA maximum. */
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
/* Path component length */
/*
 * The generic fs code uses MAXNAMELEN to represent
 * what the largest component length is. Unfortunately,
 * this length includes the terminating NUL. ZFS needs
 * to tell the users via pathconf() and statvfs() what the
 * true maximum length of a component is, excluding the NUL.
 */
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
/*
 * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
 * the directory entries.
 */
#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
/*
 * The directory entry has the type (currently unused on Solaris) in the
 * top 4 bits, and the object number in the low 48 bits. The "middle"
 * 12 bits are unused.
 */
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
/*
 * This is the persistent portion of the znode. It is stored
 * in the "bonus buffer" of the file. Short symbolic links
 * are also stored in the bonus buffer.
 *
 * The number at the start of each field comment is the field's byte
 * offset within the structure; do not reorder or resize fields.
 */
typedef struct znode_phys {
uint64_t zp_atime[2]; /* 0 - last file access time */
uint64_t zp_mtime[2]; /* 16 - last file modification time */
uint64_t zp_ctime[2]; /* 32 - last file change time */
uint64_t zp_crtime[2]; /* 48 - creation time */
uint64_t zp_gen; /* 64 - generation (txg of creation) */
uint64_t zp_mode; /* 72 - file mode bits */
uint64_t zp_size; /* 80 - size of file */
uint64_t zp_parent; /* 88 - directory parent (`..') */
uint64_t zp_links; /* 96 - number of links to file */
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
uint64_t zp_flags; /* 120 - persistent flags */
uint64_t zp_uid; /* 128 - file owner */
uint64_t zp_gid; /* 136 - owning group */
uint64_t zp_zap; /* 144 - extra attributes */
uint64_t zp_pad[3]; /* 152 - future */
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
/*
 * Data may pad out any remaining bytes in the znode buffer, eg:
 *
 * |<---------------------- dnode_phys (512) ------------------------>|
 * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
 * |<---- znode (264) ---->|<---- data (56) ---->|
 *
 * At present, we use this space for the following:
 * - symbolic links
 * - 32-byte anti-virus scanstamp (regular files only)
 */
} znode_phys_t;
/*
 * Directory entry locks control access to directory entries.
 * They are used to protect creates, deletes, and renames.
 * Each directory znode has a mutex and a list of locked names.
 */
#ifdef _KERNEL
/* One locked name; chained through dl_next on the z_dirlocks list. */
typedef struct zfs_dirlock {
char *dl_name; /* directory entry being locked */
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
uint16_t dl_namesize; /* set if dl_name was allocated */
kcondvar_t dl_cv; /* wait for entry to be unlocked */
struct znode *dl_dzp; /* directory znode */
struct zfs_dirlock *dl_next; /* next in z_dirlocks list */
} zfs_dirlock_t;
/*
 * In-core znode.  z_phys points at the persistent znode_phys_t and
 * z_dbuf at the buffer that contains it.
 */
typedef struct znode {
struct zfsvfs *z_zfsvfs; /* owning filesystem */
vnode_t *z_vnode; /* associated vnode (see ZTOV()) */
uint64_t z_id; /* object ID for this znode */
kmutex_t z_lock; /* znode modification lock */
krwlock_t z_map_lock; /* page map lock */
krwlock_t z_parent_lock; /* parent lock for directories */
krwlock_t z_name_lock; /* "master" lock for dirent locks */
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
kmutex_t z_range_lock; /* protects changes to z_range_avl */
avl_tree_t z_range_avl; /* avl tree of file range locks */
uint8_t z_unlinked; /* file has been unlinked */
uint8_t z_atime_dirty; /* atime needs to be synced */
uint8_t z_zn_prefetch; /* Prefetch znodes? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
uint64_t z_mapcnt; /* number of pages mapped to file */
uint64_t z_last_itx; /* last ZIL itx on this znode */
uint64_t z_gen; /* generation (same as zp_gen) */
uint32_t z_sync_cnt; /* synchronous open count */
kmutex_t z_acl_lock; /* acl data lock */
list_node_t z_link_node; /* all znodes in fs link */
/*
 * These are dmu managed fields.
 */
znode_phys_t *z_phys; /* pointer to persistent znode */
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
} znode_t;
/*
* Range locking rules
* --------------------
* 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
* file range needs to be locked as RL_WRITER. Only then can the pages be
* freed etc and zp_size reset. zp_size must be set within range lock.
* 2. For writes and punching holes (zfs_write & zfs_space) just the range
* being written or freed needs to be locked as RL_WRITER.
* Multiple writes at the end of the file must coordinate zp_size updates
* to ensure data isn't lost. A compare and swap loop is currently used
* to ensure the file size is at least the offset last written.
* 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
* read needs to be locked as RL_READER. A check against zp_size can then
* be made for reading beyond end of file.
*/
/*
 * Convert between znode pointers and vnode pointers:
 * ZTOV() reads the znode's z_vnode field; VTOZ() casts the vnode's
 * v_data field back to a znode pointer.
 */
#define ZTOV(ZP) ((ZP)->z_vnode)
#define VTOZ(VP) ((znode_t *)(VP)->v_data)
/*
 * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
 * ZFS_EXIT() must be called before exiting the vop.
 * ZFS_VERIFY_ZP() verifies the znode is valid.
 *
 * ZFS_ENTER() takes z_teardown_lock as RW_READER; if the filesystem is
 * marked unmounted it drops the lock again and makes the *calling*
 * function return EIO.  ZFS_VERIFY_ZP() likewise drops the lock and
 * returns EIO from the caller when the znode has no z_dbuf.  Both may
 * therefore only be used inside functions returning an int error code.
 *
 * ZFS_ENTER() expands to a brace block and ZFS_VERIFY_ZP() to a bare
 * "if" statement, so neither is safe as the unbraced body of an
 * if/else.
 */
#define ZFS_ENTER(zfsvfs) \
{ \
	rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
	if ((zfsvfs)->z_unmounted) { \
		ZFS_EXIT(zfsvfs); \
		return (EIO); \
	} \
}

#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)

/*
 * The definition ends at the closing brace.  A trailing line
 * continuation here would silently pull the following source line into
 * the macro body.
 */
#define ZFS_VERIFY_ZP(zp) \
	if ((zp)->z_dbuf == NULL) { \
		ZFS_EXIT((zp)->z_zfsvfs); \
		return (EIO); \
	}
/*
 * Macros for dealing with dmu_buf_hold
 *
 * ZFS_OBJ_HASH() maps an object number onto an index into the
 * z_hold_mtx[] array by masking with (ZFS_OBJ_MTX_SZ - 1).
 * ZFS_OBJ_MUTEX() yields the hold mutex for a znode's object ID, and
 * ZFS_OBJ_HOLD_ENTER()/ZFS_OBJ_HOLD_EXIT() enter/exit the hold mutex
 * for a given object number.
 *
 * Neither hold macro ends in a semicolon; the caller supplies it, so
 * both can be used as the unbraced body of an if/else.
 */
#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define ZFS_OBJ_MUTEX(zp) \
	(&(zp)->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH((zp)->z_id)])
#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
	mutex_exit(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
/*
 * Macros to encode/decode ZFS stored time values from/to struct timespec
 *
 * The stored form is a two-element array: element 0 holds tv_sec and
 * element 1 holds tv_nsec, each converted with the casts shown.
 * Both macros expand to a brace block and evaluate each argument twice.
 */
#define ZFS_TIME_ENCODE(tp, stmp) \
{ \
(stmp)[0] = (uint64_t)(tp)->tv_sec; \
(stmp)[1] = (uint64_t)(tp)->tv_nsec; \
}
#define ZFS_TIME_DECODE(tp, stmp) \
{ \
(tp)->tv_sec = (time_t)(stmp)[0]; \
(tp)->tv_nsec = (long)(stmp)[1]; \
}
/*
 * Timestamp defines
 */
#define ACCESSED (AT_ATIME)
#define STATE_CHANGED (AT_CTIME)
#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
/*
 * Stamp the access time via zfs_time_stamper(zp, ACCESSED, NULL), but
 * only when z_atime is set and the vfs is not flagged VFS_RDONLY.
 * Expands to a bare "if" statement without an else, so an "else"
 * written after an unbraced use would bind to this "if".
 */
#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
zfs_time_stamper(zp, ACCESSED, NULL)
/*
 * Kernel-only znode entry points.  Only the prototypes are visible
 * here; the definitions live elsewhere in the ZFS sources.
 */
extern int zfs_init_fs(zfsvfs_t *, znode_t **, cred_t *);
extern void zfs_set_dataprop(objset_t *);
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
    dmu_tx_t *tx);
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void zfs_znode_free(znode_t *);
/*
 * Declared "(void)" rather than with an empty parameter list, so that
 * these are real prototypes and stray arguments are diagnosed.
 * NOTE(review): assumes both definitions take no arguments - confirm.
 */
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
extern dev_t zfs_cmpldev(uint64_t);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_set_version(const char *name, uint64_t newvers);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern void zfs_znode_dmu_fini(znode_t *);
/*
 * Intent-log logging entry points, one per logged operation.  Each
 * takes the zilog and the dmu_tx_t it is logged under.  Only the
 * prototypes are visible here.
 */
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
vattr_t *vap);
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
vattr_t *vap);
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, char *name);
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name);
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *dzp, znode_t *zp, char *name, char *link);
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, uint64_t off, uint64_t len);
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern zil_get_data_t zfs_get_data;
/* Replay function table, sized by TX_MAX_TYPE. */
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
extern int zfsfstype;
#endif /* _KERNEL */
/*
 * Declared outside the _KERNEL guard, so available to non-kernel
 * builds as well.
 * NOTE(review): presumably writes the path of object obj into buf
 * (at most len bytes) - confirm against the definition.
 */
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_FS_ZFS_ZNODE_H */
-380
View File
@@ -1,380 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIL_H
#define _SYS_ZIL_H
#pragma ident "@(#)zil.h 1.15 08/02/22 SMI"
#include <sys/types.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Intent log format:
 *
 * Each objset has its own intent log. The log header (zil_header_t)
 * for objset N's intent log is kept in the Nth object of the SPA's
 * intent_log objset. The log header points to a chain of log blocks,
 * each of which contains log records (i.e., transactions) followed by
 * a log block trailer (zil_trailer_t). The format of a log record
 * depends on the record (or transaction) type, but all records begin
 * with a common structure that defines the type, length, and txg.
 */
/*
 * Intent log header - this on-disk structure holds fields to manage
 * the log. All fields are 64 bit to easily handle cross architectures.
 */
typedef struct zil_header {
uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
uint64_t zh_replay_seq; /* highest replayed sequence number */
blkptr_t zh_log; /* log chain */
uint64_t zh_claim_seq; /* highest claimed sequence number */
uint64_t zh_pad[5]; /* padding */
} zil_header_t;
/*
* Log block trailer - structure at the end of the header and each log block
*
* The zit_bt contains a zbt_cksum which for the intent log is
* the sequence number of this log block. A seq of 0 is invalid.
* The zbt_cksum is checked by the SPA against the sequence
* number passed in the blk_cksum field of the blkptr_t
*/
typedef struct zil_trailer {
uint64_t zit_pad;
blkptr_t zit_next_blk; /* next block in chain */
uint64_t zit_nused; /* bytes in log block used */
zio_block_tail_t zit_bt; /* block trailer */
} zil_trailer_t;
/* Bounds on the size of a log block. */
#define ZIL_MIN_BLKSZ 4096ULL
#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
/*
* Space in a log write buffer that is left for log records: the buffer
* size (lwb_sz) minus the zil_trailer_t that ends every log block.
*/
#define ZIL_BLK_DATA_SZ(lwb) ((lwb)->lwb_sz - sizeof (zil_trailer_t))
/*
* The words of a log block checksum.
*/
#define ZIL_ZC_GUID_0 0
#define ZIL_ZC_GUID_1 1
#define ZIL_ZC_OBJSET 2
#define ZIL_ZC_SEQ 3
/*
 * Kind of object a create record refers to.
 * NOTE(review): presumably file / directory / extended-attribute
 * directory, mirroring TX_CREATE, TX_MKDIR and TX_MKXATTR - confirm.
 */
typedef enum zil_create {
	Z_FILE		= 0,
	Z_DIR		= 1,
	Z_XATTRDIR	= 2
} zil_create_t;
/*
 * Size of the xvattr log section.
 * It is composed of lr_attr_t + xvattr bitmap + 2 64-bit timestamps
 * for create time and a single 64-bit integer for all of the attributes,
 * and 4 64-bit integers (32 bytes) for the scanstamp.
 *
 * lr_attr_t already contains the first bitmap word, which is why only
 * (mapsize - 1) additional 32-bit words are counted.
 *
 * Both the argument and the whole expansion are parenthesized so the
 * macro yields the intended value inside larger expressions
 * (e.g. "2 * ZIL_XVAT_SIZE(n)") and with compound arguments.
 */
#define	ZIL_XVAT_SIZE(mapsize) \
	(sizeof (lr_attr_t) + (sizeof (uint32_t) * ((mapsize) - 1)) + \
	(sizeof (uint64_t) * 7))
/*
* Size of ACL in log. The ACE data is padded out so that it is properly
* aligned on an 8-byte boundary.
*/
#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t)))
/*
* Intent log transaction types and record structures.
*
* A record's type is carried in the lrc_txtype field of its lr_t header.
* TX_MAX_TYPE is one more than the highest defined type and is the
* length of the replay function table passed to zil_replay().
*/
#define TX_CREATE 1 /* Create file */
#define TX_MKDIR 2 /* Make directory */
#define TX_MKXATTR 3 /* Make XATTR directory */
#define TX_SYMLINK 4 /* Create symbolic link to a file */
#define TX_REMOVE 5 /* Remove file */
#define TX_RMDIR 6 /* Remove directory */
#define TX_LINK 7 /* Create hard link to a file */
#define TX_RENAME 8 /* Rename a file */
#define TX_WRITE 9 /* File write */
#define TX_TRUNCATE 10 /* Truncate a file */
#define TX_SETATTR 11 /* Set file attributes */
#define TX_ACL_V0 12 /* Set old formatted ACL */
#define TX_ACL 13 /* Set ACL */
#define TX_CREATE_ACL 14 /* create with ACL */
#define TX_CREATE_ATTR 15 /* create + attrs */
#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */
#define TX_MKDIR_ACL 17 /* mkdir with ACL */
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_MAX_TYPE 20 /* Max transaction type */
/*
 * Case-insensitivity marker (bit 63).
 * The mkdir, symlink, remove, rmdir, link and rename transactions may
 * have this bit set to record that the original request asked for
 * case-insensitive handling of names.
 */
#define	TX_CI	(((uint64_t)1) << 63)	/* case-insensitive behavior requested */
/*
* Format of log records.
* The fields are carefully defined to allow them to be aligned
* and sized the same on sparc & intel architectures.
* Each log record has a common structure at the beginning.
*
* Note, lrc_seq holds two different sequence numbers. Whilst in memory
* it contains the transaction sequence number. The log record on
* disk holds the sequence number of all log records which is used to
* ensure we don't replay the same record. The two sequence numbers are
* different because the transactions can now be pushed out of order.
*/
typedef struct { /* common log record header */
uint64_t lrc_txtype; /* intent log transaction type */
uint64_t lrc_reclen; /* transaction record length */
uint64_t lrc_txg; /* dmu transaction group number */
uint64_t lrc_seq; /* see comment above */
} lr_t;
/*
* Handle optional extended vattr attributes.
*
* Whenever new attributes are added, the version number
* will need to be updated, as will the code in
* zfs_log.c and zfs_replay.c.
*/
typedef struct {
uint32_t lr_attr_masksize; /* number of elements in array */
uint32_t lr_attr_bitmap; /* First entry of array */
/* remainder of array and any additional fields */
} lr_attr_t;
/*
* log record for creates without optional ACL.
* This log record does support optional xvattr_t attributes.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* object id of directory */
uint64_t lr_foid; /* object id of created file object */
uint64_t lr_mode; /* mode of object */
uint64_t lr_uid; /* uid of object */
uint64_t lr_gid; /* gid of object */
uint64_t lr_gen; /* generation (txg of creation) */
uint64_t lr_crtime[2]; /* creation time */
uint64_t lr_rdev; /* rdev of object to create */
/* name of object to create follows this */
/* for symlinks, link content follows name */
/* for creates with xvattr data, the name follows the xvattr info */
} lr_create_t;
/*
* FUID ACL record will be an array of ACEs from the original ACL.
* If this array includes ephemeral IDs, the record will also include
* an array of log-specific FUIDs to replace the ephemeral IDs.
* Only one copy of each unique domain will be present, so the log-specific
* FUIDs will use an index into a compressed domain table. On replay this
* information will be used to construct real FUIDs (and bypass idmap,
* since it may not be available).
*/
/*
* Log record for creates with optional ACL
* This log record is also used for recording any FUID
* information needed for replaying the create. If the
* file doesn't have any actual ACEs then the lr_aclcnt
* would be zero.
*/
typedef struct {
lr_create_t lr_create; /* common create portion */
uint64_t lr_aclcnt; /* number of ACEs in ACL */
uint64_t lr_domcnt; /* number of unique domains */
uint64_t lr_fuidcnt; /* number of real fuids */
uint64_t lr_acl_bytes; /* number of bytes in ACL */
uint64_t lr_acl_flags; /* ACL flags */
/* lr_acl_bytes number of variable sized ace's follows */
/* if create is also setting xvattr's, then acl data follows xvattr */
/* if ACE FUIDs are needed then they will follow the xvattr_t */
/* Following the FUIDs will be the domain table information. */
/* The FUIDs for the owner and group will be in the lr_create */
/* portion of the record. */
/* name follows ACL data */
} lr_acl_create_t;
/*
* Log record for removing a name from a directory: the directory's
* object id, followed by the name being removed.
* NOTE(review): presumably shared by TX_REMOVE and TX_RMDIR - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* obj id of directory */
/* name of object to remove follows this */
} lr_remove_t;
/*
* Log record for adding a link to an existing object: the directory
* receiving the link, the object being linked, then the link's name.
* NOTE(review): presumably the TX_LINK record - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_doid; /* obj id of directory */
uint64_t lr_link_obj; /* obj id of link */
/* name of object to link follows this */
} lr_link_t;
/*
* Log record for a rename: source and target directory object ids,
* followed by the source name and the destination name.
* NOTE(review): presumably the TX_RENAME record - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_sdoid; /* obj id of source directory */
uint64_t lr_tdoid; /* obj id of target directory */
/* 2 strings: names of source and destination follow this */
} lr_rename_t;
/*
* Log record for a file write. Small writes carry their data directly
* after this structure; otherwise lr_blkptr refers to the data block
* (see itx_wr_state_t for the in-memory write states).
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to write */
uint64_t lr_offset; /* offset to write to */
uint64_t lr_length; /* user data length to write */
uint64_t lr_blkoff; /* offset represented by lr_blkptr */
blkptr_t lr_blkptr; /* spa block pointer for replay */
/* write data will follow for small writes */
} lr_write_t;
/*
* Log record for truncating a range of a file.
* NOTE(review): presumably the TX_TRUNCATE record - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* object id of file to truncate */
uint64_t lr_offset; /* offset to truncate from */
uint64_t lr_length; /* length to truncate */
} lr_truncate_t;
/*
* Log record for changing file attributes. lr_mask selects which of the
* values below are to be applied; an lr_attr_t with optional extended
* attributes may follow the fixed part.
* NOTE(review): presumably the TX_SETATTR record - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to change attributes */
uint64_t lr_mask; /* mask of attributes to set */
uint64_t lr_mode; /* mode to set */
uint64_t lr_uid; /* uid to set */
uint64_t lr_gid; /* gid to set */
uint64_t lr_size; /* size to set */
uint64_t lr_atime[2]; /* access time */
uint64_t lr_mtime[2]; /* modification time */
/* optional attribute lr_attr_t may be here */
} lr_setattr_t;
/*
* Log record for setting an ACL made of fixed-size ace_t entries.
* NOTE(review): presumably the old-format TX_ACL_V0 record - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
uint64_t lr_aclcnt; /* number of acl entries */
/* lr_aclcnt number of ace_t entries follow this */
} lr_acl_v0_t;
/*
* Log record for setting an ACL made of variable-sized ACEs. The counts
* mirror those in lr_acl_create_t; lr_acl_bytes bytes of ACE data follow
* the fixed part.
* NOTE(review): presumably the TX_ACL record - confirm.
*/
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
uint64_t lr_aclcnt; /* number of ACEs in ACL */
uint64_t lr_domcnt; /* number of unique domains */
uint64_t lr_fuidcnt; /* number of real fuids */
uint64_t lr_acl_bytes; /* number of bytes in ACL */
uint64_t lr_acl_flags; /* ACL flags */
/* lr_acl_bytes number of variable sized ace's follows */
} lr_acl_t;
/*
* ZIL structure definitions, interface function prototype and globals.
*/
/*
* ZFS intent log transaction structure
*/
/* How the data of a logged write is carried by its in-memory itx. */
typedef enum {
	/*
	 * indirect - a large write: dmu_sync() the data and put the
	 * blkptr in the log, rather than the actual data
	 */
	WR_INDIRECT	= 0,
	/* immediate - data is copied into lr_write_t */
	WR_COPIED	= 1,
	/* immediate - data needs to be copied if pushed */
	WR_NEED_COPY	= 2
} itx_wr_state_t;
typedef struct itx {
list_node_t itx_node; /* linkage on zl_itx_list */
void *itx_private; /* type-specific opaque data */
itx_wr_state_t itx_wr_state; /* write state */
uint8_t itx_sync; /* synchronous transaction */
uint64_t itx_sod; /* record size on disk */
lr_t itx_lr; /* common part of log record */
/* followed by type-specific part of lr_xx_t and its immediate data */
} itx_t;
/*
* zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
* to handle the cleanup of the dmu_sync() buffer write
*/
typedef struct {
zilog_t *zgd_zilog; /* zilog */
blkptr_t *zgd_bp; /* block pointer */
struct rl *zgd_rl; /* range lock */
} zgd_t;
/*
* Callbacks taken by zil_parse(): one for log blocks and one for log
* records. Both receive the caller's opaque arg and a txg.
*/
typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
uint64_t txg);
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
uint64_t txg);
/*
* Replay handler; zil_replay() takes a table of TX_MAX_TYPE of these.
* NOTE(review): declared with an empty parameter list, so calls through
* this type are not argument-checked by the compiler. Consider giving it
* a full prototype once the intended signature is confirmed.
*/
typedef int zil_replay_func_t();
/*
* Callback supplied to zil_open() and kept in zl_get_data ("callback to
* get object content").
*/
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
extern void zil_init(void);
extern void zil_fini(void);
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
extern void zil_free(zilog_t *zilog);
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
extern void zil_close(zilog_t *zilog);
extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
zil_replay_func_t *replay_func[TX_MAX_TYPE]);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
extern int zil_claim(char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog);
extern int zil_is_committed(zilog_t *zilog);
extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
extern int zil_disable;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIL_H */
-109
View File
@@ -1,109 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIL_IMPL_H
#define _SYS_ZIL_IMPL_H
#pragma ident "@(#)zil_impl.h 1.7 07/12/12 SMI"
#include <sys/zil.h>
#include <sys/dmu_objset.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Log write buffer.
*/
typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
int lwb_nused; /* # used bytes in buffer */
int lwb_sz; /* size of block and buffer */
char *lwb_buf; /* log write buffer */
zio_t *lwb_zio; /* zio for this buffer */
uint64_t lwb_max_txg; /* highest txg in this lwb */
txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
} lwb_t;
/*
* Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
* we've touched so we know which ones need a write cache flush at the end.
*/
typedef struct zil_vdev_node {
uint64_t zv_vdev; /* vdev to be flushed */
avl_node_t zv_node; /* AVL tree linkage */
} zil_vdev_node_t;
/*
* Stable storage intent log management structure. One per dataset.
*/
struct zilog {
kmutex_t zl_lock; /* protects most zilog_t fields */
struct dsl_pool *zl_dmu_pool; /* DSL pool */
spa_t *zl_spa; /* handle for read/write log */
const zil_header_t *zl_header; /* log header buffer */
objset_t *zl_os; /* object set we're logging */
zil_get_data_t *zl_get_data; /* callback to get object content */
zio_t *zl_root_zio; /* log writer root zio */
uint64_t zl_itx_seq; /* next itx sequence number */
uint64_t zl_commit_seq; /* committed up to this number */
uint64_t zl_lr_seq; /* log record sequence number */
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
uint32_t zl_suspend; /* log suspend count */
kcondvar_t zl_cv_writer; /* log writer thread completion */
kcondvar_t zl_cv_suspend; /* log suspend completion */
uint8_t zl_suspending; /* log is currently suspending */
uint8_t zl_keep_first; /* keep first log block in destroy */
uint8_t zl_stop_replay; /* don't replay any further */
uint8_t zl_stop_sync; /* for debugging */
uint8_t zl_writer; /* boolean: write setup in progress */
uint8_t zl_log_error; /* boolean: log write error */
list_t zl_itx_list; /* in-memory itx list (itx_t, via itx_node) */
uint64_t zl_itx_list_sz; /* total size of records on list */
uint64_t zl_cur_used; /* current commit log size used */
uint64_t zl_prev_used; /* previous commit log size used */
list_t zl_lwb_list; /* in-flight log write list (lwb_t, via lwb_node) */
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
};
/*
* AVL node holding one DVA.
* NOTE(review): presumably the element type of zilog's zl_dva_tree
* ("track DVAs during log parse") - confirm against zil.c.
*/
typedef struct zil_dva_node {
dva_t zn_dva; /* the DVA being tracked */
avl_node_t zn_node; /* AVL tree linkage */
} zil_dva_node_t;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIL_IMPL_H */
-388
View File
@@ -1,388 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZIO_H
#define _ZIO_H
#pragma ident "@(#)zio.h 1.20 08/04/01 SMI"
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dkio.h>
#include <sys/fs/zfs.h>
#include <sys/zio_impl.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Block tail: a magic number plus a checksum. It is embedded as the tail
* of gang block headers (zio_gbh_phys_t) and of intent log block trailers
* (zil_trailer_t).
* The hex digits of ZBT_MAGIC read as "zio data bloc tail".
*/
#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
typedef struct zio_block_tail {
uint64_t zbt_magic; /* for validation, endianness */
zio_cksum_t zbt_cksum; /* 256-bit checksum */
} zio_block_tail_t;
/*
* Gang block headers are self-checksumming and contain an array
* of block pointers.
*/
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
sizeof (zio_block_tail_t) - \
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
sizeof (uint64_t))
#define ZIO_GET_IOSIZE(zio) \
(BP_IS_GANG((zio)->io_bp) ? \
SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp))
/*
* Gang block header layout: as many block pointers as fit in
* SPA_GANGBLOCKSIZE once the tail is accounted for, filler words for the
* remaining space, and the block tail last.
*/
typedef struct zio_gbh {
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
uint64_t zg_filler[SPA_GBH_FILLER];
zio_block_tail_t zg_tail;
} zio_gbh_phys_t;
/*
 * Checksum selectors. ZIO_CHECKSUM_FUNCTIONS is the number of selectors
 * and is the length of zio_checksum_table[].
 */
enum zio_checksum {
	ZIO_CHECKSUM_INHERIT		= 0,
	ZIO_CHECKSUM_ON			= 1,
	ZIO_CHECKSUM_OFF		= 2,
	ZIO_CHECKSUM_LABEL		= 3,
	ZIO_CHECKSUM_GANG_HEADER	= 4,
	ZIO_CHECKSUM_ZILOG		= 5,
	ZIO_CHECKSUM_FLETCHER_2		= 6,
	ZIO_CHECKSUM_FLETCHER_4		= 7,
	ZIO_CHECKSUM_SHA256		= 8,
	ZIO_CHECKSUM_FUNCTIONS		= 9
};

/* Algorithm that "on" stands for, and the default selector. */
#define	ZIO_CHECKSUM_ON_VALUE	ZIO_CHECKSUM_FLETCHER_2
#define	ZIO_CHECKSUM_DEFAULT	ZIO_CHECKSUM_ON
/*
 * Compression selectors. ZIO_COMPRESS_FUNCTIONS is the number of
 * selectors and is the length of zio_compress_table[].
 */
enum zio_compress {
	ZIO_COMPRESS_INHERIT	= 0,
	ZIO_COMPRESS_ON		= 1,
	ZIO_COMPRESS_OFF	= 2,
	ZIO_COMPRESS_LZJB	= 3,
	ZIO_COMPRESS_EMPTY	= 4,
	ZIO_COMPRESS_GZIP_1	= 5,
	ZIO_COMPRESS_GZIP_2	= 6,
	ZIO_COMPRESS_GZIP_3	= 7,
	ZIO_COMPRESS_GZIP_4	= 8,
	ZIO_COMPRESS_GZIP_5	= 9,
	ZIO_COMPRESS_GZIP_6	= 10,
	ZIO_COMPRESS_GZIP_7	= 11,
	ZIO_COMPRESS_GZIP_8	= 12,
	ZIO_COMPRESS_GZIP_9	= 13,
	ZIO_COMPRESS_FUNCTIONS	= 14
};

/* Algorithm that "on" stands for, and the default selector. */
#define	ZIO_COMPRESS_ON_VALUE	ZIO_COMPRESS_LZJB
#define	ZIO_COMPRESS_DEFAULT	ZIO_COMPRESS_OFF
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
#define ZIO_PRIORITY_FREE (zio_priority_table[5])
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
#define ZIO_PRIORITY_TABLE_SIZE 10
/*
 * I/O flags. Every flag occupies its own bit; ZIO_FLAG_MUSTSUCCEED is
 * the value with no flag set.
 */
#define	ZIO_FLAG_MUSTSUCCEED		0
#define	ZIO_FLAG_CANFAIL		(1 << 0)
#define	ZIO_FLAG_FAILFAST		(1 << 1)
#define	ZIO_FLAG_CONFIG_HELD		(1 << 2)
#define	ZIO_FLAG_CONFIG_GRABBED		(1 << 3)
#define	ZIO_FLAG_DONT_CACHE		(1 << 4)
#define	ZIO_FLAG_DONT_QUEUE		(1 << 5)
#define	ZIO_FLAG_DONT_PROPAGATE		(1 << 6)
#define	ZIO_FLAG_DONT_RETRY		(1 << 7)
#define	ZIO_FLAG_PHYSICAL		(1 << 8)
#define	ZIO_FLAG_IO_BYPASS		(1 << 9)
#define	ZIO_FLAG_IO_REPAIR		(1 << 10)
#define	ZIO_FLAG_SPECULATIVE		(1 << 11)
#define	ZIO_FLAG_RESILVER		(1 << 12)
#define	ZIO_FLAG_SCRUB			(1 << 13)
#define	ZIO_FLAG_SCRUB_THREAD		(1 << 14)
#define	ZIO_FLAG_SUBBLOCK		(1 << 15)
#define	ZIO_FLAG_NOBOOKMARK		(1 << 16)
#define	ZIO_FLAG_USER			(1 << 17)
#define	ZIO_FLAG_METADATA		(1 << 18)
#define	ZIO_FLAG_WRITE_RETRY		(1 << 19)

/*
 * Flag subsets. Each later mask is a superset of the one before it.
 * NOTE(review): presumably the flags propagated to gang, vdev-child and
 * retried I/Os respectively - confirm against zio.c.
 */
#define	ZIO_FLAG_GANG_INHERIT						\
	(ZIO_FLAG_CANFAIL | ZIO_FLAG_FAILFAST |				\
	ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_DONT_CACHE |			\
	ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_REPAIR |			\
	ZIO_FLAG_SPECULATIVE | ZIO_FLAG_RESILVER |			\
	ZIO_FLAG_SCRUB | ZIO_FLAG_SCRUB_THREAD |			\
	ZIO_FLAG_USER | ZIO_FLAG_METADATA)

#define	ZIO_FLAG_VDEV_INHERIT						\
	(ZIO_FLAG_GANG_INHERIT | ZIO_FLAG_PHYSICAL)

#define	ZIO_FLAG_RETRY_INHERIT						\
	(ZIO_FLAG_VDEV_INHERIT | ZIO_FLAG_CONFIG_GRABBED |		\
	ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_NOBOOKMARK)
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
/*
* We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
* graveyard) to indicate checksum errors and fragmentation.
*/
#define ECKSUM EBADE
#define EFRAGS EBADR
typedef struct zio zio_t;
typedef void zio_done_func_t(zio_t *zio);
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
extern char *zio_type_name[ZIO_TYPES];
/*
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
* identifies any block in the pool. By convention, the meta-objset (MOS)
* is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
* level -1 of the meta-dnode, and intent log blocks (which are chained
* off the root block) have blkid == sequence number. In summary:
*
* mos is objset 0
* meta-dnode is object 0
* root block is <objset, 0, -1, 0>
* intent log is <objset, 0, -1, ZIL sequence number>
*
* Note: this structure is called a bookmark because its first purpose was
* to remember where to resume a pool-wide traverse. The absolute ordering
* for block visitation during traversal is defined in compare_bookmark().
*
* Note: this structure is passed between userland and the kernel.
* Therefore it must not change size or alignment between 32/64 bit
* compilation options.
*/
typedef struct zbookmark {
uint64_t zb_objset;
uint64_t zb_object;
int64_t zb_level;
uint64_t zb_blkid;
} zbookmark_t;
struct zio {
/* Core information about this I/O */
zio_t *io_parent;
zio_t *io_root;
spa_t *io_spa;
zbookmark_t io_bookmark;
enum zio_checksum io_checksum;
enum zio_compress io_compress;
int io_ndvas;
uint64_t io_txg;
blkptr_t *io_bp;
blkptr_t io_bp_copy;
zio_t *io_child;
zio_t *io_sibling_prev;
zio_t *io_sibling_next;
zio_transform_t *io_transform_stack;
zio_t *io_logical;
list_node_t zio_link_node;
/* Callback info */
zio_done_func_t *io_ready;
zio_done_func_t *io_done;
void *io_private;
blkptr_t io_bp_orig;
/* Data represented by this I/O */
void *io_data;
uint64_t io_size;
/* Stuff for the vdev stack */
vdev_t *io_vd;
void *io_vsd;
uint64_t io_offset;
uint64_t io_deadline;
uint64_t io_timestamp;
avl_node_t io_offset_node;
avl_node_t io_deadline_node;
avl_tree_t *io_vdev_tree;
zio_t *io_delegate_list;
zio_t *io_delegate_next;
/* Internal pipeline state */
int io_flags;
int io_orig_flags;
enum zio_type io_type;
enum zio_stage io_stage;
enum zio_stage io_orig_stage;
uint8_t io_stalled;
uint8_t io_priority;
struct dk_callback io_dk_callback;
int io_cmd;
int io_retries;
int io_error;
uint32_t io_numerrors;
uint32_t io_pipeline;
uint32_t io_orig_pipeline;
uint64_t io_children_notready;
uint64_t io_children_notdone;
void *io_waiter;
kmutex_t io_lock;
kcondvar_t io_cv;
/* FMA state */
uint64_t io_ena;
};
extern zio_t *zio_null(zio_t *pio, spa_t *spa,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, int flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
int priority, int flags, zbookmark_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, int checksum, int compress,
int ncopies, uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
zio_done_func_t *ready, zio_done_func_t *done, void *private, int priority,
int flags, zbookmark_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, int checksum,
uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
zio_done_func_t *done, void *private, int priority, int flags,
zbookmark_t *zb);
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private);
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, int priority, int flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
zio_done_func_t *done, void *private, int priority, int flags,
boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
zio_done_func_t *done, void *private, int priority, int flags,
boolean_t labels);
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t txg);
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
extern int zio_wait_for_children_ready(zio_t *zio);
extern int zio_wait_for_children_done(zio_t *zio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
extern void zio_resubmit_stage_async(void *);
/*
* Delegate I/O to a child vdev.
*/
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
uint64_t offset, void *data, uint64_t size, int type, int priority,
int flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);
extern void zio_checksum_verified(zio_t *zio);
extern void zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp);
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
extern boolean_t zio_should_retry(zio_t *zio);
extern int zio_vdev_resume_io(spa_t *);
/*
* Initial setup and teardown.
*/
extern void zio_init(void);
extern void zio_fini(void);
/*
* Fault injection
*/
struct zinject_record;
extern uint32_t zio_injection_enabled;
extern int zio_inject_fault(char *name, int flags, int *id,
struct zinject_record *record);
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
struct zinject_record *record);
extern int zio_clear_fault(int id);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, int error);
#ifdef __cplusplus
}
#endif
#endif /* _ZIO_H */
@@ -1,75 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIO_CHECKSUM_H
#define _SYS_ZIO_CHECKSUM_H
#pragma ident "@(#)zio_checksum.h 1.2 06/03/03 SMI"
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Signature for checksum functions.
*/
typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
/*
* Information about each checksum function.
*/
typedef struct zio_checksum_info {
zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
int ci_correctable; /* number of correctable bits */
int ci_zbt; /* uses zio block tail? */
char *ci_name; /* descriptive name */
} zio_checksum_info_t;
extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
* Checksum routines.
*/
extern zio_checksum_t fletcher_2_native;
extern zio_checksum_t fletcher_4_native;
extern zio_checksum_t fletcher_4_incremental_native;
extern zio_checksum_t fletcher_2_byteswap;
extern zio_checksum_t fletcher_4_byteswap;
extern zio_checksum_t fletcher_4_incremental_byteswap;
extern zio_checksum_t zio_checksum_SHA256;
extern void zio_checksum(uint_t checksum, zio_cksum_t *zcp,
void *data, uint64_t size);
extern int zio_checksum_error(zio_t *zio);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIO_CHECKSUM_H */
@@ -1,82 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZIO_COMPRESS_H
#define _SYS_ZIO_COMPRESS_H
#pragma ident "@(#)zio_compress.h 1.2 07/03/22 SMI"
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Common signature for all zio compress/decompress functions.
*/
typedef size_t zio_compress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
typedef int zio_decompress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
/*
* Information about each compression function.
*/
typedef struct zio_compress_info {
zio_compress_func_t *ci_compress; /* compression function */
zio_decompress_func_t *ci_decompress; /* decompression function */
int ci_level; /* level parameter */
char *ci_name; /* algorithm name */
} zio_compress_info_t;
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
/*
* Compression routines.
*/
extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
/*
* Compress and decompress data if necessary.
*/
extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
void *dest, uint64_t destsize);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZIO_COMPRESS_H */
-178
View File
@@ -1,178 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZIO_IMPL_H
#define _ZIO_IMPL_H
#pragma ident "@(#)zio_impl.h 1.6 07/12/12 SMI"
#include <sys/zfs_context.h>
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * I/O Groups: pipeline stage definitions.
 *
 * The numeric value of a stage is used as a bit position when pipeline
 * masks are built (1U << stage), so the values are spelled out here.
 * The five-letter tag on each stage marks the I/O groups that use it.
 * NOTE(review): presumably R = read, W = write, F = free, C = claim,
 * I = ioctl - confirm.
 */
typedef enum zio_stage {
	ZIO_STAGE_OPEN				= 0,	/* RWFCI */
	ZIO_STAGE_WAIT_FOR_CHILDREN_READY	= 1,	/* RWFCI */

	ZIO_STAGE_READ_INIT			= 2,	/* R---- */

	ZIO_STAGE_ISSUE_ASYNC			= 3,	/* -W--- */
	ZIO_STAGE_WRITE_COMPRESS		= 4,	/* -W--- */
	ZIO_STAGE_CHECKSUM_GENERATE		= 5,	/* -W--- */

	ZIO_STAGE_GET_GANG_HEADER		= 6,	/* -WFC- */
	ZIO_STAGE_REWRITE_GANG_MEMBERS		= 7,	/* -W--- */
	ZIO_STAGE_FREE_GANG_MEMBERS		= 8,	/* --F-- */
	ZIO_STAGE_CLAIM_GANG_MEMBERS		= 9,	/* ---C- */

	ZIO_STAGE_DVA_ALLOCATE			= 10,	/* -W--- */
	ZIO_STAGE_DVA_FREE			= 11,	/* --F-- */
	ZIO_STAGE_DVA_CLAIM			= 12,	/* ---C- */

	ZIO_STAGE_GANG_CHECKSUM_GENERATE	= 13,	/* -W--- */

	ZIO_STAGE_READY				= 14,	/* RWFCI */

	ZIO_STAGE_VDEV_IO_START			= 15,	/* RW--I */
	ZIO_STAGE_VDEV_IO_DONE			= 16,	/* RW--I */
	ZIO_STAGE_VDEV_IO_ASSESS		= 17,	/* RW--I */

	ZIO_STAGE_WAIT_FOR_CHILDREN_DONE	= 18,	/* RWFCI */

	ZIO_STAGE_CHECKSUM_VERIFY		= 19,	/* R---- */
	ZIO_STAGE_READ_GANG_MEMBERS		= 20,	/* R---- */
	ZIO_STAGE_READ_DECOMPRESS		= 21,	/* R---- */

	ZIO_STAGE_ASSESS			= 22,	/* RWFCI */
	ZIO_STAGE_DONE				= 23	/* RWFCI */
} zio_stage_t;
/*
 * Pipeline masks.
 *
 * Each mask is a bitwise OR of (1U << ZIO_STAGE_*) terms, one bit per
 * zio_stage_t value.  Every object-like mask expands to a single
 * parenthesized expression, either directly or through an alias of one,
 * so it can be combined with operators such as & or == without the
 * surrounding expression regrouping its terms.
 */

/* Wait-for-children (ready and done), ready, assess and done stages. */
#define ZIO_INTERLOCK_STAGES \
    ((1U << ZIO_STAGE_WAIT_FOR_CHILDREN_READY) | \
    (1U << ZIO_STAGE_READY) | \
    (1U << ZIO_STAGE_WAIT_FOR_CHILDREN_DONE) | \
    (1U << ZIO_STAGE_ASSESS) | \
    (1U << ZIO_STAGE_DONE))

/* The three vdev I/O stages: start, done and assess. */
#define ZIO_VDEV_IO_STAGES \
    ((1U << ZIO_STAGE_VDEV_IO_START) | \
    (1U << ZIO_STAGE_VDEV_IO_DONE) | \
    (1U << ZIO_STAGE_VDEV_IO_ASSESS))

/* Interlock stages + vdev I/O stages + checksum verification. */
#define ZIO_READ_PHYS_PIPELINE \
    (ZIO_INTERLOCK_STAGES | \
    ZIO_VDEV_IO_STAGES | \
    (1U << ZIO_STAGE_CHECKSUM_VERIFY))

/* Alias: identical to ZIO_READ_PHYS_PIPELINE. */
#define ZIO_READ_GANG_PIPELINE \
    ZIO_READ_PHYS_PIPELINE

/*
 * ZIO_READ_PHYS_PIPELINE plus ZIO_STAGE_READ_INIT.
 *
 * The outer parentheses are required: without them an expression such as
 * (ZIO_READ_PIPELINE & mask) would apply '&' only to the trailing
 * ZIO_READ_PHYS_PIPELINE term, because '&' binds tighter than '|'.
 */
#define ZIO_READ_PIPELINE \
    ((1U << ZIO_STAGE_READ_INIT) | \
    ZIO_READ_PHYS_PIPELINE)

/* Stages shared by all write-type pipelines defined below. */
#define ZIO_WRITE_COMMON_STAGES \
    (ZIO_INTERLOCK_STAGES | \
    ZIO_VDEV_IO_STAGES | \
    (1U << ZIO_STAGE_ISSUE_ASYNC) | \
    (1U << ZIO_STAGE_CHECKSUM_GENERATE))

/* Alias: identical to ZIO_WRITE_COMMON_STAGES. */
#define ZIO_WRITE_PHYS_PIPELINE \
    ZIO_WRITE_COMMON_STAGES

/* Common write stages + compression + DVA allocation. */
#define ZIO_WRITE_PIPELINE \
    (ZIO_WRITE_COMMON_STAGES | \
    (1U << ZIO_STAGE_WRITE_COMPRESS) | \
    (1U << ZIO_STAGE_DVA_ALLOCATE))

/* Gang stages added to a rewrite when the block pointer is a gang block. */
#define ZIO_GANG_REWRITE_STAGES \
    ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
    (1U << ZIO_STAGE_REWRITE_GANG_MEMBERS) | \
    (1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE))

/* Gang stages added to a free when the block pointer is a gang block. */
#define ZIO_GANG_FREE_STAGES \
    ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
    (1U << ZIO_STAGE_FREE_GANG_MEMBERS))

/* Gang stages added to a claim when the block pointer is a gang block. */
#define ZIO_GANG_CLAIM_STAGES \
    ((1U << ZIO_STAGE_GET_GANG_HEADER) | \
    (1U << ZIO_STAGE_CLAIM_GANG_MEMBERS))

/*
 * Common write stages, plus ZIO_GANG_REWRITE_STAGES when BP_IS_GANG(bp)
 * is true.  BP_IS_GANG is not defined in this header.
 */
#define ZIO_REWRITE_PIPELINE(bp) \
    (ZIO_WRITE_COMMON_STAGES | \
    (BP_IS_GANG(bp) ? ZIO_GANG_REWRITE_STAGES : 0))

/* Common write stages + DVA allocation (no compression stage). */
#define ZIO_WRITE_ALLOCATE_PIPELINE \
    (ZIO_WRITE_COMMON_STAGES | \
    (1U << ZIO_STAGE_DVA_ALLOCATE))

/*
 * Interlock stages + DVA free, plus ZIO_GANG_FREE_STAGES when
 * BP_IS_GANG(bp) is true.
 */
#define ZIO_FREE_PIPELINE(bp) \
    (ZIO_INTERLOCK_STAGES | \
    (1U << ZIO_STAGE_DVA_FREE) | \
    (BP_IS_GANG(bp) ? ZIO_GANG_FREE_STAGES : 0))

/*
 * Interlock stages + DVA claim, plus ZIO_GANG_CLAIM_STAGES when
 * BP_IS_GANG(bp) is true.
 */
#define ZIO_CLAIM_PIPELINE(bp) \
    (ZIO_INTERLOCK_STAGES | \
    (1U << ZIO_STAGE_DVA_CLAIM) | \
    (BP_IS_GANG(bp) ? ZIO_GANG_CLAIM_STAGES : 0))

/* Interlock stages + vdev I/O stages. */
#define ZIO_IOCTL_PIPELINE \
    (ZIO_INTERLOCK_STAGES | \
    ZIO_VDEV_IO_STAGES)

/* Alias: identical to ZIO_INTERLOCK_STAGES. */
#define ZIO_WAIT_FOR_CHILDREN_PIPELINE \
    ZIO_INTERLOCK_STAGES

/*
 * Vdev I/O stages + assess, wait-for-children-done and done.  Unlike
 * ZIO_INTERLOCK_STAGES this omits the two "ready" stages.
 */
#define ZIO_VDEV_CHILD_PIPELINE \
    (ZIO_VDEV_IO_STAGES | \
    (1U << ZIO_STAGE_ASSESS) | \
    (1U << ZIO_STAGE_WAIT_FOR_CHILDREN_DONE) | \
    (1U << ZIO_STAGE_DONE))

/* Alias: identical to ZIO_INTERLOCK_STAGES. */
#define ZIO_ERROR_PIPELINE_MASK \
    ZIO_INTERLOCK_STAGES
/*
 * One zio transform record.
 *
 * zt_data     untyped pointer to the record's data
 * zt_size     64-bit size value
 * zt_bufsize  second 64-bit size value
 * zt_next     pointer to another record of the same type
 *
 * NOTE(review): presumably zt_size is the size of the data and zt_bufsize
 * the size of the buffer behind zt_data, with zt_next chaining records
 * into a list -- confirm against the users of this structure.
 */
typedef struct zio_transform {
    void *zt_data;
    uint64_t zt_size;
    uint64_t zt_bufsize;
    struct zio_transform *zt_next;
} zio_transform_t;
/*
 * Init/fini pair for "zio_inject".  Both take no arguments and return
 * nothing; the definitions are outside this header.
 */
void zio_inject_init(void);
void zio_inject_fini(void);
#ifdef __cplusplus
}
#endif
#endif /* _ZIO_IMPL_H */
-70
View File
@@ -1,70 +0,0 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_ZVOL_H
#define _SYS_ZVOL_H
#pragma ident "@(#)zvol.h 1.5 08/04/01 SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Fixed unsigned long long constants, visible to both kernel and
 * non-kernel consumers of this header.
 * NOTE(review): judging by the names these look like well-known object
 * numbers (a volume object and a ZAP object) -- confirm against the zvol
 * implementation before relying on that.
 */
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
#ifdef _KERNEL
/*
 * Kernel-only zvol interface: these declarations are compiled only when
 * _KERNEL is defined.  The groupings below follow the function names and
 * signatures; the definitions are outside this header.
 */

/* Volume size / block size checks, stats, and the create callback. */
int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
int zvol_check_volblocksize(uint64_t volblocksize);
int zvol_get_stats(objset_t *os, nvlist_t *nv);
void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);

/* Minor create/remove and size setters; first argument is a const char *. */
int zvol_create_minor(const char *, major_t);
int zvol_remove_minor(const char *);
int zvol_set_volsize(const char *, major_t, uint64_t);
int zvol_set_volblocksize(const char *, uint64_t);

/* Entry points that operate on a dev_t (or a buf_t for strategy). */
int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
int zvol_strategy(buf_t *bp);
int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr);
int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr);
int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr);
int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr);
int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
    int *rvalp);

/* Argument-less busy query and init/fini pair. */
int zvol_busy(void);
void zvol_init(void);
void zvol_fini(void);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZVOL_H */