mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
Initial Linux ZFS GIT Repo
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
subdir-m += fm fs

# Every header named below ends up in DISTFILES, in this order.
DISTFILES = arc.h bplist.h compress.h dbuf.h dmu.h dmu_impl.h \
            dmu_objset.h dmu_traverse.h dmu_tx.h dmu_zfetch.h \
            dnode.h dsl_dataset.h dsl_deleg.h dsl_dir.h dsl_pool.h \
            dsl_prop.h dsl_synctask.h list.h list_impl.h metaslab.h \
            metaslab_impl.h refcount.h rprwlock.h rrwlock.h spa.h \
            spa_boot.h spa_impl.h space_map.h txg.h txg_impl.h uberblock.h \
            uberblock_impl.h unique.h vdev.h vdev_disk.h vdev_file.h \
            vdev_impl.h zap.h zap_impl.h zap_leaf.h zfs_acl.h \
            zfs_context.h zfs_context_user.h zfs_ctldir.h zfs_debug.h \
            zfs_dir.h zfs_fuid.h zfs_i18n.h zfs_ioctl.h zfs_rlock.h \
            zfs_vfsops.h zfs_znode.h zil.h zil_impl.h zio.h \
            zio_checksum.h zio_compress.h zio_impl.h zvol.h
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ARC_H
|
||||
#define _SYS_ARC_H
|
||||
|
||||
#pragma ident "@(#)arc.h 1.12 08/03/20 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||
typedef struct arc_buf arc_buf_t;
|
||||
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
|
||||
typedef int arc_evict_func_t(void *private);
|
||||
|
||||
/* generic arc_done_func_t's which you can use */
|
||||
arc_done_func_t arc_bcopy_func;
|
||||
arc_done_func_t arc_getbuf_func;
|
||||
|
||||
struct arc_buf {
|
||||
arc_buf_hdr_t *b_hdr;
|
||||
arc_buf_t *b_next;
|
||||
void *b_data;
|
||||
arc_evict_func_t *b_efunc;
|
||||
void *b_private;
|
||||
};
|
||||
|
||||
/*
 * Kind of payload held by an ARC buffer.  ARC_BUFC_NUMTYPES is the number
 * of real kinds and must stay last.
 */
typedef enum arc_buf_contents {
	ARC_BUFC_DATA = 0,		/* buffer contains data */
	ARC_BUFC_METADATA = 1,		/* buffer contains metadata */
	ARC_BUFC_NUMTYPES = 2
} arc_buf_contents_t;
|
||||
/*
 * Flag bits passed into calls to the ARC.  Bit 0 is not used by any of
 * the flags defined here.
 */
#define	ARC_WAIT	0x02	/* perform I/O synchronously */
#define	ARC_NOWAIT	0x04	/* perform I/O asynchronously */
#define	ARC_PREFETCH	0x08	/* I/O is a prefetch */
#define	ARC_CACHED	0x10	/* I/O was already in cache */
|
||||
|
||||
void arc_space_consume(uint64_t space);
|
||||
void arc_space_return(uint64_t space);
|
||||
void *arc_data_buf_alloc(uint64_t space);
|
||||
void arc_data_buf_free(void *buf, uint64_t space);
|
||||
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
|
||||
arc_buf_contents_t type);
|
||||
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_size(arc_buf_t *buf);
|
||||
void arc_release(arc_buf_t *buf, void *tag);
|
||||
int arc_released(arc_buf_t *buf);
|
||||
int arc_has_callback(arc_buf_t *buf);
|
||||
void arc_buf_freeze(arc_buf_t *buf);
|
||||
void arc_buf_thaw(arc_buf_t *buf);
|
||||
#ifdef ZFS_DEBUG
|
||||
int arc_referenced(arc_buf_t *buf);
|
||||
#endif
|
||||
|
||||
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_byteswap_func_t *swap,
|
||||
arc_done_func_t *done, void *private, int priority, int flags,
|
||||
uint32_t *arc_flags, zbookmark_t *zb);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, int checksum, int compress,
|
||||
int ncopies, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
|
||||
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
|
||||
int flags, zbookmark_t *zb);
|
||||
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private, uint32_t arc_flags);
|
||||
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
|
||||
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||
int arc_buf_evict(arc_buf_t *buf);
|
||||
|
||||
void arc_flush(spa_t *spa);
|
||||
void arc_tempreserve_clear(uint64_t reserve);
|
||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||
|
||||
void arc_init(void);
|
||||
void arc_fini(void);
|
||||
|
||||
/*
|
||||
* Level 2 ARC
|
||||
*/
|
||||
|
||||
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
|
||||
void l2arc_remove_vdev(vdev_t *vd);
|
||||
void l2arc_init(void);
|
||||
void l2arc_fini(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ARC_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BPLIST_H
|
||||
#define _SYS_BPLIST_H
|
||||
|
||||
#pragma ident "@(#)bplist.h 1.3 06/05/24 SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Bonus buffer for the dead lists.  The object's contents are an array of
 * bpl_entries blkptr_t's, representing a total of bpl_bytes of physical
 * space.
 */
typedef struct bplist_phys {
	uint64_t	bpl_entries;
	uint64_t	bpl_bytes;
	uint64_t	bpl_comp;
	uint64_t	bpl_uncomp;
} bplist_phys_t;

/*
 * Size of two uint64_t words, i.e. exactly the bpl_entries and bpl_bytes
 * fields at the front of bplist_phys_t.
 */
#define	BPLIST_SIZE_V0	(2 * sizeof (uint64_t))
|
||||
|
||||
typedef struct bplist_q {
|
||||
blkptr_t bpq_blk;
|
||||
void *bpq_next;
|
||||
} bplist_q_t;
|
||||
|
||||
typedef struct bplist {
|
||||
kmutex_t bpl_lock;
|
||||
objset_t *bpl_mos;
|
||||
uint64_t bpl_object;
|
||||
uint8_t bpl_blockshift;
|
||||
uint8_t bpl_bpshift;
|
||||
uint8_t bpl_havecomp;
|
||||
bplist_q_t *bpl_queue;
|
||||
bplist_phys_t *bpl_phys;
|
||||
dmu_buf_t *bpl_dbuf;
|
||||
dmu_buf_t *bpl_cached_dbuf;
|
||||
} bplist_t;
|
||||
|
||||
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
|
||||
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
|
||||
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
|
||||
extern void bplist_close(bplist_t *bpl);
|
||||
extern boolean_t bplist_empty(bplist_t *bpl);
|
||||
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
|
||||
extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx);
|
||||
extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp);
|
||||
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
|
||||
extern int bplist_space(bplist_t *bpl,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_BPLIST_H */
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 1998 by Sun Microsystems, Inc.
|
||||
* All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_COMPRESS_H
|
||||
#define _SYS_COMPRESS_H
|
||||
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Parameter names are not given by this header; see the implementation for
 * which pointer is the source and which the destination.
 */
extern size_t	compress(void *, void *, size_t);
extern size_t	decompress(void *, void *, size_t, size_t);
extern uint32_t	checksum32(void *, size_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_COMPRESS_H */
|
||||
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DBUF_H
|
||||
#define _SYS_DBUF_H
|
||||
|
||||
#pragma ident "@(#)dbuf.h 1.10 07/08/26 SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* All-ones unsigned long long value, used as a block id. */
#define	DB_BONUS_BLKID	(-1ULL)

#define	IN_DMU_SYNC	2
|
||||
|
||||
/*
 * Flag bits accepted by dbuf_read().
 */
#define	DB_RF_MUST_SUCCEED	0x01
#define	DB_RF_CANFAIL		0x02
#define	DB_RF_HAVESTRUCT	0x04
#define	DB_RF_NOPREFETCH	0x08
#define	DB_RF_NEVERWAIT		0x10
#define	DB_RF_CACHED		0x20
|
||||
|
||||
/*
 * The state transition diagram for dbufs looks like:
 *
 *		+----> READ ----+
 *		|		|
 *		|		V
 *  (alloc)-->UNCACHED	     CACHED-->EVICTING-->(free)
 *		|		^
 *		|		|
 *		+----> FILL ----+
 */
typedef enum dbuf_states {
	DB_UNCACHED = 0,
	DB_FILL = 1,
	DB_READ = 2,
	DB_CACHED = 3,
	DB_EVICTING = 4
} dbuf_states_t;
|
||||
|
||||
struct objset_impl;
struct dnode;
struct dmu_tx;

/*
 * level = 0 means the user data
 * level = 1 means the single indirect block
 * etc.
 */

/*
 * True when both the next and prev pointers of a list link are NULL.
 * The argument is evaluated twice.
 */
#define	LIST_LINK_INACTIVE(link)	\
	(!(link)->list_next && !(link)->list_prev)

struct dmu_buf_impl;
|
||||
|
||||
/* Values stored in dr_override_state of a leaf dirty record. */
typedef enum override_states {
	DR_NOT_OVERRIDDEN = 0,
	DR_IN_DMU_SYNC = 1,
	DR_OVERRIDDEN = 2
} override_states_t;
|
||||
|
||||
typedef struct dbuf_dirty_record {
|
||||
/* link on our parents dirty list */
|
||||
list_node_t dr_dirty_node;
|
||||
|
||||
/* transaction group this data will sync in */
|
||||
uint64_t dr_txg;
|
||||
|
||||
/* zio of outstanding write IO */
|
||||
zio_t *dr_zio;
|
||||
|
||||
/* pointer back to our dbuf */
|
||||
struct dmu_buf_impl *dr_dbuf;
|
||||
|
||||
/* pointer to next dirty record */
|
||||
struct dbuf_dirty_record *dr_next;
|
||||
|
||||
/* pointer to parent dirty record */
|
||||
struct dbuf_dirty_record *dr_parent;
|
||||
|
||||
union dirty_types {
|
||||
struct dirty_indirect {
|
||||
|
||||
/* protect access to list */
|
||||
kmutex_t dr_mtx;
|
||||
|
||||
/* Our list of dirty children */
|
||||
list_t dr_children;
|
||||
} di;
|
||||
struct dirty_leaf {
|
||||
|
||||
/*
|
||||
* dr_data is set when we dirty the buffer
|
||||
* so that we can retain the pointer even if it
|
||||
* gets COW'd in a subsequent transaction group.
|
||||
*/
|
||||
arc_buf_t *dr_data;
|
||||
blkptr_t dr_overridden_by;
|
||||
override_states_t dr_override_state;
|
||||
} dl;
|
||||
} dt;
|
||||
} dbuf_dirty_record_t;
|
||||
|
||||
typedef struct dmu_buf_impl {
|
||||
/*
|
||||
* The following members are immutable, with the exception of
|
||||
* db.db_data, which is protected by db_mtx.
|
||||
*/
|
||||
|
||||
/* the publicly visible structure */
|
||||
dmu_buf_t db;
|
||||
|
||||
/* the objset we belong to */
|
||||
struct objset_impl *db_objset;
|
||||
|
||||
/*
|
||||
* the dnode we belong to (NULL when evicted)
|
||||
*/
|
||||
struct dnode *db_dnode;
|
||||
|
||||
/*
|
||||
* our parent buffer; if the dnode points to us directly,
|
||||
* db_parent == db_dnode->dn_dbuf
|
||||
* only accessed by sync thread ???
|
||||
* (NULL when evicted)
|
||||
*/
|
||||
struct dmu_buf_impl *db_parent;
|
||||
|
||||
/*
|
||||
* link for hash table of all dmu_buf_impl_t's
|
||||
*/
|
||||
struct dmu_buf_impl *db_hash_next;
|
||||
|
||||
/* our block number */
|
||||
uint64_t db_blkid;
|
||||
|
||||
/*
|
||||
* Pointer to the blkptr_t which points to us. May be NULL if we
|
||||
* don't have one yet. (NULL when evicted)
|
||||
*/
|
||||
blkptr_t *db_blkptr;
|
||||
|
||||
/*
|
||||
* Our indirection level. Data buffers have db_level==0.
|
||||
* Indirect buffers which point to data buffers have
|
||||
* db_level==1. etc. Buffers which contain dnodes have
|
||||
* db_level==0, since the dnodes are stored in a file.
|
||||
*/
|
||||
uint8_t db_level;
|
||||
|
||||
/* db_mtx protects the members below */
|
||||
kmutex_t db_mtx;
|
||||
|
||||
/*
|
||||
* Current state of the buffer
|
||||
*/
|
||||
dbuf_states_t db_state;
|
||||
|
||||
/*
|
||||
* Refcount accessed by dmu_buf_{hold,rele}.
|
||||
* If nonzero, the buffer can't be destroyed.
|
||||
* Protected by db_mtx.
|
||||
*/
|
||||
refcount_t db_holds;
|
||||
|
||||
/* buffer holding our data */
|
||||
arc_buf_t *db_buf;
|
||||
|
||||
kcondvar_t db_changed;
|
||||
dbuf_dirty_record_t *db_data_pending;
|
||||
|
||||
/* pointer to most recent dirty record for this buffer */
|
||||
dbuf_dirty_record_t *db_last_dirty;
|
||||
|
||||
/*
|
||||
* Our link on the owner dnodes's dn_dbufs list.
|
||||
* Protected by its dn_dbufs_mtx.
|
||||
*/
|
||||
list_node_t db_link;
|
||||
|
||||
/* Data which is unique to data (leaf) blocks: */
|
||||
|
||||
/* stuff we store for the user (see dmu_buf_set_user) */
|
||||
void *db_user_ptr;
|
||||
void **db_user_data_ptr_ptr;
|
||||
dmu_buf_evict_func_t *db_evict_func;
|
||||
|
||||
uint8_t db_immediate_evict;
|
||||
uint8_t db_freed_in_flight;
|
||||
|
||||
uint8_t db_dirtycnt;
|
||||
} dmu_buf_impl_t;
|
||||
|
||||
/* Note: the dbuf hash table is exposed only for the mdb module */

/* Number of hash mutexes; the index mask below needs a power of two. */
#define	DBUF_MUTEXES 256

/* Address of the mutex selected by the low bits of idx. */
#define	DBUF_HASH_MUTEX(h, idx)	\
	((h)->hash_mutexes + ((idx) & (DBUF_MUTEXES - 1)))
|
||||
typedef struct dbuf_hash_table {
|
||||
uint64_t hash_table_mask;
|
||||
dmu_buf_impl_t **hash_table;
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES];
|
||||
} dbuf_hash_table_t;
|
||||
|
||||
|
||||
uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
|
||||
|
||||
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
|
||||
void dbuf_create_bonus(struct dnode *dn);
|
||||
|
||||
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
|
||||
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
|
||||
void *tag);
|
||||
int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
|
||||
void *tag, dmu_buf_impl_t **dbp);
|
||||
|
||||
void dbuf_prefetch(struct dnode *dn, uint64_t blkid);
|
||||
|
||||
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
|
||||
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
|
||||
|
||||
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
|
||||
|
||||
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
|
||||
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
void dbuf_evict(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks,
|
||||
struct dmu_tx *);
|
||||
|
||||
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_init(void);
|
||||
void dbuf_fini(void);
|
||||
|
||||
/*
 * Evaluates to the ARC buffer content type for a dbuf: ARC_BUFC_METADATA
 * when the dbuf is above level 0, or when the dmu_ot[] entry for its
 * dnode's dn_type has ot_metadata set; ARC_BUFC_DATA otherwise.
 *
 * The expansion is a parenthesized expression with no trailing semicolon,
 * so it can be used as a function argument, in a condition or in an
 * initializer.  The argument is evaluated more than once, so do not pass
 * an expression with side effects.  db_dnode is dereferenced whenever
 * db_level is 0.
 */
#define	DBUF_GET_BUFC_TYPE(db)					\
	((((db)->db_level > 0) ||				\
	    (dmu_ot[(db)->db_dnode->dn_type].ot_metadata)) ?	\
	    ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
||||
|
||||
#ifdef ZFS_DEBUG

/*
 * Debug tracing for a dbuf.  The fixed prefix literal and the caller's fmt
 * are joined by plain string-literal concatenation; a ## between them would
 * make the joining explicit, but gcc does not support that token there.
 *
 * dprintf_dbuf() needs at least one argument after fmt, and evaluates
 * (dbuf) several times.  The object number is printed as "mdn" when it
 * equals DMU_META_DNODE_OBJECT.
 */
#define	dprintf_dbuf(dbuf, fmt, ...) do {				\
	if (zfs_flags & ZFS_DEBUG_DPRINTF) {				\
		char __db_buf[32];					\
		uint64_t __db_obj = (dbuf)->db.db_object;		\
		if (__db_obj != DMU_META_DNODE_OBJECT) {		\
			(void) snprintf(__db_buf, sizeof (__db_buf),	\
			    "%lld", (u_longlong_t)__db_obj);		\
		} else {						\
			(void) strcpy(__db_buf, "mdn");			\
		}							\
		dprintf_ds((dbuf)->db_objset->os_dsl_dataset,		\
		    "obj=%s lvl=%u blkid=%lld " fmt,			\
		    __db_buf, (dbuf)->db_level,				\
		    (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__);	\
	}								\
_NOTE(CONSTCOND) } while (0)

/*
 * Same as dprintf_dbuf(), with the block pointer bp formatted by
 * sprintf_blkptr() into a temporary BP_SPRINTF_LEN buffer and appended to
 * the message, followed by a newline.  The buffer is freed before leaving.
 */
#define	dprintf_dbuf_bp(db, bp, fmt, ...) do {				\
	if (zfs_flags & ZFS_DEBUG_DPRINTF) {				\
		char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP);	\
		sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp);		\
		dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf);	\
		kmem_free(__blkbuf, BP_SPRINTF_LEN);			\
	}								\
_NOTE(CONSTCOND) } while (0)

#define	DBUF_VERIFY(db)	dbuf_verify(db)

#else	/* !ZFS_DEBUG */

/* Without ZFS_DEBUG all three expand to nothing; arguments are dropped. */
#define	dprintf_dbuf(db, fmt, ...)
#define	dprintf_dbuf_bp(db, bp, fmt, ...)
#define	DBUF_VERIFY(db)

#endif	/* ZFS_DEBUG */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DBUF_H */
|
||||
@@ -0,0 +1,620 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_H
|
||||
#define _SYS_DMU_H
|
||||
|
||||
#pragma ident "@(#)dmu.h 1.38 08/04/27 SMI"
|
||||
|
||||
/*
|
||||
* This file describes the interface that the DMU provides for its
|
||||
* consumers.
|
||||
*
|
||||
* The DMU also interacts with the SPA. That interface is described in
|
||||
* dmu_spa.h.
|
||||
*/
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/cred.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Struct tags that this header refers to only through pointers.  Each tag
 * is declared once; struct spa was previously listed twice.
 */
struct blkptr;
struct dnode;
struct drr_begin;
struct drr_end;
struct dsl_dataset;
struct dsl_pool;
struct nvlist;
struct objset_impl;
struct page;
struct spa;
struct uio;
struct vnode;
struct zap_cursor;
struct zbookmark;
struct zilog;
struct zio;

/* Handle types whose structs are not defined in this header section. */
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
|
||||
|
||||
/*
 * DMU object types.  Values are assigned consecutively from 0, so new
 * types may only be added immediately before DMU_OT_NUMTYPES.  The trailing
 * comment on each entry is the annotation carried by the original list.
 */
typedef enum dmu_object_type {
	DMU_OT_NONE = 0,

	/* general: */
	DMU_OT_OBJECT_DIRECTORY,	/* ZAP */
	DMU_OT_OBJECT_ARRAY,		/* UINT64 */
	DMU_OT_PACKED_NVLIST,		/* UINT8 (XDR by nvlist_pack/unpack) */
	DMU_OT_PACKED_NVLIST_SIZE,	/* UINT64 */
	DMU_OT_BPLIST,			/* UINT64 */
	DMU_OT_BPLIST_HDR,		/* UINT64 */

	/* spa: */
	DMU_OT_SPACE_MAP_HEADER,	/* UINT64 */
	DMU_OT_SPACE_MAP,		/* UINT64 */

	/* zil: */
	DMU_OT_INTENT_LOG,		/* UINT64 */

	/* dmu: */
	DMU_OT_DNODE,			/* DNODE */
	DMU_OT_OBJSET,			/* OBJSET */

	/* dsl: */
	DMU_OT_DSL_DIR,			/* UINT64 */
	DMU_OT_DSL_DIR_CHILD_MAP,	/* ZAP */
	DMU_OT_DSL_DS_SNAP_MAP,		/* ZAP */
	DMU_OT_DSL_PROPS,		/* ZAP */
	DMU_OT_DSL_DATASET,		/* UINT64 */

	/* zpl: */
	DMU_OT_ZNODE,			/* ZNODE */
	DMU_OT_OLDACL,			/* Old ACL */
	DMU_OT_PLAIN_FILE_CONTENTS,	/* UINT8 */
	DMU_OT_DIRECTORY_CONTENTS,	/* ZAP */
	DMU_OT_MASTER_NODE,		/* ZAP */
	DMU_OT_UNLINKED_SET,		/* ZAP */

	/* zvol: */
	DMU_OT_ZVOL,			/* UINT8 */
	DMU_OT_ZVOL_PROP,		/* ZAP */

	/* other; for testing only! */
	DMU_OT_PLAIN_OTHER,		/* UINT8 */
	DMU_OT_UINT64_OTHER,		/* UINT64 */
	DMU_OT_ZAP_OTHER,		/* ZAP */

	/* new object types: */
	DMU_OT_ERROR_LOG,		/* ZAP */
	DMU_OT_SPA_HISTORY,		/* UINT8 */
	DMU_OT_SPA_HISTORY_OFFSETS,	/* spa_his_phys_t */
	DMU_OT_POOL_PROPS,		/* ZAP */
	DMU_OT_DSL_PERMS,		/* ZAP */
	DMU_OT_ACL,			/* ACL */
	DMU_OT_SYSACL,			/* SYSACL */
	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */

	DMU_OT_NUMTYPES
} dmu_object_type_t;
|
||||
|
||||
/* Objset types; DMU_OST_NUMTYPES counts the entries before it. */
typedef enum dmu_objset_type {
	DMU_OST_NONE = 0,
	DMU_OST_META = 1,
	DMU_OST_ZFS = 2,
	DMU_OST_ZVOL = 3,
	DMU_OST_OTHER = 4,		/* For testing only! */
	DMU_OST_ANY = 5,		/* Be careful! */
	DMU_OST_NUMTYPES = 6
} dmu_objset_type_t;
|
||||
|
||||
/*
 * Byteswap routines.  All share one signature: a buffer pointer and its
 * size as a size_t.
 */
void	byteswap_uint64_array(void *buf, size_t size);
void	byteswap_uint32_array(void *buf, size_t size);
void	byteswap_uint16_array(void *buf, size_t size);
void	byteswap_uint8_array(void *buf, size_t size);
void	zap_byteswap(void *buf, size_t size);
void	zfs_oldacl_byteswap(void *buf, size_t size);
void	zfs_acl_byteswap(void *buf, size_t size);
void	zfs_znode_byteswap(void *buf, size_t size);
|
||||
|
||||
/*
 * Dataset open modes.  The low bits hold one access level, extracted with
 * DS_MODE_LEVEL(); DS_MODE_READONLY and DS_MODE_INCONSISTENT are separate
 * flag bits OR-ed on top of the level.
 */
#define	DS_MODE_NONE		0	/* invalid, to aid debugging */
#define	DS_MODE_STANDARD	1	/* normal access, no special needs */
#define	DS_MODE_PRIMARY		2	/* the "main" access, e.g. a mount */
#define	DS_MODE_EXCLUSIVE	3	/* exclusive access, e.g. to destroy */
#define	DS_MODE_LEVELS		4
#define	DS_MODE_LEVEL(x)	((x) & (DS_MODE_LEVELS - 1))

#define	DS_MODE_READONLY	0x8
#define	DS_MODE_IS_READONLY(x)	((x) & DS_MODE_READONLY)

#define	DS_MODE_INCONSISTENT	0x10
#define	DS_MODE_IS_INCONSISTENT(x)	((x) & DS_MODE_INCONSISTENT)

/* Flag bits for the flags argument of dmu_objset_find(). */
#define	DS_FIND_SNAPSHOTS	(1<<0)
#define	DS_FIND_CHILDREN	(1<<1)
|
||||
|
||||
/*
 * Upper bound, in bytes, on what a single operation may access, with
 * metadata counted in.
 */
#define	DMU_MAX_ACCESS (10 * 1024 * 1024)	/* 10MB */
|
||||
|
||||
/*
|
||||
* Public routines to create, destroy, open, and close objsets.
|
||||
*/
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
int dmu_objset_rename(const char *name, const char *newname,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
|
||||
/*
 * Publicly visible description of a DMU buffer.  Field order and types are
 * part of the interface and are unchanged.
 */
typedef struct dmu_buf {
	uint64_t	db_object;	/* object that this buffer is part of */
	uint64_t	db_offset;	/* byte offset in this object */
	uint64_t	db_size;	/* size of buffer in bytes */
	void		*db_data;	/* data in buffer */
} dmu_buf_t;

/* Function type: receives the buffer and a user pointer. */
typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
|
||||
/*
 * Object number of the DIRECTORY_OBJECT of the MOS, followed by the names
 * of the zap entries kept in it.
 */
#define	DMU_POOL_DIRECTORY_OBJECT	1

#define	DMU_POOL_CONFIG			"config"
#define	DMU_POOL_ROOT_DATASET		"root_dataset"
#define	DMU_POOL_SYNC_BPLIST		"sync_bplist"
#define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
#define	DMU_POOL_ERRLOG_LAST		"errlog_last"
#define	DMU_POOL_SPARES			"spares"
#define	DMU_POOL_DEFLATE		"deflate"
#define	DMU_POOL_HISTORY		"history"
#define	DMU_POOL_PROPS			"pool_props"
#define	DMU_POOL_L2CACHE		"l2cache"
|
||||
|
||||
/*
|
||||
* Allocate an object from this objset. The range of object numbers
|
||||
* available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode.
|
||||
*
|
||||
* The transaction must be assigned to a txg. The newly allocated
|
||||
* object will be "held" in the transaction (ie. you can modify the
|
||||
* newly allocated object in this transaction).
|
||||
*
|
||||
* dmu_object_alloc() chooses an object and returns it in *objectp.
|
||||
*
|
||||
* dmu_object_claim() allocates a specific object number. If that
|
||||
* number is already allocated, it fails and returns EEXIST.
|
||||
*
|
||||
* Return 0 on success, or ENOSPC or EEXIST as specified above.
|
||||
*/
|
||||
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free an object from this objset.
|
||||
*
|
||||
* The object's data will be freed as well (ie. you don't need to call
|
||||
* dmu_free(object, 0, -1, tx)).
|
||||
*
|
||||
* The object need not be held in the transaction.
|
||||
*
|
||||
* If there are any holds on this object's buffers (via dmu_buf_hold()),
|
||||
* or tx holds on the object (via dmu_tx_hold_object()), you can not
|
||||
* free it; it fails and returns EBUSY.
|
||||
*
|
||||
* If the object is not allocated, it fails and returns ENOENT.
|
||||
*
|
||||
* Return 0 on success, or EBUSY or ENOENT as specified above.
|
||||
*/
|
||||
int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Find the next allocated or free object.
|
||||
*
|
||||
* The objectp parameter is in-out. It will be updated to be the next
|
||||
* object which is allocated. Ignore objects which have not been
|
||||
* modified since txg.
|
||||
*
|
||||
* XXX Can only be called on a objset with no dirty data.
|
||||
*
|
||||
* Returns 0 on success, or ENOENT if there are no more objects.
|
||||
*/
|
||||
int dmu_object_next(objset_t *os, uint64_t *objectp,
|
||||
boolean_t hole, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Set the data blocksize for an object.
|
||||
*
|
||||
* The object cannot have any blocks allcated beyond the first. If
|
||||
* the first block is allocated already, the new size must be greater
|
||||
* than the current block size. If these conditions are not met,
|
||||
* ENOTSUP will be returned.
|
||||
*
|
||||
* Returns 0 on success, or EBUSY if there are any holds on the object
|
||||
* contents, or ENOTSUP as described above.
|
||||
*/
|
||||
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
|
||||
int ibs, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the checksum property on a dnode. The new checksum algorithm will
|
||||
* apply to all newly written blocks; existing blocks will not be affected.
|
||||
*/
|
||||
void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the compress property on a dnode. The new compression algorithm will
|
||||
* apply to all newly written blocks; existing blocks will not be affected.
|
||||
*/
|
||||
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Decide how many copies of a given block we should make. Can be from
|
||||
* 1 to SPA_DVAS_PER_BP.
|
||||
*/
|
||||
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
|
||||
dmu_object_type_t ot);
|
||||
/*
|
||||
* The bonus data is accessed more or less like a regular buffer.
|
||||
* You must dmu_bonus_hold() to get the buffer, which will give you a
|
||||
* dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
|
||||
* data. As with any normal buffer, you must call dmu_buf_read() to
|
||||
* read db_data, dmu_buf_will_dirty() before modifying it, and the
|
||||
* object must be held in an assigned transaction before calling
|
||||
* dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus
|
||||
* buffer as well. You must release your hold with dmu_buf_rele().
|
||||
*/
|
||||
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
|
||||
int dmu_bonus_max(void);
|
||||
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
|
||||
|
||||
/*
|
||||
* Obtain the DMU buffer from the specified object which contains the
|
||||
* specified offset. dmu_buf_hold() puts a "hold" on the buffer, so
|
||||
* that it will remain in memory. You must release the hold with
|
||||
* dmu_buf_rele(). You mustn't access the dmu_buf_t after releasing your
|
||||
* hold. You must have a hold on any dmu_buf_t* you pass to the DMU.
|
||||
*
|
||||
* You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
|
||||
* on the returned buffer before reading or writing the buffer's
|
||||
* db_data. The comments for those routines describe what particular
|
||||
* operations are valid after calling them.
|
||||
*
|
||||
* The object number must be a valid, allocated object number.
|
||||
*/
|
||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **);
|
||||
/*
 * NOTE(review): presumably adds another hold, identified by tag, to a
 * dmu_buf_t the caller already holds; release it with dmu_buf_rele()
 * using the same tag -- confirm against the implementation.
 */
void dmu_buf_add_ref(dmu_buf_t *db, void *tag);
|
||||
void dmu_buf_rele(dmu_buf_t *db, void *tag);
|
||||
uint64_t dmu_buf_refcount(dmu_buf_t *db);
|
||||
|
||||
/*
|
||||
* dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
|
||||
* range of an object. A pointer to an array of dmu_buf_t*'s is
|
||||
* returned (in *dbpp).
|
||||
*
|
||||
* dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
|
||||
* frees the array. The hold on the array of buffers MUST be released
|
||||
* with dmu_buf_rele_array. You can NOT release the hold on each buffer
|
||||
* individually with dmu_buf_rele.
|
||||
*/
|
||||
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
|
||||
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
|
||||
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
|
||||
|
||||
/*
|
||||
* Returns NULL on success, or the existing user ptr if it's already
|
||||
* been set.
|
||||
*
|
||||
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
|
||||
*
|
||||
* user_data_ptr_ptr should be NULL, or a pointer to a pointer which
|
||||
* will be set to db->db_data when you are allowed to access it. Note
|
||||
* that db->db_data (the pointer) can change when you do dmu_buf_read(),
|
||||
* dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
|
||||
* *user_data_ptr_ptr will be set to the new value when it changes.
|
||||
*
|
||||
* If non-NULL, pageout func will be called when this buffer is being
|
||||
* excised from the cache, so that you can clean up the data structure
|
||||
* pointed to by user_ptr.
|
||||
*
|
||||
* dmu_evict_user() will call the pageout func for all buffers in an
|
||||
* objset with a given pageout func.
|
||||
*/
|
||||
void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
|
||||
dmu_buf_evict_func_t *pageout_func);
|
||||
/*
|
||||
* set_user_ie is the same as set_user, but request immediate eviction
|
||||
* when hold count goes to zero.
|
||||
*/
|
||||
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
|
||||
void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
|
||||
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
|
||||
void *user_ptr, void *user_data_ptr_ptr,
|
||||
dmu_buf_evict_func_t *pageout_func);
|
||||
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
|
||||
|
||||
/*
|
||||
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
|
||||
*/
|
||||
void *dmu_buf_get_user(dmu_buf_t *db);
|
||||
|
||||
/*
|
||||
* Indicate that you are going to modify the buffer's data (db_data).
|
||||
*
|
||||
* The transaction (tx) must be assigned to a txg (ie. you've called
|
||||
* dmu_tx_assign()). The buffer's object must be held in the tx
|
||||
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
|
||||
*/
|
||||
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* You must create a transaction, then hold the objects which you will
|
||||
* (or might) modify as part of this transaction. Then you must assign
|
||||
* the transaction to a transaction group. Once the transaction has
|
||||
* been assigned, you can modify buffers which belong to held objects as
|
||||
* part of this transaction. You can't modify buffers before the
|
||||
* transaction has been assigned; you can't modify buffers which don't
|
||||
* belong to objects which this transaction holds; you can't hold
|
||||
* objects once the transaction has been assigned. You may hold an
|
||||
* object which you are going to free (with dmu_object_free()), but you
|
||||
* don't have to.
|
||||
*
|
||||
* You can abort the transaction before it has been assigned.
|
||||
*
|
||||
* Note that you may hold buffers (with dmu_buf_hold) at any time,
|
||||
* regardless of transaction state.
|
||||
*/
|
||||
|
||||
#define DMU_NEW_OBJECT (-1ULL)
|
||||
#define DMU_OBJECT_END (-1ULL)
|
||||
|
||||
dmu_tx_t *dmu_tx_create(objset_t *os);
|
||||
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
|
||||
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
|
||||
uint64_t len);
|
||||
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
|
||||
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Free up the data blocks for a defined range of a file. If size is
|
||||
* zero, the range from offset to end-of-file is freed.
|
||||
*/
|
||||
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Convenience functions.
|
||||
*
|
||||
* Canfail routines will return 0 on success, or an errno if there is a
|
||||
* nonrecoverable I/O error.
|
||||
*/
|
||||
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
void *buf);
|
||||
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
const void *buf, dmu_tx_t *tx);
|
||||
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
|
||||
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
|
||||
dmu_tx_t *tx);
|
||||
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, struct page *pp, dmu_tx_t *tx);
|
||||
|
||||
extern int zfs_prefetch_disable;
|
||||
|
||||
/*
|
||||
* Asynchronously try to read in the data.
|
||||
*/
|
||||
void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t len);
|
||||
|
||||
typedef struct dmu_object_info {
|
||||
/* All sizes are in bytes. */
|
||||
uint32_t doi_data_block_size;
|
||||
uint32_t doi_metadata_block_size;
|
||||
uint64_t doi_bonus_size;
|
||||
dmu_object_type_t doi_type;
|
||||
dmu_object_type_t doi_bonus_type;
|
||||
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
|
||||
uint8_t doi_checksum;
|
||||
uint8_t doi_compress;
|
||||
uint8_t doi_pad[5];
|
||||
/* Values below are number of 512-byte blocks. */
|
||||
uint64_t doi_physical_blks; /* data + metadata */
|
||||
uint64_t doi_max_block_offset;
|
||||
} dmu_object_info_t;
|
||||
|
||||
typedef void arc_byteswap_func_t(void *buf, size_t size);
|
||||
|
||||
/*
 * Descriptor for a single DMU object type. The dmu_ot[] table declared
 * after this struct holds DMU_OT_NUMTYPES of these.
 */
typedef struct dmu_object_type_info {
|
||||
/* byteswap routine, called as (buf, size) per arc_byteswap_func_t */
arc_byteswap_func_t *ot_byteswap;
|
||||
/* NOTE(review): presumably B_TRUE when objects of this type hold metadata -- confirm */
boolean_t ot_metadata;
|
||||
/* NOTE(review): presumably a human-readable type name -- confirm */
char *ot_name;
|
||||
} dmu_object_type_info_t;
|
||||
|
||||
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
|
||||
|
||||
/*
|
||||
* Get information on a DMU object.
|
||||
*
|
||||
* Return 0 on success or ENOENT if object is not allocated.
|
||||
*
|
||||
* If doi is NULL, just indicates whether the object exists.
|
||||
*/
|
||||
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
|
||||
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
|
||||
u_longlong_t *nblk512);
|
||||
|
||||
typedef struct dmu_objset_stats {
|
||||
uint64_t dds_num_clones; /* number of clones of this */
|
||||
uint64_t dds_creation_txg;
|
||||
uint64_t dds_guid;
|
||||
dmu_objset_type_t dds_type;
|
||||
uint8_t dds_is_snapshot;
|
||||
uint8_t dds_inconsistent;
|
||||
char dds_origin[MAXNAMELEN];
|
||||
} dmu_objset_stats_t;
|
||||
|
||||
/*
|
||||
* Get stats on a dataset.
|
||||
*/
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
|
||||
/*
|
||||
* Add entries to the nvlist for all the objset's properties. See
|
||||
* zfs_prop_table[] and zfs(1m) for details on the properties.
|
||||
*/
|
||||
void dmu_objset_stats(objset_t *os, struct nvlist *nv);
|
||||
|
||||
/*
|
||||
* Get the space usage statistics for statvfs().
|
||||
*
|
||||
* refdbytes is the amount of space "referenced" by this objset.
|
||||
* availbytes is the amount of space available to this objset, taking
|
||||
* into account quotas & reservations, assuming that no other objsets
|
||||
* use the space first. These values correspond to the 'referenced' and
|
||||
* 'available' properties, described in the zfs(1m) manpage.
|
||||
*
|
||||
* usedobjs and availobjs are the number of objects currently allocated,
|
||||
* and available.
|
||||
*/
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
|
||||
/*
|
||||
* The fsid_guid is a 56-bit ID that can change to avoid collisions.
|
||||
* (Contrast with the ds_guid which is a 64-bit ID that will never
|
||||
* change, so there is a small probability that it will collide.)
|
||||
*/
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
|
||||
int dmu_objset_is_snapshot(objset_t *os);
|
||||
|
||||
extern struct spa *dmu_objset_spa(objset_t *os);
|
||||
extern struct zilog *dmu_objset_zil(objset_t *os);
|
||||
extern struct dsl_pool *dmu_objset_pool(objset_t *os);
|
||||
extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
|
||||
extern void dmu_objset_name(objset_t *os, char *buf);
|
||||
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
|
||||
extern uint64_t dmu_objset_id(objset_t *os);
|
||||
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
|
||||
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
||||
int maxlen, boolean_t *conflict);
|
||||
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *idp, uint64_t *offp);
|
||||
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
|
||||
extern void *dmu_objset_get_user(objset_t *os);
|
||||
|
||||
/*
|
||||
* Return the txg number for the given assigned transaction.
|
||||
*/
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Synchronous write.
|
||||
* If a parent zio is provided this function initiates a write on the
|
||||
* provided buffer as a child of the parent zio.
|
||||
* In the absence of a parent zio, the write is completed synchronously.
|
||||
* At write completion, blk is filled with the bp of the written block.
|
||||
* Note that while the data covered by this function will be on stable
|
||||
* storage when the write completes this new data does not become a
|
||||
* permanent part of the file until the associated transaction commits.
|
||||
*/
|
||||
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
|
||||
int dmu_sync(struct zio *zio, dmu_buf_t *db,
|
||||
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
|
||||
|
||||
/*
|
||||
* Find the next hole or data block in file starting at *off.
|
||||
* Return found offset in *off. Return ESRCH for end of file.
|
||||
*/
|
||||
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
|
||||
uint64_t *off);
|
||||
|
||||
/*
|
||||
* Initial setup and final teardown.
|
||||
*/
|
||||
extern void dmu_init(void);
|
||||
extern void dmu_fini(void);
|
||||
|
||||
typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
|
||||
uint64_t object, uint64_t offset, int len);
|
||||
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
|
||||
dmu_traverse_cb_t cb, void *arg);
|
||||
|
||||
int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
struct vnode *vp, offset_t *off);
|
||||
|
||||
typedef struct dmu_recv_cookie {
|
||||
/*
|
||||
* This structure is opaque!
|
||||
*
|
||||
* If logical and real are different, we are receiving the stream
|
||||
* into the "real" temporary clone, and then switching it with
|
||||
* the "logical" target.
|
||||
*/
|
||||
struct dsl_dataset *drc_logical_ds;
|
||||
struct dsl_dataset *drc_real_ds;
|
||||
struct drr_begin *drc_drrb;
|
||||
char *drc_tosnap;
|
||||
boolean_t drc_newfs;
|
||||
boolean_t drc_force;
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
|
||||
boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
|
||||
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc);
|
||||
void dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc);
|
||||
|
||||
/* CRC64 table */
|
||||
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
|
||||
extern uint64_t zfs_crc64_table[256];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_H */
|
||||
@@ -0,0 +1,237 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_IMPL_H
|
||||
#define _SYS_DMU_IMPL_H
|
||||
|
||||
#pragma ident "@(#)dmu_impl.h 1.2 07/02/02 SMI"
|
||||
|
||||
#include <sys/txg_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the locking strategy for the DMU. Numbers in parentheses are
|
||||
* cases that use that lock order, referenced below:
|
||||
*
|
||||
* ARC is self-contained
|
||||
* bplist is self-contained
|
||||
* refcount is self-contained
|
||||
* txg is self-contained (hopefully!)
|
||||
* zst_lock
|
||||
* zf_rwlock
|
||||
*
|
||||
* XXX try to improve evicting path?
|
||||
*
|
||||
* dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
|
||||
* dn_dbufs_mtx > hash_mutexes > db_mtx > leafs
|
||||
*
|
||||
* dp_config_rwlock
|
||||
* must be held before: everything
|
||||
* protects dd namespace changes
|
||||
* protects property changes globally
|
||||
* held from:
|
||||
* dsl_dir_open/r:
|
||||
* dsl_dir_create_sync/w:
|
||||
* dsl_dir_sync_destroy/w:
|
||||
* dsl_dir_rename_sync/w:
|
||||
* dsl_prop_changed_notify/r:
|
||||
*
|
||||
* os_obj_lock
|
||||
* must be held before:
|
||||
* everything except dp_config_rwlock
|
||||
* protects os_obj_next
|
||||
* held from:
|
||||
* dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
|
||||
*
|
||||
* dn_struct_rwlock
|
||||
* must be held before:
|
||||
* everything except dp_config_rwlock and os_obj_lock
|
||||
* protects structure of dnode (eg. nlevels)
|
||||
* db_blkptr can change when syncing out change to nlevels
|
||||
* dn_maxblkid
|
||||
* dn_nlevels
|
||||
* dn_*blksz*
|
||||
* phys nlevels, maxblkid, physical blkptr_t's (?)
|
||||
* held from:
|
||||
* callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
|
||||
* dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
|
||||
* dmu_tx_count_free:
|
||||
* dbuf_read_impl: db_mtx, dmu_zfetch()
|
||||
* dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
|
||||
* dbuf_new_size: db_mtx
|
||||
* dbuf_dirty: db_mtx
|
||||
* dbuf_findbp: (callers, phys? - the real need)
|
||||
* dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
|
||||
* dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
|
||||
* dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
|
||||
* dnode_sync/w (increase_indirection): db_mtx (phys)
|
||||
* dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
|
||||
* dnode_new_blkid/w: (dn_maxblkid)
|
||||
* dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
|
||||
* dnode_next_offset: (phys)
|
||||
*
|
||||
* dn_dbufs_mtx
|
||||
* must be held before:
|
||||
* db_mtx, hash_mutexes
|
||||
* protects:
|
||||
* dn_dbufs
|
||||
* dn_evicted
|
||||
* held from:
|
||||
* dmu_evict_user: db_mtx (dn_dbufs)
|
||||
* dbuf_free_range: db_mtx (dn_dbufs)
|
||||
* dbuf_remove_ref: db_mtx, callees:
|
||||
* dbuf_hash_remove: hash_mutexes, db_mtx
|
||||
* dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
|
||||
* dnode_set_blksz: (dn_dbufs)
|
||||
*
|
||||
* hash_mutexes (global)
|
||||
* must be held before:
|
||||
* db_mtx
|
||||
* protects dbuf_hash_table (global) and db_hash_next
|
||||
* held from:
|
||||
* dbuf_find: db_mtx
|
||||
* dbuf_hash_insert: db_mtx
|
||||
* dbuf_hash_remove: db_mtx
|
||||
*
|
||||
* db_mtx (meta-leaf)
|
||||
* must be held before:
|
||||
* dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
|
||||
* protects:
|
||||
* db_state
|
||||
* db_holds
|
||||
* db_buf
|
||||
* db_changed
|
||||
* db_data_pending
|
||||
* db_dirtied
|
||||
* db_link
|
||||
* db_dirty_node (??)
|
||||
* db_dirtycnt
|
||||
* db_d.*
|
||||
* db.*
|
||||
* held from:
|
||||
* dbuf_dirty: dn_mtx, dn_dirty_mtx
|
||||
* dbuf_dirty->dsl_dir_willuse_space: dd_lock
|
||||
* dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
|
||||
* dbuf_undirty: dn_dirty_mtx (db_d)
|
||||
* dbuf_write_done: dn_dirty_mtx (db_state)
|
||||
* dbuf_*
|
||||
* dmu_buf_update_user: none (db_d)
|
||||
* dmu_evict_user: none (db_d) (maybe can eliminate)
|
||||
* dbuf_find: none (db_holds)
|
||||
* dbuf_hash_insert: none (db_holds)
|
||||
* dmu_buf_read_array_impl: none (db_state, db_changed)
|
||||
* dmu_sync: none (db_dirty_node, db_d)
|
||||
* dnode_reallocate: none (db)
|
||||
*
|
||||
* dn_mtx (leaf)
|
||||
* protects:
|
||||
* dn_dirty_dbufs
|
||||
* dn_ranges
|
||||
* phys accounting
|
||||
* dn_allocated_txg
|
||||
* dn_free_txg
|
||||
* dn_assigned_txg
|
||||
* dd_assigned_tx
|
||||
* dn_notxholds
|
||||
* dn_dirtyctx
|
||||
* dn_dirtyctx_firstset
|
||||
* (dn_phys copy fields?)
|
||||
* (dn_phys contents?)
|
||||
* held from:
|
||||
* dnode_*
|
||||
* dbuf_dirty: none
|
||||
* dbuf_sync: none (phys accounting)
|
||||
* dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
|
||||
* dbuf_write_done: none (phys accounting)
|
||||
* dmu_object_info_from_dnode: none (accounting)
|
||||
* dmu_tx_commit: none
|
||||
* dmu_tx_hold_object_impl: none
|
||||
* dmu_tx_try_assign: dn_notxholds(cv)
|
||||
* dmu_tx_unassign: none
|
||||
*
|
||||
* dd_lock (leaf)
|
||||
* protects:
|
||||
* dd_prop_cbs
|
||||
* dd_sync_*
|
||||
* dd_used_bytes
|
||||
* dd_tempreserved
|
||||
* dd_space_towrite
|
||||
* dd_myname
|
||||
* dd_phys accounting?
|
||||
* held from:
|
||||
* dsl_dir_*
|
||||
* dsl_prop_changed_notify: none (dd_prop_cbs)
|
||||
* dsl_prop_register: none (dd_prop_cbs)
|
||||
* dsl_prop_unregister: none (dd_prop_cbs)
|
||||
* dsl_dataset_block_freeable: none (dd_sync_*)
|
||||
*
|
||||
* os_lock (leaf)
|
||||
* protects:
|
||||
* os_dirty_dnodes
|
||||
* os_free_dnodes
|
||||
* os_dnodes
|
||||
* os_downgraded_dbufs
|
||||
* dn_dirtyblksz
|
||||
* dn_dirty_link
|
||||
* held from:
|
||||
* dnode_create: none (os_dnodes)
|
||||
* dnode_destroy: none (os_dnodes)
|
||||
* dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
|
||||
* dnode_free: none (dn_dirtyblksz, os_*_dnodes)
|
||||
*
|
||||
* ds_lock (leaf)
|
||||
* protects:
|
||||
* ds_user_ptr
|
||||
* ds_user_evict_func
|
||||
* ds_open_refcount
|
||||
* ds_snapname
|
||||
* ds_phys accounting
|
||||
* held from:
|
||||
* dsl_dataset_*
|
||||
*
|
||||
* dr_mtx (leaf)
|
||||
* protects:
|
||||
* dr_children
|
||||
* held from:
|
||||
* dbuf_dirty
|
||||
* dbuf_undirty
|
||||
* dbuf_sync_indirect
|
||||
* dnode_new_blkid
|
||||
*/
|
||||
|
||||
struct objset;
|
||||
struct dmu_pool;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_IMPL_H */
|
||||
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_OBJSET_H
|
||||
#define _SYS_DMU_OBJSET_H
|
||||
|
||||
#pragma ident "@(#)dmu_objset.h 1.13 08/04/27 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dmu_tx;
|
||||
struct objset_impl;
|
||||
|
||||
/*
 * Physical objset header (NOTE(review): presumably the on-disk layout --
 * confirm). os_pad is sized so that the sizes of the three fields before
 * it plus the pad itself add up to 1024 bytes.
 */
typedef struct objset_phys {
|
||||
dnode_phys_t os_meta_dnode;
|
||||
zil_header_t os_zil_header;
|
||||
uint64_t os_type;
|
||||
/* 1024 minus the sizes of os_meta_dnode, os_zil_header and os_type */
char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
|
||||
sizeof (uint64_t)];
|
||||
} objset_phys_t;
|
||||
|
||||
/*
 * Objset handle: a pointer to the backing objset_impl plus a mode word.
 * NOTE(review): os_mode presumably records the "mode" argument given to
 * dmu_objset_open() -- confirm against the implementation.
 */
struct objset {
|
||||
/* implementation state behind this handle */
struct objset_impl *os;
|
||||
int os_mode;
|
||||
};
|
||||
|
||||
typedef struct objset_impl {
|
||||
/* Immutable: */
|
||||
struct dsl_dataset *os_dsl_dataset;
|
||||
spa_t *os_spa;
|
||||
arc_buf_t *os_phys_buf;
|
||||
objset_phys_t *os_phys;
|
||||
dnode_t *os_meta_dnode;
|
||||
zilog_t *os_zil;
|
||||
objset_t os;
|
||||
uint8_t os_checksum; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_compress; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_copies; /* can change, under dsl_dir's locks */
|
||||
uint8_t os_md_checksum;
|
||||
uint8_t os_md_compress;
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
blkptr_t *os_rootbp;
|
||||
|
||||
/* Protected by os_obj_lock */
|
||||
kmutex_t os_obj_lock;
|
||||
uint64_t os_obj_next;
|
||||
|
||||
/* Protected by os_lock */
|
||||
kmutex_t os_lock;
|
||||
list_t os_dirty_dnodes[TXG_SIZE];
|
||||
list_t os_free_dnodes[TXG_SIZE];
|
||||
list_t os_dnodes;
|
||||
list_t os_downgraded_dbufs;
|
||||
|
||||
/* stuff we store for the user */
|
||||
kmutex_t os_user_ptr_lock;
|
||||
void *os_user_ptr;
|
||||
} objset_impl_t;
|
||||
|
||||
#define DMU_META_DNODE_OBJECT 0
|
||||
|
||||
/* called from zpl */
|
||||
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
|
||||
objset_t **osp);
|
||||
void dmu_objset_close(objset_t *os);
|
||||
int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
||||
objset_t *clone_parent, uint64_t flags,
|
||||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dmu_objset_fsid_guid(objset_t *os);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
int flags);
|
||||
void dmu_objset_byteswap(void *buf, size_t size);
|
||||
int dmu_objset_evict_dbufs(objset_t *os);
|
||||
|
||||
/* called from dsl */
|
||||
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
|
||||
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
||||
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
|
||||
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
|
||||
objset_impl_t **osip);
|
||||
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_OBJSET_H */
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TRAVERSE_H
|
||||
#define _SYS_DMU_TRAVERSE_H
|
||||
|
||||
#pragma ident "@(#)dmu_traverse.h 1.4 08/04/01 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/arc.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Traversal option flags (NOTE(review): presumably OR-ed together and
 * passed as the "advance" argument of the traverse_*() routines --
 * confirm). ADVANCE_POST is 0, not a distinct bit, so it cannot be
 * detected with a bitwise AND; post-order is presumably selected simply
 * by leaving ADVANCE_PRE clear.
 */
#define ADVANCE_POST 0 /* post-order traversal */
|
||||
#define ADVANCE_PRE 0x01 /* pre-order traversal */
|
||||
#define ADVANCE_PRUNE 0x02 /* prune by prev snapshot birth time */
|
||||
#define ADVANCE_DATA 0x04 /* read user data blocks */
|
||||
#define ADVANCE_HOLES 0x08 /* visit holes */
|
||||
#define ADVANCE_ZIL 0x10 /* visit intent log blocks */
|
||||
#define ADVANCE_NOLOCK 0x20 /* Don't grab SPA sync lock */
|
||||
|
||||
/* Parenthesized so the negative constant stays intact in any expression. */
#define ZB_NO_LEVEL (-2)
|
||||
#define ZB_MAXLEVEL 32 /* Next power of 2 >= DN_MAX_LEVELS */
|
||||
#define ZB_MAXBLKID (1ULL << 62)
|
||||
#define ZB_MAXOBJSET (1ULL << 62)
|
||||
#define ZB_MAXOBJECT (1ULL << 62)
|
||||
|
||||
/*
 * ZB_DEPTH is the size of the first dimension of
 * traverse_handle.th_cache[ZB_DEPTH][ZB_MAXLEVEL]; the three *_CACHE
 * values (0..2) appear to be the indices into that dimension.
 * NOTE(review): MOS/MDN/DN presumably stand for meta-objset, meta-dnode
 * and dnode -- confirm.
 */
#define ZB_MOS_CACHE 0
|
||||
#define ZB_MDN_CACHE 1
|
||||
#define ZB_DN_CACHE 2
|
||||
#define ZB_DEPTH 3
|
||||
|
||||
typedef struct zseg {
|
||||
uint64_t seg_mintxg;
|
||||
uint64_t seg_maxtxg;
|
||||
zbookmark_t seg_start;
|
||||
zbookmark_t seg_end;
|
||||
list_node_t seg_node;
|
||||
} zseg_t;
|
||||
|
||||
typedef struct traverse_blk_cache {
|
||||
zbookmark_t bc_bookmark;
|
||||
blkptr_t bc_blkptr;
|
||||
void *bc_data;
|
||||
dnode_phys_t *bc_dnode;
|
||||
int bc_errno;
|
||||
int bc_pad1;
|
||||
uint64_t bc_pad2;
|
||||
} traverse_blk_cache_t;
|
||||
|
||||
typedef int (blkptr_cb_t)(traverse_blk_cache_t *bc, spa_t *spa, void *arg);
|
||||
|
||||
struct traverse_handle {
|
||||
spa_t *th_spa;
|
||||
blkptr_cb_t *th_func;
|
||||
void *th_arg;
|
||||
uint16_t th_advance;
|
||||
uint16_t th_locked;
|
||||
int th_zio_flags;
|
||||
list_t th_seglist;
|
||||
traverse_blk_cache_t th_cache[ZB_DEPTH][ZB_MAXLEVEL];
|
||||
traverse_blk_cache_t th_zil_cache;
|
||||
uint64_t th_hits;
|
||||
uint64_t th_arc_hits;
|
||||
uint64_t th_reads;
|
||||
uint64_t th_callbacks;
|
||||
uint64_t th_syncs;
|
||||
uint64_t th_restarts;
|
||||
zbookmark_t th_noread;
|
||||
zbookmark_t th_lastcb;
|
||||
};
|
||||
|
||||
int traverse_dsl_dataset(struct dsl_dataset *ds, uint64_t txg_start,
|
||||
int advance, blkptr_cb_t func, void *arg);
|
||||
int traverse_zvol(objset_t *os, int advance, blkptr_cb_t func, void *arg);
|
||||
|
||||
traverse_handle_t *traverse_init(spa_t *spa, blkptr_cb_t *func, void *arg,
|
||||
int advance, int zio_flags);
|
||||
void traverse_fini(traverse_handle_t *th);
|
||||
|
||||
void traverse_add_dnode(traverse_handle_t *th,
|
||||
uint64_t mintxg, uint64_t maxtxg, uint64_t objset, uint64_t object);
|
||||
void traverse_add_objset(traverse_handle_t *th,
|
||||
uint64_t mintxg, uint64_t maxtxg, uint64_t objset);
|
||||
void traverse_add_pool(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg);
|
||||
|
||||
int traverse_more(traverse_handle_t *th);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_TRAVERSE_H */
|
||||
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TX_H
|
||||
#define _SYS_DMU_TX_H
|
||||
|
||||
#pragma ident "@(#)dmu_tx.h 1.6 07/10/29 SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dmu_buf_impl;
|
||||
struct dmu_tx_hold;
|
||||
struct dnode_link;
|
||||
struct dsl_pool;
|
||||
struct dnode;
|
||||
struct dsl_dir;
|
||||
|
||||
struct dmu_tx {
|
||||
/*
|
||||
* No synchronization is needed because a tx can only be handled
|
||||
* by one thread.
|
||||
*/
|
||||
list_t tx_holds; /* list of dmu_tx_hold_t */
|
||||
objset_t *tx_objset;
|
||||
struct dsl_dir *tx_dir;
|
||||
struct dsl_pool *tx_pool;
|
||||
uint64_t tx_txg;
|
||||
uint64_t tx_lastsnap_txg;
|
||||
uint64_t tx_lasttried_txg;
|
||||
txg_handle_t tx_txgh;
|
||||
void *tx_tempreserve_cookie;
|
||||
struct dmu_tx_hold *tx_needassign_txh;
|
||||
uint8_t tx_anyobj;
|
||||
int tx_err;
|
||||
#ifdef ZFS_DEBUG
|
||||
uint64_t tx_space_towrite;
|
||||
uint64_t tx_space_tofree;
|
||||
uint64_t tx_space_tooverwrite;
|
||||
uint64_t tx_space_tounref;
|
||||
refcount_t tx_space_written;
|
||||
refcount_t tx_space_freed;
|
||||
#endif
|
||||
};
|
||||
|
||||
enum dmu_tx_hold_type {
|
||||
THT_NEWOBJECT,
|
||||
THT_WRITE,
|
||||
THT_BONUS,
|
||||
THT_FREE,
|
||||
THT_ZAP,
|
||||
THT_SPACE,
|
||||
THT_NUMTYPES
|
||||
};
|
||||
|
||||
typedef struct dmu_tx_hold {
|
||||
dmu_tx_t *txh_tx;
|
||||
list_node_t txh_node;
|
||||
struct dnode *txh_dnode;
|
||||
uint64_t txh_space_towrite;
|
||||
uint64_t txh_space_tofree;
|
||||
uint64_t txh_space_tooverwrite;
|
||||
uint64_t txh_space_tounref;
|
||||
#ifdef ZFS_DEBUG
|
||||
enum dmu_tx_hold_type txh_type;
|
||||
uint64_t txh_arg1;
|
||||
uint64_t txh_arg2;
|
||||
#endif
|
||||
} dmu_tx_hold_t;
|
||||
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu.h, and are called by the user.
|
||||
*/
|
||||
/* The argument is an objset_t, so it is named 'os' (as in dmu_tx_add_new_object()); 'dd' denotes a dsl_dir_t in this header. */
dmu_tx_t *dmu_tx_create(objset_t *os);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
void dmu_tx_commit(dmu_tx_t *tx);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||||
*/
|
||||
extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* These routines are only called by the DMU.
|
||||
*/
|
||||
dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
|
||||
int dmu_tx_is_syncing(dmu_tx_t *tx);
|
||||
int dmu_tx_private_ok(dmu_tx_t *tx);
|
||||
void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
|
||||
void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
|
||||
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
|
||||
int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
|
||||
#else
|
||||
#define DMU_TX_DIRTY_BUF(tx, db)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DMU_TX_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _DFETCH_H
|
||||
#define _DFETCH_H
|
||||
|
||||
#pragma ident "@(#)dmu_zfetch.h 1.2 06/07/17 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern uint64_t zfetch_array_rd_sz;
|
||||
|
||||
struct dnode; /* so we can reference dnode */
|
||||
|
||||
typedef enum zfetch_dirn {
|
||||
ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
|
||||
ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
|
||||
} zfetch_dirn_t;
|
||||
|
||||
typedef struct zstream {
|
||||
uint64_t zst_offset; /* offset of starting block in range */
|
||||
uint64_t zst_len; /* length of range, in blocks */
|
||||
zfetch_dirn_t zst_direction; /* direction of prefetch */
|
||||
uint64_t zst_stride; /* length of stride, in blocks */
|
||||
uint64_t zst_ph_offset; /* prefetch offset, in blocks */
|
||||
uint64_t zst_cap; /* prefetch limit (cap), in blocks */
|
||||
kmutex_t zst_lock; /* protects stream */
|
||||
clock_t zst_last; /* lbolt of last prefetch */
|
||||
avl_node_t zst_node; /* embed avl node here */
|
||||
} zstream_t;
|
||||
|
||||
/*
 * Prefetch state owned by a dnode (see zf_dnode): a lock, the collection
 * of active prefetch streams and counters describing them.
 * NOTE(review): zstream_t links itself with an avl_node_t (zst_node) while
 * zf_stream is a list_t -- confirm how streams are placed on this list.
 */
typedef struct zfetch {
|
||||
krwlock_t zf_rwlock; /* protects zfetch structure */
|
||||
list_t zf_stream; /* list of zstream_t's (a list_t, not an AVL tree) */
|
||||
struct dnode *zf_dnode; /* dnode that owns this zfetch */
|
||||
uint32_t zf_stream_cnt; /* # of active streams */
|
||||
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
|
||||
} zfetch_t;
|
||||
|
||||
void dmu_zfetch_init(zfetch_t *, struct dnode *);
|
||||
void dmu_zfetch_rele(zfetch_t *);
|
||||
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _DFETCH_H */
|
||||
@@ -0,0 +1,270 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DNODE_H
|
||||
#define _SYS_DNODE_H
|
||||
|
||||
#pragma ident "@(#)dnode.h 1.12 07/08/26 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/dmu_zfetch.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flags.
|
||||
*/
|
||||
#define DNODE_MUST_BE_ALLOCATED 1
|
||||
#define DNODE_MUST_BE_FREE 2
|
||||
|
||||
/*
|
||||
* Fixed constants.
|
||||
*/
|
||||
#define DNODE_SHIFT 9 /* 512 bytes */
|
||||
#define DN_MIN_INDBLKSHIFT 10 /* 1k */
|
||||
#define DN_MAX_INDBLKSHIFT 14 /* 16k */
|
||||
#define DNODE_BLOCK_SHIFT 14 /* 16k */
|
||||
#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
|
||||
#define DN_MAX_OBJECT_SHIFT 48 /* 2^48 objects, i.e. 256 Ti or ~281 trillion (zfs_fid_t limit) */
|
||||
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
|
||||
|
||||
/*
|
||||
* Derived constants.
|
||||
*/
|
||||
#define DNODE_SIZE (1 << DNODE_SHIFT)
|
||||
#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
|
||||
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
|
||||
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
|
||||
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
|
||||
|
||||
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
|
||||
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
|
||||
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
|
||||
|
||||
/* The +2 here is a cheesy way to round up */
|
||||
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
|
||||
(DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
|
||||
|
||||
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
|
||||
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
|
||||
|
||||
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
|
||||
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
|
||||
|
||||
/*
 * Number of 2^typeshift-byte entries that fit in a 2^blkshift-byte block.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. EPB(shift, a + b)) are subtracted as a whole.
 */
#define EPB(blkshift, typeshift) (1 << ((blkshift) - (typeshift)))
|
||||
|
||||
struct dmu_buf_impl;
|
||||
struct objset_impl;
|
||||
struct zio;
|
||||
|
||||
enum dnode_dirtycontext {
|
||||
DN_UNDIRTIED,
|
||||
DN_DIRTY_OPEN,
|
||||
DN_DIRTY_SYNC
|
||||
};
|
||||
|
||||
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
|
||||
#define DNODE_FLAG_USED_BYTES (1<<0)
|
||||
|
||||
/*
 * On-disk dnode layout.  The fixed fields ahead of dn_blkptr total
 * DNODE_CORE_SIZE (64) bytes: 8 one-byte fields, 2 uint16_t, 4 pad bytes,
 * 2 uint64_t and 4 uint64_t of padding.  dn_blkptr[1] together with
 * dn_bonus[DN_MAX_BONUSLEN] accounts for the rest of the DNODE_SIZE bytes
 * (assuming sizeof (blkptr_t) == 1 << SPA_BLKPTRSHIFT -- TODO confirm).
 * DN_BONUS() locates the bonus data at dn_bonus plus (dn_nblkptr - 1)
 * block pointers, i.e. block pointers past the first overlay the start
 * of dn_bonus.
 */
typedef struct dnode_phys {
|
||||
uint8_t dn_type; /* dmu_object_type_t */
|
||||
uint8_t dn_indblkshift; /* ln2(indirect block size) */
|
||||
uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */
|
||||
uint8_t dn_nblkptr; /* length of dn_blkptr */
|
||||
uint8_t dn_bonustype; /* type of data in bonus buffer */
|
||||
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
|
||||
uint8_t dn_compress; /* ZIO_COMPRESS type */
|
||||
uint8_t dn_flags; /* DNODE_FLAG_* */
|
||||
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
|
||||
uint16_t dn_bonuslen; /* length of dn_bonus */
|
||||
uint8_t dn_pad2[4]; /* pads the fields above to 16 bytes */
|
||||
|
||||
/*
 * accounting is protected by dn_dirty_mtx
 * NOTE(review): struct dnode in this header has dn_mtx and dn_dbufs_mtx
 * but no member named dn_dirty_mtx -- confirm which lock is meant.
 */
|
||||
uint64_t dn_maxblkid; /* largest allocated block ID */
|
||||
uint64_t dn_used; /* bytes (or sectors) of disk space; see DN_USED_BYTES() */
|
||||
|
||||
uint64_t dn_pad3[4]; /* pads the core fields to DNODE_CORE_SIZE */
|
||||
|
||||
blkptr_t dn_blkptr[1];
|
||||
uint8_t dn_bonus[DN_MAX_BONUSLEN];
|
||||
} dnode_phys_t;
|
||||
|
||||
typedef struct dnode {
|
||||
/*
|
||||
* dn_struct_rwlock protects the structure of the dnode,
|
||||
* including the number of levels of indirection (dn_nlevels),
|
||||
* dn_maxblkid, and dn_next_*
|
||||
*/
|
||||
krwlock_t dn_struct_rwlock;
|
||||
|
||||
/*
|
||||
* Our link on dataset's dd_dnodes list.
|
||||
* Protected by dd_accounting_mtx.
|
||||
*/
|
||||
list_node_t dn_link;
|
||||
|
||||
/* immutable: */
|
||||
struct objset_impl *dn_objset;
|
||||
uint64_t dn_object;
|
||||
struct dmu_buf_impl *dn_dbuf;
|
||||
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
|
||||
|
||||
/*
|
||||
* Copies of stuff in dn_phys. They're valid in the open
|
||||
* context (eg. even before the dnode is first synced).
|
||||
* Where necessary, these are protected by dn_struct_rwlock.
|
||||
*/
|
||||
dmu_object_type_t dn_type; /* object type */
|
||||
uint16_t dn_bonuslen; /* bonus length */
|
||||
uint8_t dn_bonustype; /* bonus type */
|
||||
uint8_t dn_nblkptr; /* number of blkptrs (immutable) */
|
||||
uint8_t dn_checksum; /* ZIO_CHECKSUM type */
|
||||
uint8_t dn_compress; /* ZIO_COMPRESS type */
|
||||
uint8_t dn_nlevels;
|
||||
uint8_t dn_indblkshift;
|
||||
uint8_t dn_datablkshift; /* zero if blksz not power of 2! */
|
||||
uint16_t dn_datablkszsec; /* in 512b sectors */
|
||||
uint32_t dn_datablksz; /* in bytes */
|
||||
uint64_t dn_maxblkid;
|
||||
uint8_t dn_next_nlevels[TXG_SIZE];
|
||||
uint8_t dn_next_indblkshift[TXG_SIZE];
|
||||
uint16_t dn_next_bonuslen[TXG_SIZE];
|
||||
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
|
||||
|
||||
/* protected by os_lock: */
|
||||
list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
|
||||
|
||||
/* protected by dn_mtx: */
|
||||
kmutex_t dn_mtx;
|
||||
list_t dn_dirty_records[TXG_SIZE];
|
||||
avl_tree_t dn_ranges[TXG_SIZE];
|
||||
uint64_t dn_allocated_txg;
|
||||
uint64_t dn_free_txg;
|
||||
uint64_t dn_assigned_txg;
|
||||
kcondvar_t dn_notxholds;
|
||||
enum dnode_dirtycontext dn_dirtyctx;
|
||||
uint8_t *dn_dirtyctx_firstset; /* dbg: contents meaningless */
|
||||
|
||||
/* protected by own devices */
|
||||
refcount_t dn_tx_holds;
|
||||
refcount_t dn_holds;
|
||||
|
||||
kmutex_t dn_dbufs_mtx;
|
||||
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
|
||||
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
|
||||
|
||||
/* parent IO for current sync write */
|
||||
zio_t *dn_zio;
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
} dnode_t;
|
||||
|
||||
typedef struct free_range {
|
||||
avl_node_t fr_node;
|
||||
uint64_t fr_blkid;
|
||||
uint64_t fr_nblks;
|
||||
} free_range_t;
|
||||
|
||||
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
|
||||
uint64_t object);
|
||||
void dnode_special_close(dnode_t *dn);
|
||||
|
||||
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
|
||||
int dnode_hold(struct objset_impl *dd, uint64_t object,
|
||||
void *ref, dnode_t **dnp);
|
||||
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
|
||||
void *ref, dnode_t **dnp);
|
||||
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
|
||||
void dnode_rele(dnode_t *dn, void *ref);
|
||||
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
|
||||
void dnode_byteswap(dnode_phys_t *dnp);
|
||||
void dnode_buf_byteswap(void *buf, size_t size);
|
||||
void dnode_verify(dnode_t *dn);
|
||||
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
|
||||
uint64_t dnode_current_max_length(dnode_t *dn);
|
||||
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
|
||||
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
|
||||
uint64_t nblks, dmu_tx_t *tx);
|
||||
void dnode_diduse_space(dnode_t *dn, int64_t space);
|
||||
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
|
||||
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx);
|
||||
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
|
||||
void dnode_init(void);
|
||||
void dnode_fini(void);
|
||||
int dnode_next_offset(dnode_t *dn, boolean_t hole, uint64_t *off, int minlvl,
|
||||
uint64_t blkfill, uint64_t txg);
|
||||
void dnode_evict_dbufs(dnode_t *dn);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
|
||||
/*
|
||||
* There should be a ## between the string literal and fmt, to make it
|
||||
* clear that we're joining two strings together, but
|
||||
* gcc doesn't support that preprocessor token.
|
||||
*/
|
||||
/*
 * Debug printf for a dnode.  When ZFS_DEBUG_DPRINTF is set in zfs_flags,
 * the message is prefixed with "obj=<object number> " ("obj=mdn " when the
 * object is DMU_META_DNODE_OBJECT) and handed to dprintf_ds() for the
 * dataset of the dnode's objset.
 *
 * dn  - pointer to the dnode; evaluated more than once.
 * fmt - string literal; it is pasted onto the "obj=%s " prefix.
 * ... - at least one argument is required, because __VA_ARGS__ follows
 *       a comma in the expansion.
 *
 * The object number is cast to u_longlong_t and therefore printed with
 * %llu, so the conversion specifier matches the argument's signedness.
 */
#define dprintf_dnode(dn, fmt, ...) do { \
	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
		char __db_buf[32]; \
		uint64_t __db_obj = (dn)->dn_object; \
		if (__db_obj == DMU_META_DNODE_OBJECT) \
			(void) strcpy(__db_buf, "mdn"); \
		else \
			(void) snprintf(__db_buf, sizeof (__db_buf), "%llu", \
			    (u_longlong_t)__db_obj); \
		dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
		    __db_buf, __VA_ARGS__); \
	} \
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define DNODE_VERIFY(dn) dnode_verify(dn)
|
||||
#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
|
||||
|
||||
#else
|
||||
|
||||
#define dprintf_dnode(db, fmt, ...)
|
||||
#define DNODE_VERIFY(dn)
|
||||
#define FREE_VERIFY(db, start, end, tx)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DNODE_H */
|
||||
@@ -0,0 +1,228 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DATASET_H
|
||||
#define _SYS_DSL_DATASET_H
|
||||
|
||||
#pragma ident "@(#)dsl_dataset.h 1.16 08/04/27 SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
struct dsl_dir;
|
||||
struct dsl_pool;
|
||||
|
||||
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
|
||||
|
||||
#define DS_FLAG_INCONSISTENT (1ULL<<0)
|
||||
/*
|
||||
* NB: nopromote can not yet be set, but we want support for it in this
|
||||
* on-disk version, so that we don't need to upgrade for it later. It
|
||||
* will be needed when we implement 'zfs split' (where the split off
|
||||
* clone should not be promoted).
|
||||
*/
|
||||
#define DS_FLAG_NOPROMOTE (1ULL<<1)
|
||||
|
||||
/*
|
||||
* DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
|
||||
* calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
|
||||
* refquota/refreservations).
|
||||
*/
|
||||
#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
|
||||
|
||||
/*
|
||||
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
|
||||
* name lookups should be performed case-insensitively.
|
||||
*/
|
||||
#define DS_FLAG_CI_DATASET (1ULL<<16)
|
||||
|
||||
/*
 * On-disk (physical) state of a dataset: object references, snapshot
 * linkage, creation info, space accounting, identifiers, DS_FLAG_* flags
 * and the dataset's block pointer, followed by padding.
 */
typedef struct dsl_dataset_phys {
|
||||
uint64_t ds_dir_obj;
|
||||
uint64_t ds_prev_snap_obj;
|
||||
uint64_t ds_prev_snap_txg;
|
||||
uint64_t ds_next_snap_obj;
|
||||
uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */
|
||||
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
|
||||
uint64_t ds_creation_time; /* seconds since 1970 */
|
||||
uint64_t ds_creation_txg;
|
||||
uint64_t ds_deadlist_obj;
|
||||
uint64_t ds_used_bytes;
|
||||
uint64_t ds_compressed_bytes;
|
||||
uint64_t ds_uncompressed_bytes;
|
||||
uint64_t ds_unique_bytes; /* for snapshots; for head datasets only meaningful when DS_FLAG_UNIQUE_ACCURATE is set */
|
||||
/*
|
||||
* The ds_fsid_guid is a 56-bit ID that can change to avoid
|
||||
* collisions. The ds_guid is a 64-bit ID that will never
|
||||
* change, so there is a small probability that it will collide.
|
||||
*/
|
||||
uint64_t ds_fsid_guid;
|
||||
uint64_t ds_guid;
|
||||
uint64_t ds_flags; /* DS_FLAG_* bits */
|
||||
blkptr_t ds_bp;
|
||||
uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */
|
||||
} dsl_dataset_phys_t;
|
||||
|
||||
typedef struct dsl_dataset {
|
||||
/* Immutable: */
|
||||
struct dsl_dir *ds_dir;
|
||||
dsl_dataset_phys_t *ds_phys;
|
||||
dmu_buf_t *ds_dbuf;
|
||||
uint64_t ds_object;
|
||||
uint64_t ds_fsid_guid;
|
||||
|
||||
/* only used in syncing context: */
|
||||
struct dsl_dataset *ds_prev; /* only valid for non-snapshots */
|
||||
|
||||
/* has internal locking: */
|
||||
bplist_t ds_deadlist;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_datasets list */
|
||||
txg_node_t ds_dirty_link;
|
||||
list_node_t ds_synced_link;
|
||||
|
||||
/*
|
||||
* ds_phys->ds_<accounting> is also protected by ds_lock.
|
||||
* Protected by ds_lock:
|
||||
*/
|
||||
kmutex_t ds_lock;
|
||||
void *ds_user_ptr;
|
||||
dsl_dataset_evict_func_t *ds_user_evict_func;
|
||||
uint64_t ds_open_refcount;
|
||||
|
||||
/* no locking; only for making guesses */
|
||||
uint64_t ds_trysnap_txg;
|
||||
|
||||
/* for objset_open() */
|
||||
kmutex_t ds_opening_lock;
|
||||
|
||||
uint64_t ds_reserved; /* cached refreservation */
|
||||
uint64_t ds_quota; /* cached refquota */
|
||||
|
||||
/* Protected by ds_lock; keep at end of struct for better locality */
|
||||
char ds_snapname[MAXNAMELEN];
|
||||
} dsl_dataset_t;
|
||||
|
||||
/*
 * Evaluates to non-zero when the dataset's on-disk ds_num_children is
 * non-zero; that field is documented as 0 for head datasets.
 */
#define dsl_dataset_is_snapshot(ds) \
|
||||
((ds)->ds_phys->ds_num_children != 0)
|
||||
|
||||
#define DS_UNIQUE_IS_ACCURATE(ds) \
|
||||
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
|
||||
|
||||
int dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
|
||||
void *tag, dsl_dataset_t **dsp);
|
||||
int dsl_dataset_open(const char *name, int mode, void *tag,
|
||||
dsl_dataset_t **dsp);
|
||||
int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj,
|
||||
const char *tail, int mode, void *tag, dsl_dataset_t **);
|
||||
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
|
||||
void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag);
|
||||
void dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode);
|
||||
boolean_t dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode);
|
||||
uint64_t dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
uint64_t flags, dmu_tx_t *tx);
|
||||
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds,
|
||||
const char *lastname, dsl_dataset_t *origin, uint64_t flags,
|
||||
cred_t *, dmu_tx_t *);
|
||||
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
|
||||
int dsl_snapshots_destroy(char *fsname, char *snapname);
|
||||
dsl_checkfunc_t dsl_dataset_destroy_check;
|
||||
dsl_syncfunc_t dsl_dataset_destroy_sync;
|
||||
dsl_checkfunc_t dsl_dataset_snapshot_check;
|
||||
dsl_syncfunc_t dsl_dataset_snapshot_sync;
|
||||
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
|
||||
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
|
||||
int dsl_dataset_promote(const char *name);
|
||||
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
|
||||
boolean_t force);
|
||||
|
||||
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
|
||||
void *p, dsl_dataset_evict_func_t func);
|
||||
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
|
||||
|
||||
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
|
||||
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
|
||||
|
||||
boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
|
||||
|
||||
/* The dataset argument is named 'ds', like in the other dsl_dataset_*() prototypes of this header. */
void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx);
|
||||
|
||||
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
void dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
|
||||
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
|
||||
/* The dataset argument is named 'ds', like in the other dsl_dataset_*() prototypes of this header. */
void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv);
|
||||
void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
|
||||
void dsl_dataset_space(dsl_dataset_t *ds,
|
||||
uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_create_root(struct dsl_pool *dp, uint64_t *ddobjp,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
|
||||
|
||||
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
|
||||
uint64_t asize, uint64_t inflight, uint64_t *used,
|
||||
uint64_t *ref_rsrv);
|
||||
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
|
||||
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
|
||||
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_ds(ds, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
|
||||
dsl_dataset_name(ds, __ds_name); \
|
||||
dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
|
||||
kmem_free(__ds_name, MAXNAMELEN); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_ds(dd, fmt, ...)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DATASET_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DELEG_H
|
||||
#define _SYS_DSL_DELEG_H
|
||||
|
||||
#pragma ident "@(#)dsl_deleg.h 1.4 07/10/25 SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZFS_DELEG_PERM_NONE ""
|
||||
#define ZFS_DELEG_PERM_CREATE "create"
|
||||
#define ZFS_DELEG_PERM_DESTROY "destroy"
|
||||
#define ZFS_DELEG_PERM_SNAPSHOT "snapshot"
|
||||
#define ZFS_DELEG_PERM_ROLLBACK "rollback"
|
||||
#define ZFS_DELEG_PERM_CLONE "clone"
|
||||
#define ZFS_DELEG_PERM_PROMOTE "promote"
|
||||
#define ZFS_DELEG_PERM_RENAME "rename"
|
||||
#define ZFS_DELEG_PERM_MOUNT "mount"
|
||||
#define ZFS_DELEG_PERM_SHARE "share"
|
||||
#define ZFS_DELEG_PERM_SEND "send"
|
||||
#define ZFS_DELEG_PERM_RECEIVE "receive"
|
||||
#define ZFS_DELEG_PERM_ALLOW "allow"
|
||||
#define ZFS_DELEG_PERM_USERPROP "userprop"
|
||||
#define ZFS_DELEG_PERM_VSCAN "vscan"
|
||||
|
||||
/*
|
||||
* Note: the names of properties that are marked delegatable are also
|
||||
* valid delegated permissions
|
||||
*/
|
||||
|
||||
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
|
||||
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
|
||||
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
|
||||
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
|
||||
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
|
||||
boolean_t dsl_delegation_on(objset_t *os);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DELEG_H */
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DIR_H
|
||||
#define _SYS_DSL_DIR_H
|
||||
|
||||
#pragma ident "@(#)dsl_dir.h 1.10 07/10/29 SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
|
||||
typedef struct dsl_dir_phys {
|
||||
uint64_t dd_creation_time; /* not actually used */
|
||||
uint64_t dd_head_dataset_obj;
|
||||
uint64_t dd_parent_obj;
|
||||
uint64_t dd_origin_obj;
|
||||
uint64_t dd_child_dir_zapobj;
|
||||
/*
|
||||
* how much space our children are accounting for; for leaf
|
||||
* datasets, == physical space used by fs + snaps
|
||||
*/
|
||||
uint64_t dd_used_bytes;
|
||||
uint64_t dd_compressed_bytes;
|
||||
uint64_t dd_uncompressed_bytes;
|
||||
/* Administrative quota setting */
|
||||
uint64_t dd_quota;
|
||||
/* Administrative reservation setting */
|
||||
uint64_t dd_reserved;
|
||||
uint64_t dd_props_zapobj;
|
||||
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
|
||||
uint64_t dd_pad[20]; /* pad out to 256 bytes for good measure */
|
||||
} dsl_dir_phys_t;
|
||||
|
||||
struct dsl_dir {
|
||||
/* These are immutable; no lock needed: */
|
||||
uint64_t dd_object;
|
||||
dsl_dir_phys_t *dd_phys;
|
||||
dmu_buf_t *dd_dbuf;
|
||||
dsl_pool_t *dd_pool;
|
||||
|
||||
/* protected by lock on pool's dp_dirty_dirs list */
|
||||
txg_node_t dd_dirty_link;
|
||||
|
||||
/* protected by dp_config_rwlock */
|
||||
dsl_dir_t *dd_parent;
|
||||
|
||||
/* Protected by dd_lock */
|
||||
kmutex_t dd_lock;
|
||||
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
|
||||
|
||||
/* Accounting */
|
||||
/* reflects any changes to dd_phys->dd_used_bytes made this syncing */
|
||||
int64_t dd_used_bytes;
|
||||
/* gross estimate of space used by in-flight tx's */
|
||||
uint64_t dd_tempreserved[TXG_SIZE];
|
||||
/* amount of space we expect to write; == amount of dirty data */
|
||||
int64_t dd_space_towrite[TXG_SIZE];
|
||||
|
||||
/* protected by dd_lock; keep at end of struct for better locality */
|
||||
char dd_myname[MAXNAMELEN];
|
||||
};
|
||||
|
||||
void dsl_dir_close(dsl_dir_t *dd, void *tag);
|
||||
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
|
||||
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
|
||||
const char **tailp);
|
||||
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
||||
const char *tail, void *tag, dsl_dir_t **);
|
||||
void dsl_dir_name(dsl_dir_t *dd, char *buf);
|
||||
int dsl_dir_namelen(dsl_dir_t *dd);
|
||||
int dsl_dir_is_private(dsl_dir_t *dd);
|
||||
uint64_t dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx);
|
||||
void dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx);
|
||||
dsl_checkfunc_t dsl_dir_destroy_check;
|
||||
dsl_syncfunc_t dsl_dir_destroy_sync;
|
||||
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
|
||||
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
|
||||
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
|
||||
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
|
||||
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
|
||||
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
|
||||
uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
|
||||
dmu_tx_t *tx);
|
||||
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
|
||||
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_dir_diduse_space(dsl_dir_t *dd,
|
||||
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
|
||||
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
|
||||
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
|
||||
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
|
||||
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
|
||||
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
|
||||
|
||||
/* internal reserved dir name */
|
||||
#define MOS_DIR_NAME "$MOS"
|
||||
|
||||
/*
 * dprintf_dd(dd, fmt, ...): debug print prefixed with "dd=<name> ".
 *
 * With ZFS_DEBUG defined, and only when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags, the macro allocates a scratch buffer of
 * MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 bytes (KM_SLEEP), fills it with
 * dsl_dir_name(dd, ...), passes it to dprintf() ahead of the caller's
 * arguments, and frees the buffer with the same size expression.
 * __VA_ARGS__ follows a comma in the dprintf() call, so at least one
 * argument has to be supplied after fmt.
 *
 * Without ZFS_DEBUG the macro expands to nothing, so its arguments are
 * never evaluated.
 */
#ifdef ZFS_DEBUG
|
||||
#define dprintf_dd(dd, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \
|
||||
KM_SLEEP); \
|
||||
dsl_dir_name(dd, __ds_name); \
|
||||
dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
|
||||
kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_dd(dd, fmt, ...)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_DIR_H */
|
||||
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_POOL_H
|
||||
#define _SYS_DSL_POOL_H
|
||||
|
||||
#pragma ident "@(#)dsl_pool.h 1.5 08/03/20 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/txg_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct objset;
|
||||
struct dsl_dir;
|
||||
|
||||
typedef struct dsl_pool {
|
||||
/* Immutable */
|
||||
spa_t *dp_spa;
|
||||
struct objset *dp_meta_objset;
|
||||
struct dsl_dir *dp_root_dir;
|
||||
struct dsl_dir *dp_mos_dir;
|
||||
uint64_t dp_root_dir_obj;
|
||||
|
||||
/* No lock needed - sync context only */
|
||||
blkptr_t dp_meta_rootbp;
|
||||
list_t dp_synced_datasets;
|
||||
uint64_t dp_write_limit;
|
||||
|
||||
/* Uses dp_lock */
|
||||
kmutex_t dp_lock;
|
||||
uint64_t dp_space_towrite[TXG_SIZE];
|
||||
uint64_t dp_tempreserved[TXG_SIZE];
|
||||
|
||||
/* Has its own locking */
|
||||
tx_state_t dp_tx;
|
||||
txg_list_t dp_dirty_datasets;
|
||||
txg_list_t dp_dirty_dirs;
|
||||
txg_list_t dp_sync_tasks;
|
||||
|
||||
/*
|
||||
* Protects administrative changes (properties, namespace)
|
||||
* It is only held for write in syncing context. Therefore
|
||||
* syncing context does not need to ever have it for read, since
|
||||
* nobody else could possibly have it for write.
|
||||
*/
|
||||
krwlock_t dp_config_rwlock;
|
||||
} dsl_pool_t;
|
||||
|
||||
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
|
||||
void dsl_pool_close(dsl_pool_t *dp);
|
||||
dsl_pool_t *dsl_pool_create(spa_t *spa, uint64_t txg);
|
||||
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
|
||||
void dsl_pool_zil_clean(dsl_pool_t *dp);
|
||||
int dsl_pool_sync_context(dsl_pool_t *dp);
|
||||
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
|
||||
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
void dsl_pool_memory_pressure(dsl_pool_t *dp);
|
||||
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_POOL_H */
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_PROP_H
|
||||
#define _SYS_DSL_PROP_H
|
||||
|
||||
#pragma ident "@(#)dsl_prop.h 1.6 07/10/29 SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_dataset;
|
||||
|
||||
/* The callback func may not call into the DMU or DSL! */
|
||||
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
|
||||
|
||||
/*
 * One property-change callback record, kept on a dd_prop_cbs list.
 * The remaining fields have the same types as the arguments of
 * dsl_prop_register()/dsl_prop_unregister(): dataset, property name,
 * callback and callback argument.
 * NOTE(review): presumably they are stored verbatim from those calls, and
 * cbr_propname is not copied -- confirm in the implementation.
 */
typedef struct dsl_prop_cb_record {
|
||||
list_node_t cbr_node; /* link on dd_prop_cbs */
|
||||
struct dsl_dataset *cbr_ds;
|
||||
const char *cbr_propname;
|
||||
dsl_prop_changed_cb_t *cbr_func;
|
||||
void *cbr_arg;
|
||||
} dsl_prop_cb_record_t;
|
||||
|
||||
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
|
||||
dsl_prop_changed_cb_t *callback, void *cbarg);
|
||||
int dsl_prop_numcb(struct dsl_dataset *ds);
|
||||
|
||||
int dsl_prop_get(const char *ddname, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
int dsl_prop_get_integer(const char *ddname, const char *propname,
|
||||
uint64_t *valuep, char *setpoint);
|
||||
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
|
||||
int dsl_prop_get_ds_locked(dsl_dir_t *dd, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
|
||||
int dsl_prop_set(const char *ddname, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
int dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
|
||||
void dsl_prop_nvlist_add_string(nvlist_t *nv,
|
||||
zfs_prop_t prop, const char *value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_PROP_H */
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_SYNCTASK_H
|
||||
#define _SYS_DSL_SYNCTASK_H
|
||||
|
||||
#pragma ident "@(#)dsl_synctask.h 1.3 07/06/29 SMI"
|
||||
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct dsl_pool;
|
||||
|
||||
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
|
||||
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
|
||||
|
||||
/*
 * A single sync task: a check function and a sync function together with
 * the two opaque arguments (dst_arg1, dst_arg2) whose types match the
 * leading void * parameters of dsl_checkfunc_t and dsl_syncfunc_t.
 * NOTE(review): presumably dst_node links the task on
 * dsl_sync_task_group_t.dstg_tasks and dst_err holds the check function's
 * result -- confirm in the implementation.
 */
typedef struct dsl_sync_task {
|
||||
list_node_t dst_node;
|
||||
dsl_checkfunc_t *dst_checkfunc;
|
||||
dsl_syncfunc_t *dst_syncfunc;
|
||||
void *dst_arg1;
|
||||
void *dst_arg2;
|
||||
int dst_err;
|
||||
} dsl_sync_task_t;
|
||||
|
||||
/*
 * A group of sync tasks tied to one pool (dstg_pool) and one txg
 * (dstg_txg).  dstg_tasks is the list head for the member tasks and
 * dstg_cr has the cred_t * type that dsl_syncfunc_t takes.
 * NOTE(review): presumably dstg_node queues the group on the pool's
 * dp_sync_tasks txg list, dstg_space accumulates the blocks_modified
 * estimates passed to dsl_sync_task_create(), and dstg_nowaiter marks
 * groups submitted through the *_nowait() entry points -- confirm in the
 * implementation.
 */
typedef struct dsl_sync_task_group {
|
||||
txg_node_t dstg_node;
|
||||
list_t dstg_tasks;
|
||||
struct dsl_pool *dstg_pool;
|
||||
cred_t *dstg_cr;
|
||||
uint64_t dstg_txg;
|
||||
int dstg_err;
|
||||
int dstg_space;
|
||||
boolean_t dstg_nowaiter;
|
||||
} dsl_sync_task_group_t;
|
||||
|
||||
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
|
||||
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
|
||||
dsl_checkfunc_t *, dsl_syncfunc_t *,
|
||||
void *arg1, void *arg2, int blocks_modified);
|
||||
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
|
||||
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
|
||||
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
|
||||
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
|
||||
|
||||
int dsl_sync_task_do(struct dsl_pool *dp,
|
||||
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
|
||||
void *arg1, void *arg2, int blocks_modified);
|
||||
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
|
||||
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
|
||||
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_DSL_SYNCTASK_H */
|
||||
@@ -0,0 +1 @@
|
||||
subdir-m += fs
|
||||
@@ -0,0 +1 @@
|
||||
DISTFILES = zfs.h
|
||||
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FM_FS_ZFS_H
|
||||
#define _SYS_FM_FS_ZFS_H
|
||||
|
||||
#pragma ident "@(#)zfs.h 1.2 07/06/07 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZFS_ERROR_CLASS "fs.zfs"
|
||||
|
||||
#define FM_EREPORT_ZFS_CHECKSUM "checksum"
|
||||
#define FM_EREPORT_ZFS_IO "io"
|
||||
#define FM_EREPORT_ZFS_DATA "data"
|
||||
#define FM_EREPORT_ZFS_POOL "zpool"
|
||||
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
|
||||
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
|
||||
#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data"
|
||||
#define FM_EREPORT_ZFS_DEVICE_NO_REPLICAS "vdev.no_replicas"
|
||||
#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum"
|
||||
#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small"
|
||||
#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label"
|
||||
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_POOL_GUID "pool_guid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT "pool_context"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID "vdev_guid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID "parent_devid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET "zio_objset"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT "zio_object"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL "zio_level"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID "zio_blkid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR "zio_err"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
|
||||
|
||||
#define FM_RESOURCE_OK "ok"
|
||||
#define FM_RESOURCE_REMOVED "removed"
|
||||
#define FM_RESOURCE_AUTOREPLACE "autoreplace"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FM_FS_ZFS_H */
|
||||
@@ -0,0 +1 @@
|
||||
DISTFILES = zfs.h
|
||||
@@ -0,0 +1,650 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_H
|
||||
#define _SYS_FS_ZFS_H
|
||||
|
||||
#pragma ident "@(#)zfs.h 1.44 08/04/09 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Types and constants shared between userland and the kernel.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Each dataset can be one of the following types. These constants can be
|
||||
* combined into masks that can be passed to various functions.
|
||||
*/
|
||||
typedef enum {
|
||||
ZFS_TYPE_FILESYSTEM = 0x1,
|
||||
ZFS_TYPE_SNAPSHOT = 0x2,
|
||||
ZFS_TYPE_VOLUME = 0x4,
|
||||
ZFS_TYPE_POOL = 0x8
|
||||
} zfs_type_t;
|
||||
|
||||
#define ZFS_TYPE_DATASET \
|
||||
(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
|
||||
|
||||
/*
|
||||
* Dataset properties are identified by these constants and must be added to
|
||||
* the end of this list to ensure that external consumers are not affected
|
||||
* by the change. If you make any changes to this list, be sure to update
|
||||
* the property table in usr/src/common/zfs/zfs_prop.c.
|
||||
*/
|
||||
typedef enum {
|
||||
ZFS_PROP_TYPE,
|
||||
ZFS_PROP_CREATION,
|
||||
ZFS_PROP_USED,
|
||||
ZFS_PROP_AVAILABLE,
|
||||
ZFS_PROP_REFERENCED,
|
||||
ZFS_PROP_COMPRESSRATIO,
|
||||
ZFS_PROP_MOUNTED,
|
||||
ZFS_PROP_ORIGIN,
|
||||
ZFS_PROP_QUOTA,
|
||||
ZFS_PROP_RESERVATION,
|
||||
ZFS_PROP_VOLSIZE,
|
||||
ZFS_PROP_VOLBLOCKSIZE,
|
||||
ZFS_PROP_RECORDSIZE,
|
||||
ZFS_PROP_MOUNTPOINT,
|
||||
ZFS_PROP_SHARENFS,
|
||||
ZFS_PROP_CHECKSUM,
|
||||
ZFS_PROP_COMPRESSION,
|
||||
ZFS_PROP_ATIME,
|
||||
ZFS_PROP_DEVICES,
|
||||
ZFS_PROP_EXEC,
|
||||
ZFS_PROP_SETUID,
|
||||
ZFS_PROP_READONLY,
|
||||
ZFS_PROP_ZONED,
|
||||
ZFS_PROP_SNAPDIR,
|
||||
ZFS_PROP_ACLMODE,
|
||||
ZFS_PROP_ACLINHERIT,
|
||||
ZFS_PROP_CREATETXG, /* not exposed to the user */
|
||||
ZFS_PROP_NAME, /* not exposed to the user */
|
||||
ZFS_PROP_CANMOUNT,
|
||||
ZFS_PROP_SHAREISCSI,
|
||||
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
|
||||
ZFS_PROP_XATTR,
|
||||
ZFS_PROP_NUMCLONES, /* not exposed to the user */
|
||||
ZFS_PROP_COPIES,
|
||||
ZFS_PROP_VERSION,
|
||||
ZFS_PROP_UTF8ONLY,
|
||||
ZFS_PROP_NORMALIZE,
|
||||
ZFS_PROP_CASE,
|
||||
ZFS_PROP_VSCAN,
|
||||
ZFS_PROP_NBMAND,
|
||||
ZFS_PROP_SHARESMB,
|
||||
ZFS_PROP_REFQUOTA,
|
||||
ZFS_PROP_REFRESERVATION,
|
||||
ZFS_NUM_PROPS
|
||||
} zfs_prop_t;
|
||||
|
||||
/*
|
||||
* Pool properties are identified by these constants and must be added to the
|
||||
* end of this list to ensure that external consumers are not affected
|
||||
* by the change. If you make any changes to this list, be sure to update
|
||||
* the property table in usr/src/common/zfs/zpool_prop.c.
|
||||
*/
|
||||
typedef enum {
|
||||
ZPOOL_PROP_NAME,
|
||||
ZPOOL_PROP_SIZE,
|
||||
ZPOOL_PROP_USED,
|
||||
ZPOOL_PROP_AVAILABLE,
|
||||
ZPOOL_PROP_CAPACITY,
|
||||
ZPOOL_PROP_ALTROOT,
|
||||
ZPOOL_PROP_HEALTH,
|
||||
ZPOOL_PROP_GUID,
|
||||
ZPOOL_PROP_VERSION,
|
||||
ZPOOL_PROP_BOOTFS,
|
||||
ZPOOL_PROP_DELEGATION,
|
||||
ZPOOL_PROP_AUTOREPLACE,
|
||||
ZPOOL_PROP_CACHEFILE,
|
||||
ZPOOL_PROP_FAILUREMODE,
|
||||
ZPOOL_NUM_PROPS
|
||||
} zpool_prop_t;
|
||||
|
||||
/* Parenthesized so the negative constant stays a single operand wherever it expands. */
#define ZPROP_CONT (-2)
|
||||
/* Parenthesized so the negative constant stays a single operand wherever it expands. */
#define ZPROP_INVAL (-1)
|
||||
|
||||
#define ZPROP_VALUE "value"
|
||||
#define ZPROP_SOURCE "source"
|
||||
|
||||
/*
 * Property sources.  Each value is a distinct single bit, and the five of
 * them OR together to 0x1f, which is the value of ZPROP_SRC_ALL.
 */
typedef enum {
|
||||
ZPROP_SRC_NONE = 0x1,
|
||||
ZPROP_SRC_DEFAULT = 0x2,
|
||||
ZPROP_SRC_TEMPORARY = 0x4,
|
||||
ZPROP_SRC_LOCAL = 0x8,
|
||||
ZPROP_SRC_INHERITED = 0x10
|
||||
} zprop_source_t;
|
||||
|
||||
#define ZPROP_SRC_ALL 0x1f
|
||||
|
||||
typedef int (*zprop_func)(int, void *);
|
||||
|
||||
/*
|
||||
* Dataset property functions shared between libzfs and kernel.
|
||||
*/
|
||||
const char *zfs_prop_default_string(zfs_prop_t);
|
||||
uint64_t zfs_prop_default_numeric(zfs_prop_t);
|
||||
boolean_t zfs_prop_readonly(zfs_prop_t);
|
||||
boolean_t zfs_prop_inheritable(zfs_prop_t);
|
||||
boolean_t zfs_prop_setonce(zfs_prop_t);
|
||||
const char *zfs_prop_to_name(zfs_prop_t);
|
||||
zfs_prop_t zfs_name_to_prop(const char *);
|
||||
boolean_t zfs_prop_user(const char *);
|
||||
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
|
||||
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
|
||||
int zfs_prop_valid_for_type(int, zfs_type_t);
|
||||
|
||||
/*
|
||||
* Pool property functions shared between libzfs and kernel.
|
||||
*/
|
||||
zpool_prop_t zpool_name_to_prop(const char *);
|
||||
const char *zpool_prop_to_name(zpool_prop_t);
|
||||
const char *zpool_prop_default_string(zpool_prop_t);
|
||||
uint64_t zpool_prop_default_numeric(zpool_prop_t);
|
||||
boolean_t zpool_prop_readonly(zpool_prop_t);
|
||||
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
|
||||
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
|
||||
|
||||
/*
|
||||
* Definitions for the Delegation.
|
||||
*/
|
||||
/*
 * Each non-zero value is an ASCII letter: the lowercase letter names the
 * "who" type and the same letter in uppercase is its *_SETS counterpart.
 */
typedef enum {
|
||||
ZFS_DELEG_WHO_UNKNOWN = 0,
|
||||
ZFS_DELEG_USER = 'u',
|
||||
ZFS_DELEG_USER_SETS = 'U',
|
||||
ZFS_DELEG_GROUP = 'g',
|
||||
ZFS_DELEG_GROUP_SETS = 'G',
|
||||
ZFS_DELEG_EVERYONE = 'e',
|
||||
ZFS_DELEG_EVERYONE_SETS = 'E',
|
||||
ZFS_DELEG_CREATE = 'c',
|
||||
ZFS_DELEG_CREATE_SETS = 'C',
|
||||
ZFS_DELEG_NAMED_SET = 's',
|
||||
ZFS_DELEG_NAMED_SET_SETS = 'S'
|
||||
} zfs_deleg_who_type_t;
|
||||
|
||||
/*
 * Values are bit-coded: ZFS_DELEG_PERM_LOCALDESCENDENT (3) equals
 * ZFS_DELEG_PERM_LOCAL (1) | ZFS_DELEG_PERM_DESCENDENT (2), and
 * ZFS_DELEG_PERM_CREATE (4) occupies its own bit.
 */
typedef enum {
|
||||
ZFS_DELEG_NONE = 0,
|
||||
ZFS_DELEG_PERM_LOCAL = 1,
|
||||
ZFS_DELEG_PERM_DESCENDENT = 2,
|
||||
ZFS_DELEG_PERM_LOCALDESCENDENT = 3,
|
||||
ZFS_DELEG_PERM_CREATE = 4
|
||||
} zfs_deleg_inherit_t;
|
||||
|
||||
#define ZFS_DELEG_PERM_UID "uid"
|
||||
#define ZFS_DELEG_PERM_GID "gid"
|
||||
#define ZFS_DELEG_PERM_GROUPS "groups"
|
||||
|
||||
typedef enum {
|
||||
ZFS_CANMOUNT_OFF = 0,
|
||||
ZFS_CANMOUNT_ON = 1,
|
||||
ZFS_CANMOUNT_NOAUTO = 2
|
||||
} zfs_canmount_type_t;
|
||||
|
||||
typedef enum zfs_share_op {
|
||||
ZFS_SHARE_NFS = 0,
|
||||
ZFS_UNSHARE_NFS = 1,
|
||||
ZFS_SHARE_SMB = 2,
|
||||
ZFS_UNSHARE_SMB = 3
|
||||
} zfs_share_op_t;
|
||||
|
||||
/*
|
||||
* On-disk version number.
|
||||
*/
|
||||
#define SPA_VERSION_1 1ULL
|
||||
#define SPA_VERSION_2 2ULL
|
||||
#define SPA_VERSION_3 3ULL
|
||||
#define SPA_VERSION_4 4ULL
|
||||
#define SPA_VERSION_5 5ULL
|
||||
#define SPA_VERSION_6 6ULL
|
||||
#define SPA_VERSION_7 7ULL
|
||||
#define SPA_VERSION_8 8ULL
|
||||
#define SPA_VERSION_9 9ULL
|
||||
#define SPA_VERSION_10 10ULL
|
||||
|
||||
/*
|
||||
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
|
||||
* format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
|
||||
* and do the appropriate changes.
|
||||
*/
|
||||
#define SPA_VERSION SPA_VERSION_10
|
||||
#define SPA_VERSION_STRING "10"
|
||||
|
||||
/*
|
||||
* Symbolic names for the changes that caused a SPA_VERSION switch.
|
||||
* Used in the code when checking for presence or absence of a feature.
|
||||
* Feel free to define multiple symbolic names for each version if there
|
||||
* were multiple changes to on-disk structures during that version.
|
||||
*
|
||||
* NOTE: When checking the current SPA_VERSION in your code, be sure
|
||||
* to use spa_version() since it reports the version of the
|
||||
* last synced uberblock. Checking the in-flight version can
|
||||
* be dangerous in some cases.
|
||||
*/
|
||||
#define SPA_VERSION_INITIAL SPA_VERSION_1
|
||||
#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2
|
||||
#define SPA_VERSION_SPARES SPA_VERSION_3
|
||||
#define SPA_VERSION_RAID6 SPA_VERSION_3
|
||||
#define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3
|
||||
#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3
|
||||
#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3
|
||||
#define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4
|
||||
#define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5
|
||||
#define SPA_VERSION_BOOTFS SPA_VERSION_6
|
||||
#define SPA_VERSION_SLOGS SPA_VERSION_7
|
||||
#define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8
|
||||
#define SPA_VERSION_FUID SPA_VERSION_9
|
||||
#define SPA_VERSION_REFRESERVATION SPA_VERSION_9
|
||||
#define SPA_VERSION_REFQUOTA SPA_VERSION_9
|
||||
#define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9
|
||||
#define SPA_VERSION_L2CACHE SPA_VERSION_10
|
||||
|
||||
/*
|
||||
* ZPL version - rev'd whenever an incompatible on-disk format change
|
||||
* occurs. This is independent of SPA/DMU/ZAP versioning. You must
|
||||
* also update the version_table[] and help message in zfs_prop.c.
|
||||
*
|
||||
* When changing, be sure to teach GRUB how to read the new format!
|
||||
* See usr/src/grub/grub-0.95/stage2/{zfs-include/,fsys_zfs*}
|
||||
*/
|
||||
#define ZPL_VERSION_1 1ULL
|
||||
#define ZPL_VERSION_2 2ULL
|
||||
#define ZPL_VERSION_3 3ULL
|
||||
#define ZPL_VERSION ZPL_VERSION_3
|
||||
#define ZPL_VERSION_STRING "3"
|
||||
|
||||
#define ZPL_VERSION_INITIAL ZPL_VERSION_1
|
||||
#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2
|
||||
#define ZPL_VERSION_FUID ZPL_VERSION_3
|
||||
#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3
|
||||
#define ZPL_VERSION_SYSATTR ZPL_VERSION_3
|
||||
|
||||
/*
|
||||
* The following are configuration names used in the nvlist describing a pool's
|
||||
* configuration.
|
||||
*/
|
||||
#define ZPOOL_CONFIG_VERSION "version"
|
||||
#define ZPOOL_CONFIG_POOL_NAME "name"
|
||||
#define ZPOOL_CONFIG_POOL_STATE "state"
|
||||
#define ZPOOL_CONFIG_POOL_TXG "txg"
|
||||
#define ZPOOL_CONFIG_POOL_GUID "pool_guid"
|
||||
#define ZPOOL_CONFIG_CREATE_TXG "create_txg"
|
||||
#define ZPOOL_CONFIG_TOP_GUID "top_guid"
|
||||
#define ZPOOL_CONFIG_VDEV_TREE "vdev_tree"
|
||||
#define ZPOOL_CONFIG_TYPE "type"
|
||||
#define ZPOOL_CONFIG_CHILDREN "children"
|
||||
#define ZPOOL_CONFIG_ID "id"
|
||||
#define ZPOOL_CONFIG_GUID "guid"
|
||||
#define ZPOOL_CONFIG_PATH "path"
|
||||
#define ZPOOL_CONFIG_DEVID "devid"
|
||||
#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array"
|
||||
#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift"
|
||||
#define ZPOOL_CONFIG_ASHIFT "ashift"
|
||||
#define ZPOOL_CONFIG_ASIZE "asize"
|
||||
#define ZPOOL_CONFIG_DTL "DTL"
|
||||
#define ZPOOL_CONFIG_STATS "stats"
|
||||
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
|
||||
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
|
||||
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
|
||||
#define ZPOOL_CONFIG_SPARES "spares"
|
||||
#define ZPOOL_CONFIG_IS_SPARE "is_spare"
|
||||
#define ZPOOL_CONFIG_NPARITY "nparity"
|
||||
#define ZPOOL_CONFIG_HOSTID "hostid"
|
||||
#define ZPOOL_CONFIG_HOSTNAME "hostname"
|
||||
#define ZPOOL_CONFIG_UNSPARE "unspare"
|
||||
#define ZPOOL_CONFIG_PHYS_PATH "phys_path"
|
||||
#define ZPOOL_CONFIG_IS_LOG "is_log"
|
||||
#define ZPOOL_CONFIG_L2CACHE "l2cache"
|
||||
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
||||
/*
|
||||
* The persistent vdev state is stored as separate values rather than a single
|
||||
* 'vdev_state' entry. This is because a device can be in multiple states, such
|
||||
* as offline and degraded.
|
||||
*/
|
||||
#define ZPOOL_CONFIG_OFFLINE "offline"
|
||||
#define ZPOOL_CONFIG_FAULTED "faulted"
|
||||
#define ZPOOL_CONFIG_DEGRADED "degraded"
|
||||
#define ZPOOL_CONFIG_REMOVED "removed"
|
||||
|
||||
#define VDEV_TYPE_ROOT "root"
|
||||
#define VDEV_TYPE_MIRROR "mirror"
|
||||
#define VDEV_TYPE_REPLACING "replacing"
|
||||
#define VDEV_TYPE_RAIDZ "raidz"
|
||||
#define VDEV_TYPE_DISK "disk"
|
||||
#define VDEV_TYPE_FILE "file"
|
||||
#define VDEV_TYPE_MISSING "missing"
|
||||
#define VDEV_TYPE_SPARE "spare"
|
||||
#define VDEV_TYPE_LOG "log"
|
||||
#define VDEV_TYPE_L2CACHE "l2cache"
|
||||
|
||||
/*
|
||||
* This is needed in userland to report the minimum necessary device size.
|
||||
*/
|
||||
#define SPA_MINDEVSIZE (64ULL << 20)
|
||||
|
||||
/*
|
||||
* The location of the pool configuration repository, shared between kernel and
|
||||
* userland.
|
||||
*/
|
||||
#define ZPOOL_CACHE_DIR "/etc/zfs"
|
||||
#define ZPOOL_CACHE_FILE "zpool.cache"
|
||||
#define ZPOOL_CACHE_TMP ".zpool.cache"
|
||||
|
||||
#define ZPOOL_CACHE ZPOOL_CACHE_DIR "/" ZPOOL_CACHE_FILE
|
||||
|
||||
/*
|
||||
* vdev states are ordered from least to most healthy.
|
||||
* A vdev that's CANT_OPEN or below is considered unusable.
|
||||
*/
|
||||
typedef enum vdev_state {
|
||||
VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */
|
||||
VDEV_STATE_CLOSED, /* Not currently open */
|
||||
VDEV_STATE_OFFLINE, /* Not allowed to open */
|
||||
VDEV_STATE_REMOVED, /* Explicitly removed from system */
|
||||
VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */
|
||||
VDEV_STATE_FAULTED, /* External request to fault device */
|
||||
VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */
|
||||
VDEV_STATE_HEALTHY /* Presumed good */
|
||||
} vdev_state_t;
|
||||
|
||||
#define VDEV_STATE_ONLINE VDEV_STATE_HEALTHY
|
||||
|
||||
/*
|
||||
* vdev aux states. When a vdev is in the CANT_OPEN state, the aux field
|
||||
* of the vdev stats structure uses these constants to distinguish why.
|
||||
*/
|
||||
typedef enum vdev_aux {
|
||||
VDEV_AUX_NONE, /* no error */
|
||||
VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */
|
||||
VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */
|
||||
VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */
|
||||
VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */
|
||||
VDEV_AUX_TOO_SMALL, /* vdev size is too small */
|
||||
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
|
||||
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
|
||||
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
|
||||
VDEV_AUX_SPARED, /* hot spare used in another pool */
|
||||
VDEV_AUX_ERR_EXCEEDED /* too many errors */
|
||||
} vdev_aux_t;
|
||||
|
||||
/*
|
||||
* pool state. The following states are written to disk as part of the normal
|
||||
* SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. The remaining
|
||||
* states are software abstractions used at various levels to communicate
|
||||
* pool state.
|
||||
*/
|
||||
typedef enum pool_state {
|
||||
POOL_STATE_ACTIVE = 0, /* In active use */
|
||||
POOL_STATE_EXPORTED, /* Explicitly exported */
|
||||
POOL_STATE_DESTROYED, /* Explicitly destroyed */
|
||||
POOL_STATE_SPARE, /* Reserved for hot spare use */
|
||||
POOL_STATE_L2CACHE, /* Level 2 ARC device */
|
||||
POOL_STATE_UNINITIALIZED, /* Internal spa_t state */
|
||||
POOL_STATE_IO_FAILURE, /* Internal pool state */
|
||||
POOL_STATE_UNAVAIL, /* Internal libzfs state */
|
||||
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
|
||||
} pool_state_t;
|
||||
|
||||
/*
|
||||
* Scrub types.
|
||||
*/
|
||||
typedef enum pool_scrub_type {
|
||||
POOL_SCRUB_NONE,
|
||||
POOL_SCRUB_RESILVER,
|
||||
POOL_SCRUB_EVERYTHING,
|
||||
POOL_SCRUB_TYPES
|
||||
} pool_scrub_type_t;
|
||||
|
||||
/*
|
||||
* ZIO types. Needed to interpret vdev statistics below.
|
||||
*/
|
||||
typedef enum zio_type {
|
||||
ZIO_TYPE_NULL = 0,
|
||||
ZIO_TYPE_READ,
|
||||
ZIO_TYPE_WRITE,
|
||||
ZIO_TYPE_FREE,
|
||||
ZIO_TYPE_CLAIM,
|
||||
ZIO_TYPE_IOCTL,
|
||||
ZIO_TYPES /* count of the types above (6); sizes vs_ops[] and vs_bytes[] in vdev_stat_t */
|
||||
} zio_type_t;
|
||||
|
||||
/*
|
||||
* Vdev statistics. Note: all fields should be 64-bit because this
|
||||
* is passed between kernel and userland as an nvlist uint64 array.
|
||||
*/
|
||||
typedef struct vdev_stat {
|
||||
hrtime_t vs_timestamp; /* time since vdev load */
|
||||
uint64_t vs_state; /* vdev state */
|
||||
uint64_t vs_aux; /* see vdev_aux_t */
|
||||
uint64_t vs_alloc; /* space allocated */
|
||||
uint64_t vs_space; /* total capacity */
|
||||
uint64_t vs_dspace; /* deflated capacity */
|
||||
uint64_t vs_rsize; /* replaceable dev size */
|
||||
uint64_t vs_ops[ZIO_TYPES]; /* operation count */
|
||||
uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */
|
||||
uint64_t vs_read_errors; /* read errors */
|
||||
uint64_t vs_write_errors; /* write errors */
|
||||
uint64_t vs_checksum_errors; /* checksum errors */
|
||||
uint64_t vs_self_healed; /* self-healed bytes */
|
||||
uint64_t vs_scrub_type; /* pool_scrub_type_t */
|
||||
uint64_t vs_scrub_complete; /* completed? */
|
||||
uint64_t vs_scrub_examined; /* bytes examined; top */
|
||||
uint64_t vs_scrub_repaired; /* bytes repaired; leaf */
|
||||
uint64_t vs_scrub_errors; /* errors during scrub */
|
||||
uint64_t vs_scrub_start; /* UTC scrub start time */
|
||||
uint64_t vs_scrub_end; /* UTC scrub end time */
|
||||
} vdev_stat_t;
|
||||
|
||||
/* Driver names and the ZFS control device node. */
#define	ZVOL_DRIVER	"zvol"
#define	ZFS_DRIVER	"zfs"
#define	ZFS_DEV		"/dev/zfs"

/*
 * zvol paths.  Irritatingly, the devfsadm interfaces want all these
 * paths without the /dev prefix, but for some things, we want the
 * /dev prefix.  Below are the names without /dev.
 */
#define	ZVOL_DEV_DIR	"zvol/dsk"
#define	ZVOL_RDEV_DIR	"zvol/rdsk"

/*
 * And here are the things we need with /dev, etc. in front of them.
 * ZVOL_FULL_DEV_DIR is built by string-literal concatenation and ends
 * with a trailing slash.
 */
#define	ZVOL_PSEUDO_DEV		"/devices/pseudo/zvol@0:"
#define	ZVOL_FULL_DEV_DIR	"/dev/" ZVOL_DEV_DIR "/"

#define	ZVOL_PROP_NAME		"name"
|
||||
|
||||
/*
 * /dev/zfs ioctl numbers.
 *
 * Only the first enumerator has an explicit value; every other command
 * number is derived from its position in the list.  Inserting, removing
 * or reordering entries therefore renumbers every command after the
 * change.
 */
#define	ZFS_IOC		('Z' << 8)

typedef enum zfs_ioc {
	ZFS_IOC_POOL_CREATE = ZFS_IOC,
	ZFS_IOC_POOL_DESTROY,
	ZFS_IOC_POOL_IMPORT,
	ZFS_IOC_POOL_EXPORT,
	ZFS_IOC_POOL_CONFIGS,
	ZFS_IOC_POOL_STATS,
	ZFS_IOC_POOL_TRYIMPORT,
	ZFS_IOC_POOL_SCRUB,
	ZFS_IOC_POOL_FREEZE,
	ZFS_IOC_POOL_UPGRADE,
	ZFS_IOC_POOL_GET_HISTORY,
	ZFS_IOC_VDEV_ADD,
	ZFS_IOC_VDEV_REMOVE,
	ZFS_IOC_VDEV_SET_STATE,
	ZFS_IOC_VDEV_ATTACH,
	ZFS_IOC_VDEV_DETACH,
	ZFS_IOC_VDEV_SETPATH,
	ZFS_IOC_OBJSET_STATS,
	ZFS_IOC_OBJSET_ZPLPROPS,
	ZFS_IOC_DATASET_LIST_NEXT,
	ZFS_IOC_SNAPSHOT_LIST_NEXT,
	ZFS_IOC_SET_PROP,
	ZFS_IOC_CREATE_MINOR,
	ZFS_IOC_REMOVE_MINOR,
	ZFS_IOC_CREATE,
	ZFS_IOC_DESTROY,
	ZFS_IOC_ROLLBACK,
	ZFS_IOC_RENAME,
	ZFS_IOC_RECV,
	ZFS_IOC_SEND,
	ZFS_IOC_INJECT_FAULT,
	ZFS_IOC_CLEAR_FAULT,
	ZFS_IOC_INJECT_LIST_NEXT,
	ZFS_IOC_ERROR_LOG,
	ZFS_IOC_CLEAR,
	ZFS_IOC_PROMOTE,
	ZFS_IOC_DESTROY_SNAPS,
	ZFS_IOC_SNAPSHOT,
	ZFS_IOC_DSOBJ_TO_DSNAME,
	ZFS_IOC_OBJ_TO_PATH,
	ZFS_IOC_POOL_SET_PROPS,
	ZFS_IOC_POOL_GET_PROPS,
	ZFS_IOC_SET_FSACL,
	ZFS_IOC_GET_FSACL,
	ZFS_IOC_ISCSI_PERM_CHECK,
	ZFS_IOC_SHARE,
	ZFS_IOC_INHERIT_PROP
} zfs_ioc_t;
|
||||
|
||||
/*
 * Internal SPA load state.  Used by FMA diagnosis engine.
 */
typedef enum {
	SPA_LOAD_NONE,		/* no load in progress */
	SPA_LOAD_OPEN,		/* normal open */
	SPA_LOAD_IMPORT,	/* import in progress */
	SPA_LOAD_TRYIMPORT	/* tryimport in progress */
} spa_load_state_t;
|
||||
|
||||
/*
 * Bookmark name values.
 */
#define	ZPOOL_ERR_LIST		"error list"
#define	ZPOOL_ERR_DATASET	"dataset"
#define	ZPOOL_ERR_OBJECT	"object"

/* Upper bound on a history record: two maximum-length paths plus one. */
#define	HIS_MAX_RECORD_LEN	(MAXPATHLEN + MAXPATHLEN + 1)

/*
 * The following are names used in the nvlist describing
 * the pool's history log.
 */
#define	ZPOOL_HIST_RECORD	"history record"
#define	ZPOOL_HIST_TIME		"history time"
#define	ZPOOL_HIST_CMD		"history command"
#define	ZPOOL_HIST_WHO		"history who"
#define	ZPOOL_HIST_ZONE		"history zone"
#define	ZPOOL_HIST_HOST		"history hostname"
#define	ZPOOL_HIST_TXG		"history txg"
#define	ZPOOL_HIST_INT_EVENT	"history internal event"
#define	ZPOOL_HIST_INT_STR	"history internal str"
|
||||
|
||||
/*
 * Flags for ZFS_IOC_VDEV_SET_STATE
 *
 * The ZFS_ONLINE_* values are distinct bits and may be OR-ed together.
 * ZFS_OFFLINE_TEMPORARY reuses the value 0x1.
 * NOTE(review): presumably the online and offline flags are interpreted
 * separately depending on the requested state -- confirm against the
 * ioctl handler.
 */
#define	ZFS_ONLINE_CHECKREMOVE	0x1
#define	ZFS_ONLINE_UNSPARE	0x2
#define	ZFS_ONLINE_FORCEFAULT	0x4
#define	ZFS_OFFLINE_TEMPORARY	0x1
|
||||
|
||||
/*
 * Sysevent payload members.  ZFS will generate the following sysevents with the
 * given payloads:
 *
 *	ESC_ZFS_RESILVER_START
 *	ESC_ZFS_RESILVER_END
 *	ESC_ZFS_POOL_DESTROY
 *
 *		ZFS_EV_POOL_NAME	DATA_TYPE_STRING
 *		ZFS_EV_POOL_GUID	DATA_TYPE_UINT64
 *
 *	ESC_ZFS_VDEV_REMOVE
 *	ESC_ZFS_VDEV_CLEAR
 *	ESC_ZFS_VDEV_CHECK
 *
 *		ZFS_EV_POOL_NAME	DATA_TYPE_STRING
 *		ZFS_EV_POOL_GUID	DATA_TYPE_UINT64
 *		ZFS_EV_VDEV_PATH	DATA_TYPE_STRING	(optional)
 *		ZFS_EV_VDEV_GUID	DATA_TYPE_UINT64
 */
#define	ZFS_EV_POOL_NAME	"pool_name"
#define	ZFS_EV_POOL_GUID	"pool_guid"
#define	ZFS_EV_VDEV_PATH	"vdev_path"
#define	ZFS_EV_VDEV_GUID	"vdev_guid"
|
||||
|
||||
/*
 * Internal history event codes.  Values are positional starting at
 * LOG_NO_EVENT == 0; LOG_END is the trailing sentinel and equals the
 * number of codes declared before it, so it must stay last.
 */
typedef enum history_internal_events {
	LOG_NO_EVENT = 0,
	LOG_POOL_CREATE,
	LOG_POOL_VDEV_ADD,
	LOG_POOL_REMOVE,
	LOG_POOL_DESTROY,
	LOG_POOL_EXPORT,
	LOG_POOL_IMPORT,
	LOG_POOL_VDEV_ATTACH,
	LOG_POOL_VDEV_REPLACE,
	LOG_POOL_VDEV_DETACH,
	LOG_POOL_VDEV_ONLINE,
	LOG_POOL_VDEV_OFFLINE,
	LOG_POOL_UPGRADE,
	LOG_POOL_CLEAR,
	LOG_POOL_SCRUB,
	LOG_POOL_PROPSET,
	LOG_DS_CREATE,
	LOG_DS_CLONE,
	LOG_DS_DESTROY,
	LOG_DS_DESTROY_BEGIN,
	LOG_DS_INHERIT,
	LOG_DS_PROPSET,
	LOG_DS_QUOTA,
	LOG_DS_PERM_UPDATE,
	LOG_DS_PERM_REMOVE,
	LOG_DS_PERM_WHO_REMOVE,
	LOG_DS_PROMOTE,
	LOG_DS_RECEIVE,
	LOG_DS_RENAME,
	LOG_DS_RESERVATION,
	LOG_DS_REPLAY_INC_SYNC,
	LOG_DS_REPLAY_FULL_SYNC,
	LOG_DS_ROLLBACK,
	LOG_DS_SNAPSHOT,
	LOG_DS_UPGRADE,
	LOG_DS_REFQUOTA,
	LOG_DS_REFRESERV,
	LOG_END
} history_internal_events_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_H */
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_LIST_H
|
||||
#define _SYS_LIST_H
|
||||
|
||||
|
||||
|
||||
#include <sys/list_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * List handle types.  The structure layouts are defined in
 * <sys/list_impl.h>.
 */
typedef struct list_node list_node_t;
typedef struct list list_t;

/* Construction and teardown. */
void list_create(list_t *, size_t, size_t);
void list_destroy(list_t *);

/* Insertion, removal and bulk transfer. */
void list_insert_after(list_t *, void *, void *);
void list_insert_before(list_t *, void *, void *);
void list_insert_head(list_t *, void *);
void list_insert_tail(list_t *, void *);
void list_remove(list_t *, void *);
void list_move_tail(list_t *, list_t *);

/* Traversal. */
void *list_head(list_t *);
void *list_tail(list_t *);
void *list_next(list_t *, void *);
void *list_prev(list_t *, void *);

/* Predicates. */
int list_link_active(list_node_t *);
int list_is_empty(list_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_LIST_H */
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_LIST_IMPL_H
|
||||
#define _SYS_LIST_IMPL_H
|
||||
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Link embedded in each list element: forward and backward pointers. */
struct list_node {
	struct list_node *list_next;
	struct list_node *list_prev;
};

/*
 * List header.  list_head is an embedded node rather than a pointer.
 * NOTE(review): list_size / list_offset presumably record the element
 * size and the offset of the embedded list_node within an element --
 * confirm against list.c.
 */
struct list {
	size_t	list_size;
	size_t	list_offset;
	struct list_node list_head;
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_LIST_IMPL_H */
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_H
|
||||
#define _SYS_METASLAB_H
|
||||
|
||||
#pragma ident "@(#)metaslab.h 1.6 07/06/21 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct metaslab_class metaslab_class_t;
|
||||
typedef struct metaslab_group metaslab_group_t;
|
||||
|
||||
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
|
||||
uint64_t start, uint64_t size, uint64_t txg);
|
||||
extern void metaslab_fini(metaslab_t *msp);
|
||||
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
|
||||
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
|
||||
|
||||
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp,
|
||||
boolean_t hintbp_avoid);
|
||||
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
|
||||
boolean_t now);
|
||||
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
|
||||
|
||||
extern metaslab_class_t *metaslab_class_create(void);
|
||||
extern void metaslab_class_destroy(metaslab_class_t *mc);
|
||||
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
|
||||
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
|
||||
vdev_t *vd);
|
||||
extern void metaslab_group_destroy(metaslab_group_t *mg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_METASLAB_H */
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
#define _SYS_METASLAB_IMPL_H
|
||||
|
||||
#pragma ident "@(#)metaslab_impl.h 1.2 06/04/02 SMI"
|
||||
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct metaslab_class {
|
||||
metaslab_group_t *mc_rotor;
|
||||
uint64_t mc_allocated;
|
||||
};
|
||||
|
||||
struct metaslab_group {
|
||||
kmutex_t mg_lock;
|
||||
avl_tree_t mg_metaslab_tree;
|
||||
uint64_t mg_aliquot;
|
||||
int64_t mg_bias;
|
||||
metaslab_class_t *mg_class;
|
||||
vdev_t *mg_vd;
|
||||
metaslab_group_t *mg_prev;
|
||||
metaslab_group_t *mg_next;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each metaslab's free space is tracked in space map object in the MOS,
|
||||
* which is only updated in syncing context. Each time we sync a txg,
|
||||
* we append the allocs and frees from that txg to the space map object.
|
||||
* When the txg is done syncing, metaslab_sync_done() updates ms_smo
|
||||
* to ms_smo_syncing. Everything in ms_smo is always safe to allocate.
|
||||
*/
|
||||
struct metaslab {
|
||||
kmutex_t ms_lock; /* metaslab lock */
|
||||
space_map_obj_t ms_smo; /* synced space map object */
|
||||
space_map_obj_t ms_smo_syncing; /* syncing space map object */
|
||||
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
|
||||
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
|
||||
space_map_t ms_map; /* in-core free space map */
|
||||
uint64_t ms_weight; /* weight vs. others in group */
|
||||
metaslab_group_t *ms_group; /* metaslab group */
|
||||
avl_node_t ms_group_node; /* node in metaslab group tree */
|
||||
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_METASLAB_IMPL_H */
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_REFCOUNT_H
|
||||
#define _SYS_REFCOUNT_H
|
||||
|
||||
#pragma ident "@(#)refcount.h 1.3 07/08/02 SMI"
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * If the reference is held only by the calling function and not any
 * particular object, use FTAG (which is a string) for the holder_tag.
 * Otherwise, use the object that holds the reference.
 *
 * FTAG expands to the enclosing function's __func__ with the const
 * qualifier cast away, so it is only meaningful inside a function body
 * and must never be written through.
 */
#define	FTAG ((char *)__func__)
|
||||
|
||||
#if defined(DEBUG) || !defined(_KERNEL)
|
||||
typedef struct reference {
|
||||
list_node_t ref_link;
|
||||
void *ref_holder;
|
||||
uint64_t ref_number;
|
||||
uint8_t *ref_removed;
|
||||
} reference_t;
|
||||
|
||||
typedef struct refcount {
|
||||
kmutex_t rc_mtx;
|
||||
list_t rc_list;
|
||||
list_t rc_removed;
|
||||
int64_t rc_count;
|
||||
int64_t rc_removed_count;
|
||||
} refcount_t;
|
||||
|
||||
/* Note: refcount_t must be initialized with refcount_create() */
|
||||
|
||||
void refcount_create(refcount_t *rc);
|
||||
void refcount_destroy(refcount_t *rc);
|
||||
void refcount_destroy_many(refcount_t *rc, uint64_t number);
|
||||
int refcount_is_zero(refcount_t *rc);
|
||||
int64_t refcount_count(refcount_t *rc);
|
||||
int64_t refcount_add(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_remove(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
|
||||
void refcount_init(void);
|
||||
void refcount_fini(void);
|
||||
|
||||
#else /* DEBUG */
|
||||
|
||||
/*
 * Untracked implementation, used when the enclosing
 * "#if defined(DEBUG) || !defined(_KERNEL)" is false.  A refcount is a
 * bare 64-bit counter updated with atomic_add_64_nv(); the holder tag
 * argument is accepted for source compatibility and discarded.
 */
typedef struct refcount {
	uint64_t rc_count;
} refcount_t;

#define	refcount_create(rc) ((rc)->rc_count = 0)
#define	refcount_destroy(rc) ((rc)->rc_count = 0)
#define	refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define	refcount_is_zero(rc) ((rc)->rc_count == 0)
#define	refcount_count(rc) ((rc)->rc_count)
#define	refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
#define	refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
/*
 * 'number' is parenthesized so that an expression argument such as
 * (a - b) is added or negated as a whole.
 */
#define	refcount_add_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, (number))
#define	refcount_remove_many(rc, number, holder) \
	atomic_add_64_nv(&(rc)->rc_count, -(number))

/* No global state to set up or tear down in this variant. */
#define	refcount_init()
#define	refcount_fini()
|
||||
|
||||
#endif /* DEBUG */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_REFCOUNT_H */
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_RPRWLOCK_H
|
||||
#define _SYS_RPRWLOCK_H
|
||||
|
||||
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct rprwlock {
|
||||
kmutex_t rw_lock;
|
||||
kthread_t *rw_writer;
|
||||
kcondvar_t rw_cv;
|
||||
refcount_t rw_count;
|
||||
} rprwlock_t;
|
||||
|
||||
void rprw_init(rprwlock_t *rwl);
|
||||
void rprw_destroy(rprwlock_t *rwl);
|
||||
void rprw_enter_read(rprwlock_t *rwl, void *tag);
|
||||
void rprw_enter_write(rprwlock_t *rwl, void *tag);
|
||||
void rprw_enter(rprwlock_t *rwl, krw_t rw, void *tag);
|
||||
void rprw_exit(rprwlock_t *rwl, void *tag);
|
||||
boolean_t rprw_held(rprwlock_t *rwl, krw_t rw);
|
||||
#define RPRW_READ_HELD(x) rprw_held(x, RW_READER)
|
||||
#define RPRW_WRITE_HELD(x) rprw_held(x, RW_WRITER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_RPRWLOCK_H */
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_RR_RW_LOCK_H
|
||||
#define _SYS_RR_RW_LOCK_H
|
||||
|
||||
#pragma ident "@(#)rrwlock.h 1.1 07/10/24 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
/*
|
||||
* A reader-writer lock implementation that allows re-entrant reads, but
|
||||
* still gives writers priority on "new" reads.
|
||||
*
|
||||
* See rrwlock.c for more details about the implementation.
|
||||
*
|
||||
* Fields of the rrwlock_t structure:
|
||||
* - rr_lock: protects modification and reading of rrwlock_t fields
|
||||
* - rr_cv: cv for waking up readers or waiting writers
|
||||
* - rr_writer: thread id of the current writer
|
||||
* - rr_anon_rount: number of active anonymous readers
|
||||
* - rr_linked_rcount: total number of non-anonymous active readers
|
||||
* - rr_writer_wanted: a writer wants the lock
|
||||
*/
|
||||
typedef struct rrwlock {
|
||||
kmutex_t rr_lock;
|
||||
kcondvar_t rr_cv;
|
||||
kthread_t *rr_writer;
|
||||
refcount_t rr_anon_rcount;
|
||||
refcount_t rr_linked_rcount;
|
||||
boolean_t rr_writer_wanted;
|
||||
} rrwlock_t;
|
||||
|
||||
/*
|
||||
* 'tag' is used in reference counting tracking. The
|
||||
* 'tag' must be the same in a rrw_enter() as in its
|
||||
* corresponding rrw_exit().
|
||||
*/
|
||||
void rrw_init(rrwlock_t *rrl);
|
||||
void rrw_destroy(rrwlock_t *rrl);
|
||||
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
|
||||
void rrw_exit(rrwlock_t *rrl, void *tag);
|
||||
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
|
||||
|
||||
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
|
||||
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_RR_RW_LOCK_H */
|
||||
@@ -0,0 +1,538 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_H
|
||||
#define _SYS_SPA_H
|
||||
|
||||
#pragma ident "@(#)spa.h 1.31 08/04/09 SMI"
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Forward references that lots of things need.  These only name the
 * types; the structure definitions are not part of this header section.
 */
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct traverse_handle traverse_handle_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;
|
||||
|
||||
/*
 * General-purpose 32-bit and 64-bit bitfield encodings.
 *
 * A field is described by (low, len): it occupies 'len' bits starting at
 * bit 'low' of the word 'x'.
 *
 *	DECODE/GET	extract the field as an unshifted value
 *	ENCODE		mask a value to 'len' bits and shift it into place
 *	SET		replace the field in the lvalue 'x' with 'val'
 *	GET_SB/SET_SB	as GET/SET, for values stored as
 *			(val >> shift) - bias
 *
 * SET works by XOR: it encodes (old field ^ val) and XORs that into x,
 * which flips exactly the bits that differ and leaves the rest of the
 * word untouched.  'x' is evaluated more than once, so it must be free
 * of side effects.
 */
#define	BF32_DECODE(x, low, len)	P2PHASE((x) >> (low), 1U << (len))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF32_ENCODE(x, low, len)	(P2PHASE((x), 1U << (len)) << (low))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))

#define	BF32_GET(x, low, len)		BF32_DECODE(x, low, len)
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)

/* 'x' and 'low' are parenthesized so expression arguments parse as a unit. */
#define	BF32_SET(x, low, len, val)	\
	((x) ^= BF32_ENCODE(((x) >> (low)) ^ (val), low, len))
#define	BF64_SET(x, low, len, val)	\
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

#define	BF32_GET_SB(x, low, len, shift, bias)	\
	((BF32_GET(x, low, len) + (bias)) << (shift))
#define	BF64_GET_SB(x, low, len, shift, bias)	\
	((BF64_GET(x, low, len) + (bias)) << (shift))

#define	BF32_SET_SB(x, low, len, shift, bias, val)	\
	BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
#define	BF64_SET_SB(x, low, len, shift, bias, val)	\
	BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
|
||||
|
||||
/*
 * We currently support nine block sizes, from 512 bytes to 128K.
 * We could go higher, but the benefits are near-zero and the cost
 * of COWing a giant block to modify one byte would become excessive.
 */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
#define	SPA_MINBLOCKSIZE	(1ULL << SPA_MINBLOCKSHIFT)
#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)

/* Number of power-of-two sizes in [SPA_MINBLOCKSIZE, SPA_MAXBLOCKSIZE]. */
#define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
|
||||
|
||||
/*
 * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
 * The ASIZE encoding should be at least 64 times larger (6 more bits)
 * to support up to 4-way RAID-Z mirror mode with worst-case gang block
 * overhead, three DVAs per bp, plus one more bit in case we do anything
 * else that expands the ASIZE.
 *
 * NOTE(review): SPA_ASIZEBITS is 8 bits wider than the 16-bit LSIZE and
 * PSIZE fields, i.e. 256 times larger, which is more than the 6 + 1 bits
 * the text above asks for -- confirm which is intended.
 */
#define	SPA_LSIZEBITS		16	/* LSIZE up to 32M (2^16 * 512)	*/
#define	SPA_PSIZEBITS		16	/* PSIZE up to 32M (2^16 * 512)	*/
#define	SPA_ASIZEBITS		24	/* ASIZE up to 64 times larger	*/
|
||||
|
||||
/*
 * All SPA data is represented by 128-bit data virtual addresses (DVAs).
 * The members of the dva_t should be considered opaque outside the SPA;
 * use the DVA_GET_*() / DVA_SET_*() accessors instead.
 */
typedef struct dva {
	uint64_t	dva_word[2];
} dva_t;
|
||||
|
||||
/*
 * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
 * It is stored as four 64-bit words.
 */
typedef struct zio_cksum {
	uint64_t	zc_word[4];
} zio_cksum_t;
|
||||
|
||||
/*
|
||||
* Each block is described by its DVAs, time of birth, checksum, etc.
|
||||
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
|
||||
*
|
||||
* 64 56 48 40 32 24 16 8 0
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 0 | vdev1 | GRID | ASIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 1 |G| offset1 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 2 | vdev2 | GRID | ASIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 3 |G| offset2 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 4 | vdev3 | GRID | ASIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 5 |G| offset3 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 8 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 9 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* a | birth txg |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* b | fill count |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* c | checksum[0] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* d | checksum[1] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* e | checksum[2] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* f | checksum[3] |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
*
|
||||
* Legend:
|
||||
*
|
||||
* vdev virtual device ID
|
||||
* offset offset into virtual device
|
||||
* LSIZE logical size
|
||||
* PSIZE physical size (after compression)
|
||||
* ASIZE allocated size (including RAID-Z parity and gang block headers)
|
||||
* GRID RAID-Z layout information (reserved for future use)
|
||||
* cksum checksum function
|
||||
* comp compression function
|
||||
* G gang block indicator
|
||||
* E endianness
|
||||
* type DMU object type
|
||||
* lvl level of indirection
|
||||
* birth txg transaction group in which the block was born
|
||||
* fill count number of non-zero blocks under this bp
|
||||
* checksum[4] 256-bit checksum of the data this bp describes
|
||||
*/
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
uint64_t blk_pad[3]; /* Extra space for the future */
|
||||
uint64_t blk_birth; /* transaction group at birth */
|
||||
uint64_t blk_fill; /* fill count */
|
||||
zio_cksum_t blk_cksum; /* 256-bit checksum */
|
||||
} blkptr_t;
|
||||
|
||||
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
|
||||
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
|
||||
|
||||
/*
 * Macros to get and set fields in a bp or DVA.
 *
 * Each accessor names a (low bit, width) field of dva_word[] or blk_prop
 * and delegates to the BF64_* helpers.  The size fields use the _SB
 * variants: sizes are stored in units of 1 << SPA_MINBLOCKSHIFT bytes,
 * and LSIZE/PSIZE are additionally stored minus one (bias 1).
 * The widths of the size fields come from SPA_ASIZEBITS, SPA_LSIZEBITS
 * and SPA_PSIZEBITS.
 */
#define	DVA_GET_ASIZE(dva)	\
	BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_ASIZE(dva, x)	\
	BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
	SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GRID(dva)	BF64_GET((dva)->dva_word[0], 24, 8)
#define	DVA_SET_GRID(dva, x)	BF64_SET((dva)->dva_word[0], 24, 8, x)

#define	DVA_GET_VDEV(dva)	BF64_GET((dva)->dva_word[0], 32, 32)
#define	DVA_SET_VDEV(dva, x)	BF64_SET((dva)->dva_word[0], 32, 32, x)

#define	DVA_GET_OFFSET(dva)	\
	BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
#define	DVA_SET_OFFSET(dva, x)	\
	BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x)

#define	DVA_GET_GANG(dva)	BF64_GET((dva)->dva_word[1], 63, 1)
#define	DVA_SET_GANG(dva, x)	BF64_SET((dva)->dva_word[1], 63, 1, x)

/* A hole reports a logical size of zero regardless of blk_prop. */
#define	BP_GET_LSIZE(bp)	\
	(BP_IS_HOLE(bp) ? 0 : \
	BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1))
#define	BP_SET_LSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_PSIZE(bp)	\
	BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	BP_SET_PSIZE(bp, x)	\
	BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	BP_GET_COMPRESS(bp)	BF64_GET((bp)->blk_prop, 32, 8)
#define	BP_SET_COMPRESS(bp, x)	BF64_SET((bp)->blk_prop, 32, 8, x)

#define	BP_GET_CHECKSUM(bp)	BF64_GET((bp)->blk_prop, 40, 8)
#define	BP_SET_CHECKSUM(bp, x)	BF64_SET((bp)->blk_prop, 40, 8, x)

#define	BP_GET_TYPE(bp)		BF64_GET((bp)->blk_prop, 48, 8)
#define	BP_SET_TYPE(bp, x)	BF64_SET((bp)->blk_prop, 48, 8, x)

#define	BP_GET_LEVEL(bp)	BF64_GET((bp)->blk_prop, 56, 5)
#define	BP_SET_LEVEL(bp, x)	BF64_SET((bp)->blk_prop, 56, 5, x)

/* Yields 0 when the E bit is clear and an all-ones word when it is set. */
#define	BP_GET_BYTEORDER(bp)	(0 - BF64_GET((bp)->blk_prop, 63, 1))
#define	BP_SET_BYTEORDER(bp, x)	BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
|
||||
#define BP_GET_ASIZE(bp) \
|
||||
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
/*
 * Size selected for a bp: the physical size (PSIZE) when the block is at
 * an indirection level above zero or its object type is flagged
 * ot_metadata in dmu_ot[], the logical size (LSIZE) otherwise.
 *
 * The expansion is a pure expression with no trailing semicolon, so the
 * macro can be used inside larger expressions and argument lists.
 * Note that (bp) is evaluated more than once.
 */
#define BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
|
||||
|
||||
#define BP_GET_NDVAS(bp) \
|
||||
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
#define BP_COUNT_GANG(bp) \
|
||||
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[2]))
|
||||
|
||||
/*
 * Evaluates to 1 when both words of the two DVAs (passed by pointer)
 * are identical and to 0 otherwise.  Word 1 is compared first and word 0
 * is only examined when word 1 matches.
 */
#define DVA_EQUAL(dva1, dva2) \
	(!((dva1)->dva_word[1] != (dva2)->dva_word[1] || \
	(dva1)->dva_word[0] != (dva2)->dva_word[0]))
|
||||
|
||||
/*
 * Evaluates to 1 when all four words of the two checksums match and to 0
 * otherwise.  The arguments are checksum objects (not pointers).  The
 * per-word differences are OR-ed together and tested once, so the
 * comparison contains no short-circuit branches; each argument is
 * evaluated four times.
 */
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
	(!(((zc1).zc_word[0] ^ (zc2).zc_word[0]) | \
	((zc1).zc_word[1] ^ (zc2).zc_word[1]) | \
	((zc1).zc_word[2] ^ (zc2).zc_word[2]) | \
	((zc1).zc_word[3] ^ (zc2).zc_word[3])))
|
||||
|
||||
|
||||
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
|
||||
|
||||
/*
 * Store the four words w0..w3 into zc_word[0..3] of the checksum that
 * zcp points to.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe in an unbraced if/else; invoke it with a
 * trailing semicolon.  The value arguments are parenthesized so that any
 * expression may be passed.  (zcp) is evaluated four times.
 */
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
do { \
	(zcp)->zc_word[0] = (w0); \
	(zcp)->zc_word[1] = (w1); \
	(zcp)->zc_word[2] = (w2); \
	(zcp)->zc_word[3] = (w3); \
} while (0)
|
||||
|
||||
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
|
||||
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
|
||||
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
|
||||
|
||||
/*
 * Clear both words of all three DVAs in a bp and reset its birth txg to
 * zero (a bp with blk_birth == 0 is what BP_IS_HOLE() tests for).
 *
 * Expands to a single statement; invoke it with a trailing semicolon.
 */
#define BP_ZERO_DVAS(bp) \
do { \
	(bp)->blk_dva[0].dva_word[0] = 0; \
	(bp)->blk_dva[0].dva_word[1] = 0; \
	(bp)->blk_dva[1].dva_word[0] = 0; \
	(bp)->blk_dva[1].dva_word[1] = 0; \
	(bp)->blk_dva[2].dva_word[0] = 0; \
	(bp)->blk_dva[2].dva_word[1] = 0; \
	(bp)->blk_birth = 0; \
} while (0)

/*
 * Zero every field of a bp: the DVAs and birth txg (via BP_ZERO_DVAS),
 * the property word, the padding words, the fill count and the checksum.
 *
 * Expands to a single statement; invoke it with a trailing semicolon.
 */
#define BP_ZERO(bp) \
do { \
	BP_ZERO_DVAS(bp); \
	(bp)->blk_prop = 0; \
	(bp)->blk_pad[0] = 0; \
	(bp)->blk_pad[1] = 0; \
	(bp)->blk_pad[2] = 0; \
	(bp)->blk_fill = 0; \
	ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
} while (0)
|
||||
|
||||
/*
|
||||
* Note: the byteorder is either 0 or -1, both of which are palindromes.
|
||||
* This simplifies the endianness handling a bit.
|
||||
*/
|
||||
#ifdef _BIG_ENDIAN
|
||||
#define ZFS_HOST_BYTEORDER (0ULL)
|
||||
#else
|
||||
#define ZFS_HOST_BYTEORDER (-1ULL)
|
||||
#endif
|
||||
|
||||
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
|
||||
|
||||
#define BP_SPRINTF_LEN 320
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
/*
 * Buffer contents type for a bp: ARC_BUFC_METADATA when the block is at
 * an indirection level above zero or its object type is flagged
 * ot_metadata in dmu_ot[], ARC_BUFC_DATA otherwise.
 *
 * The expansion is a pure expression with no trailing semicolon, so the
 * macro can be used as a function argument or inside a comparison.
 * Note that (bp) is evaluated more than once.
 */
#define BP_GET_BUFC_TYPE(bp) \
	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
	ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
||||
/*
|
||||
* Routines found in spa.c
|
||||
*/
|
||||
|
||||
/* state manipulation functions */
|
||||
extern int spa_open(const char *pool, spa_t **, void *tag);
|
||||
extern int spa_get_stats(const char *pool, nvlist_t **config,
|
||||
char *altroot, size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
const char *history_str);
|
||||
extern void spa_check_rootconf(char *devpath, char **the_dev_p,
|
||||
nvlist_t **the_conf_p, uint64_t *the_txg_p);
|
||||
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
|
||||
extern int spa_import_rootpool(char *devpath);
|
||||
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
|
||||
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
|
||||
extern int spa_destroy(char *pool);
|
||||
extern int spa_export(char *pool, nvlist_t **oldconfig);
|
||||
extern int spa_reset(char *pool);
|
||||
extern void spa_async_request(spa_t *spa, int flag);
|
||||
extern void spa_async_suspend(spa_t *spa);
|
||||
extern void spa_async_resume(spa_t *spa);
|
||||
extern spa_t *spa_inject_addref(char *pool);
|
||||
extern void spa_inject_delref(spa_t *spa);
|
||||
|
||||
#define SPA_ASYNC_REMOVE 0x01
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x02
|
||||
#define SPA_ASYNC_SCRUB 0x04
|
||||
#define SPA_ASYNC_RESILVER 0x08
|
||||
#define SPA_ASYNC_CONFIG_UPDATE 0x10
|
||||
|
||||
/* device manipulation */
|
||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
|
||||
extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
|
||||
int replacing);
|
||||
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
|
||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||
|
||||
/* spare state (which is global across all pools) */
|
||||
extern void spa_spare_add(vdev_t *vd);
|
||||
extern void spa_spare_remove(vdev_t *vd);
|
||||
extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool);
|
||||
extern void spa_spare_activate(vdev_t *vd);
|
||||
|
||||
/* L2ARC state (which is global across all pools) */
|
||||
extern void spa_l2cache_add(vdev_t *vd);
|
||||
extern void spa_l2cache_remove(vdev_t *vd);
|
||||
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
|
||||
extern void spa_l2cache_activate(vdev_t *vd);
|
||||
extern void spa_l2cache_drop(spa_t *spa);
|
||||
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
|
||||
|
||||
/* scrubbing */
|
||||
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
|
||||
extern void spa_scrub_suspend(spa_t *spa);
|
||||
extern void spa_scrub_resume(spa_t *spa);
|
||||
extern void spa_scrub_restart(spa_t *spa, uint64_t txg);
|
||||
|
||||
/* spa syncing */
|
||||
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
|
||||
extern void spa_sync_allpools(void);
|
||||
|
||||
/*
|
||||
* SPA configuration functions in spa_config.c
|
||||
*/
|
||||
|
||||
#define SPA_CONFIG_UPDATE_POOL 0
|
||||
#define SPA_CONFIG_UPDATE_VDEVS 1
|
||||
|
||||
extern void spa_config_sync(void);
|
||||
extern void spa_config_check(const char *, const char *);
|
||||
extern void spa_config_load(void);
|
||||
extern nvlist_t *spa_all_configs(uint64_t *);
|
||||
extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
||||
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||
int getstats);
|
||||
extern void spa_config_update(spa_t *spa, int what);
|
||||
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
|
||||
|
||||
/*
|
||||
* Miscellaneous SPA routines in spa_misc.c
|
||||
*/
|
||||
|
||||
/* Namespace manipulation */
|
||||
extern spa_t *spa_lookup(const char *name);
|
||||
extern spa_t *spa_add(const char *name, const char *altroot);
|
||||
extern void spa_remove(spa_t *spa);
|
||||
extern spa_t *spa_next(spa_t *prev);
|
||||
|
||||
/* Refcount functions */
|
||||
extern void spa_open_ref(spa_t *spa, void *tag);
|
||||
extern void spa_close(spa_t *spa, void *tag);
|
||||
extern boolean_t spa_refcount_zero(spa_t *spa);
|
||||
|
||||
/* Pool configuration lock */
|
||||
extern void spa_config_enter(spa_t *spa, krw_t rw, void *tag);
|
||||
extern void spa_config_exit(spa_t *spa, void *tag);
|
||||
extern boolean_t spa_config_held(spa_t *spa, krw_t rw);
|
||||
|
||||
/* Pool vdev add/remove lock */
|
||||
extern uint64_t spa_vdev_enter(spa_t *spa);
|
||||
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
|
||||
|
||||
/* Accessor functions */
|
||||
extern krwlock_t *spa_traverse_rwlock(spa_t *spa);
|
||||
extern int spa_traverse_wanted(spa_t *spa);
|
||||
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
|
||||
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
|
||||
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
|
||||
extern void spa_altroot(spa_t *, char *, size_t);
|
||||
extern int spa_sync_pass(spa_t *spa);
|
||||
extern char *spa_name(spa_t *spa);
|
||||
extern uint64_t spa_guid(spa_t *spa);
|
||||
extern uint64_t spa_last_synced_txg(spa_t *spa);
|
||||
extern uint64_t spa_first_txg(spa_t *spa);
|
||||
extern uint64_t spa_version(spa_t *spa);
|
||||
extern int spa_state(spa_t *spa);
|
||||
extern uint64_t spa_freeze_txg(spa_t *spa);
|
||||
extern uint64_t spa_get_alloc(spa_t *spa);
|
||||
extern uint64_t spa_get_space(spa_t *spa);
|
||||
extern uint64_t spa_get_dspace(spa_t *spa);
|
||||
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern int spa_max_replication(spa_t *spa);
|
||||
extern int spa_busy(void);
|
||||
extern uint8_t spa_get_failmode(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern int spa_rename(const char *oldname, const char *newname);
|
||||
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
|
||||
extern char *spa_strdup(const char *);
|
||||
extern void spa_strfree(char *);
|
||||
extern uint64_t spa_get_random(uint64_t range);
|
||||
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
extern void spa_evict_all(void);
|
||||
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid);
|
||||
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
|
||||
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
|
||||
extern boolean_t spa_has_slogs(spa_t *spa);
|
||||
|
||||
/* history logging */
|
||||
typedef enum history_log_type {
|
||||
LOG_CMD_POOL_CREATE,
|
||||
LOG_CMD_NORMAL,
|
||||
LOG_INTERNAL
|
||||
} history_log_type_t;
|
||||
|
||||
typedef struct history_arg {
|
||||
const char *ha_history_str;
|
||||
history_log_type_t ha_log_type;
|
||||
history_internal_events_t ha_event;
|
||||
char ha_zone[MAXPATHLEN];
|
||||
} history_arg_t;
|
||||
|
||||
extern char *spa_his_ievent_table[];
|
||||
|
||||
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
|
||||
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
|
||||
char *his_buf);
|
||||
extern int spa_history_log(spa_t *spa, const char *his_buf,
|
||||
history_log_type_t what);
|
||||
void spa_history_internal_log(history_internal_events_t event, spa_t *spa,
|
||||
dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
|
||||
|
||||
/* error handling */
|
||||
struct zbookmark;
|
||||
struct zio;
|
||||
extern void spa_log_error(spa_t *spa, struct zio *zio);
|
||||
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
|
||||
struct zio *zio, uint64_t stateoroffset, uint64_t length);
|
||||
extern void zfs_post_ok(spa_t *spa, vdev_t *vd);
|
||||
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
|
||||
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
|
||||
extern uint64_t spa_get_errlog_size(spa_t *spa);
|
||||
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
|
||||
extern void spa_errlog_rotate(spa_t *spa);
|
||||
extern void spa_errlog_drain(spa_t *spa);
|
||||
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
|
||||
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
|
||||
|
||||
/* vdev cache */
|
||||
extern void vdev_cache_stat_init(void);
|
||||
extern void vdev_cache_stat_fini(void);
|
||||
|
||||
/* Initialization and termination */
|
||||
extern void spa_init(int flags);
|
||||
extern void spa_fini(void);
|
||||
extern void spa_boot_init(void);
|
||||
|
||||
/* properties */
|
||||
extern int spa_prop_set(spa_t *spa, nvlist_t *nvp);
|
||||
extern int spa_prop_get(spa_t *spa, nvlist_t **nvp);
|
||||
extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
|
||||
|
||||
/* asynchronous event notification */
|
||||
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_bp(bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
|
||||
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
|
||||
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#else
|
||||
#define dprintf_bp(bp, fmt, ...)
|
||||
#endif
|
||||
|
||||
extern int spa_mode; /* mode, e.g. FREAD | FWRITE */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_H */
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_BOOT_H
|
||||
#define _SYS_SPA_BOOT_H
|
||||
|
||||
#pragma ident "@(#)spa_boot.h 1.1 08/04/09 SMI"
|
||||
|
||||
#include <sys/nvpair.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern char *spa_get_bootfs(void);
|
||||
extern void spa_free_bootfs(char *bootfs);
|
||||
extern int spa_get_rootconf(char *devpath, char **bestdev_p,
|
||||
nvlist_t **bestconf_p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_BOOT_H */
|
||||
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_IMPL_H
|
||||
#define _SYS_SPA_IMPL_H
|
||||
|
||||
#pragma ident "@(#)spa_impl.h 1.17 07/11/27 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/bplist.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct spa_error_entry {
|
||||
zbookmark_t se_bookmark;
|
||||
char *se_name;
|
||||
avl_node_t se_avl;
|
||||
} spa_error_entry_t;
|
||||
|
||||
typedef struct spa_history_phys {
|
||||
uint64_t sh_pool_create_len; /* ending offset of zpool create */
|
||||
uint64_t sh_phys_max_off; /* physical EOF */
|
||||
uint64_t sh_bof; /* logical BOF */
|
||||
uint64_t sh_eof; /* logical EOF */
|
||||
uint64_t sh_records_lost; /* num of records overwritten */
|
||||
} spa_history_phys_t;
|
||||
|
||||
struct spa_aux_vdev {
|
||||
uint64_t sav_object; /* MOS object for device list */
|
||||
nvlist_t *sav_config; /* cached device config */
|
||||
vdev_t **sav_vdevs; /* devices */
|
||||
int sav_count; /* number devices */
|
||||
boolean_t sav_sync; /* sync the device list */
|
||||
nvlist_t **sav_pending; /* pending device additions */
|
||||
uint_t sav_npending; /* # pending devices */
|
||||
};
|
||||
|
||||
typedef struct spa_config_lock {
|
||||
kmutex_t scl_lock;
|
||||
kthread_t *scl_writer;
|
||||
uint16_t scl_write_wanted;
|
||||
kcondvar_t scl_cv;
|
||||
refcount_t scl_count;
|
||||
} spa_config_lock_t;
|
||||
|
||||
struct spa {
|
||||
/*
|
||||
* Fields protected by spa_namespace_lock.
|
||||
*/
|
||||
char *spa_name; /* pool name */
|
||||
avl_node_t spa_avl; /* node in spa_namespace_avl */
|
||||
nvlist_t *spa_config; /* last synced config */
|
||||
nvlist_t *spa_config_syncing; /* currently syncing config */
|
||||
uint64_t spa_config_txg; /* txg of last config change */
|
||||
kmutex_t spa_config_cache_lock; /* for spa_config RW_READER */
|
||||
int spa_sync_pass; /* iterate-to-convergence */
|
||||
int spa_state; /* pool state */
|
||||
int spa_inject_ref; /* injection references */
|
||||
uint8_t spa_traverse_wanted; /* traverse lock wanted */
|
||||
uint8_t spa_sync_on; /* sync threads are running */
|
||||
spa_load_state_t spa_load_state; /* current load operation */
|
||||
taskq_t *spa_zio_issue_taskq[ZIO_TYPES];
|
||||
taskq_t *spa_zio_intr_taskq[ZIO_TYPES];
|
||||
dsl_pool_t *spa_dsl_pool;
|
||||
metaslab_class_t *spa_normal_class; /* normal data class */
|
||||
metaslab_class_t *spa_log_class; /* intent log data class */
|
||||
uint64_t spa_first_txg; /* first txg after spa_open() */
|
||||
uint64_t spa_final_txg; /* txg of export/destroy */
|
||||
uint64_t spa_freeze_txg; /* freeze pool at this txg */
|
||||
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
|
||||
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
|
||||
vdev_t *spa_root_vdev; /* top-level vdev container */
|
||||
uint64_t spa_load_guid; /* initial guid for spa_load */
|
||||
list_t spa_dirty_list; /* vdevs with dirty labels */
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
uint64_t spa_config_object; /* MOS object for pool config */
|
||||
uint64_t spa_syncing_txg; /* txg currently syncing */
|
||||
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
|
||||
bplist_t spa_sync_bplist; /* deferred-free bplist */
|
||||
krwlock_t spa_traverse_lock; /* traverse vs. spa_sync() */
|
||||
uberblock_t spa_ubsync; /* last synced uberblock */
|
||||
uberblock_t spa_uberblock; /* current uberblock */
|
||||
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
||||
kthread_t *spa_scrub_thread; /* scrub/resilver thread */
|
||||
traverse_handle_t *spa_scrub_th; /* scrub traverse handle */
|
||||
uint64_t spa_scrub_restart_txg; /* need to restart */
|
||||
uint64_t spa_scrub_mintxg; /* min txg we'll scrub */
|
||||
uint64_t spa_scrub_maxtxg; /* max txg we'll scrub */
|
||||
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
|
||||
uint64_t spa_scrub_errors; /* scrub I/O error count */
|
||||
int spa_scrub_suspended; /* tell scrubber to suspend */
|
||||
kcondvar_t spa_scrub_cv; /* scrub thread state change */
|
||||
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
|
||||
uint8_t spa_scrub_stop; /* tell scrubber to stop */
|
||||
uint8_t spa_scrub_active; /* active or suspended? */
|
||||
uint8_t spa_scrub_type; /* type of scrub we're doing */
|
||||
uint8_t spa_scrub_finished; /* indicator to rotate logs */
|
||||
kmutex_t spa_async_lock; /* protect async state */
|
||||
kthread_t *spa_async_thread; /* thread doing async task */
|
||||
int spa_async_suspended; /* async tasks suspended */
|
||||
kcondvar_t spa_async_cv; /* wait for thread_exit() */
|
||||
uint16_t spa_async_tasks; /* async task mask */
|
||||
char *spa_root; /* alternate root directory */
|
||||
kmutex_t spa_uberblock_lock; /* vdev_uberblock_load_done() */
|
||||
uint64_t spa_ena; /* spa-wide ereport ENA */
|
||||
boolean_t spa_last_open_failed; /* true if last open failed */
|
||||
kmutex_t spa_errlog_lock; /* error log lock */
|
||||
uint64_t spa_errlog_last; /* last error log object */
|
||||
uint64_t spa_errlog_scrub; /* scrub error log object */
|
||||
kmutex_t spa_errlist_lock; /* error list/ereport lock */
|
||||
avl_tree_t spa_errlist_last; /* last error list */
|
||||
avl_tree_t spa_errlist_scrub; /* scrub error list */
|
||||
uint64_t spa_deflate; /* should we deflate? */
|
||||
uint64_t spa_history; /* history object */
|
||||
kmutex_t spa_history_lock; /* history lock */
|
||||
vdev_t *spa_pending_vdev; /* pending vdev additions */
|
||||
kmutex_t spa_props_lock; /* property lock */
|
||||
uint64_t spa_pool_props_object; /* object for properties */
|
||||
uint64_t spa_bootfs; /* default boot filesystem */
|
||||
boolean_t spa_delegation; /* delegation on/off */
|
||||
char *spa_config_dir; /* cache file directory */
|
||||
char *spa_config_file; /* cache file name */
|
||||
list_t spa_zio_list; /* zio error list */
|
||||
kcondvar_t spa_zio_cv; /* resume I/O pipeline */
|
||||
kmutex_t spa_zio_lock; /* zio error lock */
|
||||
uint8_t spa_failmode; /* failure mode for the pool */
|
||||
/*
|
||||
* spa_refcount & spa_config_lock must be the last elements
|
||||
* because refcount_t changes size based on compilation options.
|
||||
* In order for the MDB module to function correctly, the other
|
||||
* fields must remain in the same location.
|
||||
*/
|
||||
spa_config_lock_t spa_config_lock; /* configuration changes */
|
||||
refcount_t spa_refcount; /* number of opens */
|
||||
};
|
||||
|
||||
extern const char *spa_config_dir;
|
||||
extern kmutex_t spa_namespace_lock;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPA_IMPL_H */
|
||||
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPACE_MAP_H
|
||||
#define _SYS_SPACE_MAP_H
|
||||
|
||||
#pragma ident "@(#)space_map.h 1.2 06/04/02 SMI"
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct space_map_ops space_map_ops_t;
|
||||
|
||||
typedef struct space_map {
|
||||
avl_tree_t sm_root; /* AVL tree of map segments */
|
||||
uint64_t sm_space; /* sum of all segments in the map */
|
||||
uint64_t sm_start; /* start of map */
|
||||
uint64_t sm_size; /* size of map */
|
||||
uint8_t sm_shift; /* unit shift */
|
||||
uint8_t sm_pad[3]; /* unused */
|
||||
uint8_t sm_loaded; /* map loaded? */
|
||||
uint8_t sm_loading; /* map loading? */
|
||||
kcondvar_t sm_load_cv; /* map load completion */
|
||||
space_map_ops_t *sm_ops; /* space map block picker ops vector */
|
||||
void *sm_ppd; /* picker-private data */
|
||||
kmutex_t *sm_lock; /* pointer to lock that protects map */
|
||||
} space_map_t;
|
||||
|
||||
typedef struct space_seg {
|
||||
avl_node_t ss_node; /* AVL node */
|
||||
uint64_t ss_start; /* starting offset of this segment */
|
||||
uint64_t ss_end; /* ending offset (non-inclusive) */
|
||||
} space_seg_t;
|
||||
|
||||
typedef struct space_map_obj {
|
||||
uint64_t smo_object; /* on-disk space map object */
|
||||
uint64_t smo_objsize; /* size of the object */
|
||||
uint64_t smo_alloc; /* space allocated from the map */
|
||||
} space_map_obj_t;
|
||||
|
||||
struct space_map_ops {
|
||||
void (*smop_load)(space_map_t *sm);
|
||||
void (*smop_unload)(space_map_t *sm);
|
||||
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
|
||||
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
};
|
||||
|
||||
/*
|
||||
* debug entry
|
||||
*
|
||||
* 1 3 10 50
|
||||
* ,---+--------+------------+---------------------------------.
|
||||
* | 1 | action | syncpass | txg (lower bits) |
|
||||
* `---+--------+------------+---------------------------------'
|
||||
* 63 62 60 59 50 49 0
|
||||
*
|
||||
*
|
||||
*
|
||||
* non-debug entry
|
||||
*
|
||||
* 1 47 1 15
|
||||
* ,-----------------------------------------------------------.
|
||||
* | 0 | offset (sm_shift units) | type | run |
|
||||
* `-----------------------------------------------------------'
|
||||
* 63 62 16 15 14 0
|
||||
*/
|
||||
|
||||
/* All this stuff takes and returns bytes */
|
||||
#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
|
||||
#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
|
||||
#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
|
||||
#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
|
||||
#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
|
||||
#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
|
||||
#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
|
||||
#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
|
||||
|
||||
#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
|
||||
#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
|
||||
|
||||
#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
|
||||
#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
|
||||
|
||||
#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
|
||||
#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
|
||||
|
||||
#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
|
||||
|
||||
#define SM_ALLOC 0x0
|
||||
#define SM_FREE 0x1
|
||||
|
||||
/*
|
||||
* The data for a given space map can be kept on blocks of any size.
|
||||
* Larger blocks entail fewer i/o operations, but they also cause the
|
||||
* DMU to keep more data in-core, and also to waste more i/o bandwidth
|
||||
* when only a few blocks have changed since the last transaction group.
|
||||
* This could use a lot more research, but for now, set the freelist
|
||||
* block size to 4k (2^12).
|
||||
*/
|
||||
#define SPACE_MAP_BLOCKSHIFT 12
|
||||
|
||||
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
|
||||
uint8_t shift, kmutex_t *lp);
|
||||
extern void space_map_destroy(space_map_t *sm);
|
||||
extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern int space_map_contains(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_vacate(space_map_t *sm,
|
||||
space_map_func_t *func, space_map_t *mdest);
|
||||
extern void space_map_walk(space_map_t *sm,
|
||||
space_map_func_t *func, space_map_t *mdest);
|
||||
extern void space_map_excise(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_union(space_map_t *smd, space_map_t *sms);
|
||||
|
||||
extern void space_map_load_wait(space_map_t *sm);
|
||||
extern int space_map_load(space_map_t *sm, space_map_ops_t *ops,
|
||||
uint8_t maptype, space_map_obj_t *smo, objset_t *os);
|
||||
extern void space_map_unload(space_map_t *sm);
|
||||
|
||||
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
|
||||
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
|
||||
extern void space_map_truncate(space_map_obj_t *smo,
|
||||
objset_t *os, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_SPACE_MAP_H */
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_TXG_H
|
||||
#define _SYS_TXG_H
|
||||
|
||||
#pragma ident "@(#)txg.h 1.2 08/03/20 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Transaction group (txg) ring constants.  TXG_SIZE is the dimension of
 * the per-txg arrays declared in this header; TXG_MASK reduces a txg
 * number to a slot in such an array.
 */
#define	TXG_CONCURRENT_STATES	3	/* open, quiescing, syncing */
#define	TXG_SIZE		4		/* next power of 2 */
#define	TXG_MASK		(TXG_SIZE - 1)	/* mask for size */
#define	TXG_INITIAL		TXG_SIZE	/* initial txg */

/*
 * NOTE: this is an object-like macro.  It expands to an expression on an
 * identifier named `txg`, so a variable with exactly that name must be in
 * scope wherever TXG_IDX is used.
 */
#define	TXG_IDX			(txg & TXG_MASK)

/*
 * NOTE(review): the consumers of these two values are not visible in this
 * header; presumably they select whether a caller blocks -- confirm
 * against the users before relying on that.
 */
#define	TXG_WAIT		1ULL
#define	TXG_NOWAIT		2ULL
|
||||
|
||||
/* Forward declaration only; struct tx_cpu is not defined in this header. */
typedef struct tx_cpu tx_cpu_t;

/*
 * Handle passed by pointer to txg_hold_open(), txg_rele_to_quiesce() and
 * txg_rele_to_sync().
 */
typedef struct txg_handle {
	tx_cpu_t	*th_cpu;	/* tx_cpu_t associated with the handle */
	uint64_t	th_txg;		/* txg number -- presumably the held txg; confirm */
} txg_handle_t;
|
||||
|
||||
typedef struct txg_node {
|
||||
struct txg_node *tn_next[TXG_SIZE];
|
||||
uint8_t tn_member[TXG_SIZE];
|
||||
} txg_node_t;
|
||||
|
||||
typedef struct txg_list {
|
||||
kmutex_t tl_lock;
|
||||
size_t tl_offset;
|
||||
txg_node_t *tl_head[TXG_SIZE];
|
||||
} txg_list_t;
|
||||
|
||||
struct dsl_pool;
|
||||
|
||||
extern void txg_init(struct dsl_pool *dp, uint64_t txg);
|
||||
extern void txg_fini(struct dsl_pool *dp);
|
||||
extern void txg_sync_start(struct dsl_pool *dp);
|
||||
extern void txg_sync_stop(struct dsl_pool *dp);
|
||||
extern uint64_t txg_hold_open(struct dsl_pool *dp, txg_handle_t *txghp);
|
||||
extern void txg_rele_to_quiesce(txg_handle_t *txghp);
|
||||
extern void txg_rele_to_sync(txg_handle_t *txghp);
|
||||
extern void txg_suspend(struct dsl_pool *dp);
|
||||
extern void txg_resume(struct dsl_pool *dp);
|
||||
|
||||
/*
|
||||
* Delay the caller by the specified number of ticks or until
|
||||
* the txg closes (whichever comes first). This is intended
|
||||
* to be used to throttle writers when the system nears its
|
||||
* capacity.
|
||||
*/
|
||||
extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group has finished syncing.
|
||||
* Try to make this happen as soon as possible (eg. kick off any
|
||||
* necessary syncs immediately). If txg==0, wait for the currently open
|
||||
* txg to finish syncing.
|
||||
*/
|
||||
extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Wait until the given transaction group, or one after it, is
|
||||
* the open transaction group. Try to make this happen as soon
|
||||
* as possible (eg. kick off any necessary syncs immediately).
|
||||
* If txg == 0, wait for the next open txg.
|
||||
*/
|
||||
extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Returns TRUE if we are "backed up" waiting for the syncing
|
||||
* transaction to complete; otherwise returns FALSE.
|
||||
*/
|
||||
extern int txg_stalled(struct dsl_pool *dp);
|
||||
|
||||
/*
 * Per-txg object lists.
 */

/* Evaluates to the txg number immediately preceding `txg`. */
#define	TXG_CLEAN(txg)	((txg) - 1)
|
||||
|
||||
/*
 * txg_list_t operations.  `tl` is the list, `p` the object being added,
 * removed or queried, and `txg` selects which per-txg list is used.
 * NOTE(review): return-value conventions are set by the implementation,
 * which is not visible here.
 */
extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_TXG_H */
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_TXG_IMPL_H
|
||||
#define _SYS_TXG_IMPL_H
|
||||
|
||||
#pragma ident "@(#)txg_impl.h 1.2 08/03/20 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct tx_cpu {
|
||||
kmutex_t tc_lock;
|
||||
kcondvar_t tc_cv[TXG_SIZE];
|
||||
uint64_t tc_count[TXG_SIZE];
|
||||
char tc_pad[16];
|
||||
};
|
||||
|
||||
typedef struct tx_state {
|
||||
tx_cpu_t *tx_cpu; /* protects right to enter txg */
|
||||
kmutex_t tx_sync_lock; /* protects tx_state_t */
|
||||
krwlock_t tx_suspend;
|
||||
uint64_t tx_open_txg; /* currently open txg id */
|
||||
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
|
||||
uint64_t tx_syncing_txg; /* currently syncing txg id */
|
||||
uint64_t tx_synced_txg; /* last synced txg id */
|
||||
|
||||
uint64_t tx_sync_txg_waiting; /* txg we're waiting to sync */
|
||||
uint64_t tx_quiesce_txg_waiting; /* txg we're waiting to open */
|
||||
|
||||
kcondvar_t tx_sync_more_cv;
|
||||
kcondvar_t tx_sync_done_cv;
|
||||
kcondvar_t tx_quiesce_more_cv;
|
||||
kcondvar_t tx_quiesce_done_cv;
|
||||
kcondvar_t tx_timeout_cv;
|
||||
kcondvar_t tx_exit_cv; /* wait for all threads to exit */
|
||||
|
||||
uint8_t tx_threads; /* number of threads */
|
||||
uint8_t tx_exiting; /* set when we're exiting */
|
||||
|
||||
kthread_t *tx_sync_thread;
|
||||
kthread_t *tx_quiesce_thread;
|
||||
kthread_t *tx_timelimit_thread;
|
||||
} tx_state_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_TXG_IMPL_H */
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UBERBLOCK_H
|
||||
#define _SYS_UBERBLOCK_H
|
||||
|
||||
#pragma ident "@(#)uberblock.h 1.1 05/10/30 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct uberblock uberblock_t;
|
||||
|
||||
extern int uberblock_verify(uberblock_t *ub);
|
||||
extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UBERBLOCK_H */
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UBERBLOCK_IMPL_H
|
||||
#define _SYS_UBERBLOCK_IMPL_H
|
||||
|
||||
#pragma ident "@(#)uberblock_impl.h 1.4 07/06/29 SMI"
|
||||
|
||||
#include <sys/uberblock.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * The uberblock version is incremented whenever an incompatible on-disk
 * format change is made to the SPA, DMU, or ZAP.
 *
 * Note: the first two fields should never be moved.  When a storage pool
 * is opened, the uberblock must be read off the disk before the version
 * can be checked.  If the ub_version field is moved, we may not detect
 * version mismatch.  If the ub_magic field is moved, applications that
 * expect the magic number in the first word won't work.
 */
#define	UBERBLOCK_MAGIC		0x00bab10c		/* oo-ba-bloc!	*/
#define	UBERBLOCK_SHIFT		10			/* up to 1K	*/
|
||||
|
||||
struct uberblock {
|
||||
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
||||
uint64_t ub_version; /* SPA_VERSION */
|
||||
uint64_t ub_txg; /* txg of last sync */
|
||||
uint64_t ub_guid_sum; /* sum of all vdev guids */
|
||||
uint64_t ub_timestamp; /* UTC time of last sync */
|
||||
blkptr_t ub_rootbp; /* MOS objset_phys_t */
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UBERBLOCK_IMPL_H */
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_UNIQUE_H
|
||||
#define _SYS_UNIQUE_H
|
||||
|
||||
#pragma ident "@(#)unique.h 1.2 07/08/02 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The number of significant bits in each unique value. */
#define	UNIQUE_BITS	56
|
||||
|
||||
/* Set up and tear down the unique-value module. */
void unique_init(void);
void unique_fini(void);

/*
 * Return a new unique value (which will not be uniquified against until
 * it is unique_insert()-ed).
 */
uint64_t unique_create(void);

/* Return a unique value, which equals the one passed in if possible. */
uint64_t unique_insert(uint64_t value);

/* Indicate that this value no longer needs to be uniquified against. */
void unique_remove(uint64_t value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_UNIQUE_H */
|
||||
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_H
|
||||
#define _SYS_VDEV_H
|
||||
|
||||
#pragma ident "@(#)vdev.h 1.16 07/12/12 SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Defined elsewhere.
 * NOTE(review): judging by the name, presumably a tunable that turns off
 * device cache flushes -- confirm against the definition.
 */
extern boolean_t zfs_nocacheflush;
|
||||
|
||||
/*
 * Fault injection modes.
 */
#define	VDEV_FAULT_NONE		0
#define	VDEV_FAULT_RANDOM	1
#define	VDEV_FAULT_COUNT	2
|
||||
|
||||
extern int vdev_open(vdev_t *);
|
||||
extern int vdev_validate(vdev_t *);
|
||||
extern void vdev_close(vdev_t *);
|
||||
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
|
||||
extern void vdev_init(vdev_t *, uint64_t txg);
|
||||
extern void vdev_reopen(vdev_t *);
|
||||
extern int vdev_validate_aux(vdev_t *vd);
|
||||
extern int vdev_probe(vdev_t *);
|
||||
|
||||
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
|
||||
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
|
||||
extern void vdev_dtl_dirty(space_map_t *sm, uint64_t txg, uint64_t size);
|
||||
extern int vdev_dtl_contains(space_map_t *sm, uint64_t txg, uint64_t size);
|
||||
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
|
||||
int scrub_done);
|
||||
|
||||
extern const char *vdev_description(vdev_t *vd);
|
||||
|
||||
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_metaslab_fini(vdev_t *vd);
|
||||
|
||||
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
|
||||
extern void vdev_clear_stats(vdev_t *vd);
|
||||
extern void vdev_stat_update(zio_t *zio);
|
||||
extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
|
||||
boolean_t complete);
|
||||
extern int vdev_getspec(spa_t *spa, uint64_t vdev, char **vdev_spec);
|
||||
extern void vdev_propagate_state(vdev_t *vd);
|
||||
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
|
||||
vdev_aux_t aux);
|
||||
|
||||
extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
|
||||
int64_t alloc_delta, boolean_t update_root);
|
||||
|
||||
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
|
||||
|
||||
extern int vdev_fault(spa_t *spa, uint64_t guid);
|
||||
extern int vdev_degrade(spa_t *spa, uint64_t guid);
|
||||
extern int vdev_online(spa_t *spa, uint64_t guid, uint64_t flags,
|
||||
vdev_state_t *);
|
||||
extern int vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags);
|
||||
extern void vdev_clear(spa_t *spa, vdev_t *vd, boolean_t reopen_wanted);
|
||||
|
||||
extern int vdev_error_inject(vdev_t *vd, zio_t *zio);
|
||||
extern int vdev_is_dead(vdev_t *vd);
|
||||
extern int vdev_readable(vdev_t *vd);
|
||||
extern int vdev_writeable(vdev_t *vd);
|
||||
|
||||
extern void vdev_cache_init(vdev_t *vd);
|
||||
extern void vdev_cache_fini(vdev_t *vd);
|
||||
extern int vdev_cache_read(zio_t *zio);
|
||||
extern void vdev_cache_write(zio_t *zio);
|
||||
extern void vdev_cache_purge(vdev_t *vd);
|
||||
|
||||
extern void vdev_queue_init(vdev_t *vd);
|
||||
extern void vdev_queue_fini(vdev_t *vd);
|
||||
extern zio_t *vdev_queue_io(zio_t *zio);
|
||||
extern void vdev_queue_io_done(zio_t *zio);
|
||||
|
||||
extern void vdev_config_dirty(vdev_t *vd);
|
||||
extern void vdev_config_clean(vdev_t *vd);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
|
||||
|
||||
extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
|
||||
boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
|
||||
|
||||
/*
|
||||
* Label routines
|
||||
*/
|
||||
struct uberblock;
|
||||
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
|
||||
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
|
||||
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
|
||||
|
||||
/*
 * Reason a label is being initialized; the type of the `reason` argument
 * of vdev_label_init().
 */
typedef enum {
	VDEV_LABEL_CREATE,	/* create/add a new device */
	VDEV_LABEL_REPLACE,	/* replace an existing device */
	VDEV_LABEL_SPARE,	/* add a new hot spare */
	VDEV_LABEL_REMOVE,	/* remove an existing device */
	VDEV_LABEL_L2CACHE	/* add an L2ARC cache device */
} vdev_labeltype_t;
|
||||
|
||||
/* Initialize the label of `vd` in `txg`; `reason` is a vdev_labeltype_t. */
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_H */
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License, Version 1.0 only
|
||||
* (the "License"). You may not use this file except in compliance
|
||||
* with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_FILE_H
|
||||
#define _SYS_VDEV_FILE_H
|
||||
|
||||
#pragma ident "@(#)vdev_file.h 1.1 05/10/30 SMI"
|
||||
|
||||
#include <sys/vdev.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct vdev_file {
|
||||
vnode_t *vf_vnode;
|
||||
} vdev_file_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_FILE_H */
|
||||
@@ -0,0 +1,306 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_IMPL_H
|
||||
#define _SYS_VDEV_IMPL_H
|
||||
|
||||
#pragma ident "@(#)vdev_impl.h 1.19 07/11/27 SMI"
|
||||
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/dkio.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Virtual device descriptors.
|
||||
*
|
||||
* All storage pool operations go through the virtual device framework,
|
||||
* which provides data replication and I/O scheduling.
|
||||
*/
|
||||
|
||||
/*
 * Forward declarations that lots of things need.
 */
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||
|
||||
/*
|
||||
* Virtual device operations
|
||||
*/
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
|
||||
typedef void vdev_close_func_t(vdev_t *vd);
|
||||
typedef int vdev_probe_func_t(vdev_t *vd);
|
||||
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
|
||||
typedef int vdev_io_start_func_t(zio_t *zio);
|
||||
typedef int vdev_io_done_func_t(zio_t *zio);
|
||||
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
|
||||
|
||||
typedef struct vdev_ops {
|
||||
vdev_open_func_t *vdev_op_open;
|
||||
vdev_close_func_t *vdev_op_close;
|
||||
vdev_probe_func_t *vdev_op_probe;
|
||||
vdev_asize_func_t *vdev_op_asize;
|
||||
vdev_io_start_func_t *vdev_op_io_start;
|
||||
vdev_io_done_func_t *vdev_op_io_done;
|
||||
vdev_state_change_func_t *vdev_op_state_change;
|
||||
char vdev_op_type[16];
|
||||
boolean_t vdev_op_leaf;
|
||||
} vdev_ops_t;
|
||||
|
||||
/*
|
||||
* Virtual device properties
|
||||
*/
|
||||
struct vdev_cache_entry {
|
||||
char *ve_data;
|
||||
uint64_t ve_offset;
|
||||
uint64_t ve_lastused;
|
||||
avl_node_t ve_offset_node;
|
||||
avl_node_t ve_lastused_node;
|
||||
uint32_t ve_hits;
|
||||
uint16_t ve_missed_update;
|
||||
zio_t *ve_fill_io;
|
||||
};
|
||||
|
||||
struct vdev_cache {
|
||||
avl_tree_t vc_offset_tree;
|
||||
avl_tree_t vc_lastused_tree;
|
||||
kmutex_t vc_lock;
|
||||
};
|
||||
|
||||
struct vdev_queue {
|
||||
avl_tree_t vq_deadline_tree;
|
||||
avl_tree_t vq_read_tree;
|
||||
avl_tree_t vq_write_tree;
|
||||
avl_tree_t vq_pending_tree;
|
||||
kmutex_t vq_lock;
|
||||
};
|
||||
|
||||
/*
|
||||
* Virtual device descriptor
|
||||
*/
|
||||
struct vdev {
|
||||
/*
|
||||
* Common to all vdev types.
|
||||
*/
|
||||
uint64_t vdev_id; /* child number in vdev parent */
|
||||
uint64_t vdev_guid; /* unique ID for this vdev */
|
||||
uint64_t vdev_guid_sum; /* self guid + all child guids */
|
||||
uint64_t vdev_asize; /* allocatable device capacity */
|
||||
uint64_t vdev_ashift; /* block alignment shift */
|
||||
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
|
||||
uint64_t vdev_prevstate; /* used when reopening a vdev */
|
||||
vdev_ops_t *vdev_ops; /* vdev operations */
|
||||
spa_t *vdev_spa; /* spa for this vdev */
|
||||
void *vdev_tsd; /* type-specific data */
|
||||
vdev_t *vdev_top; /* top-level vdev */
|
||||
vdev_t *vdev_parent; /* parent vdev */
|
||||
vdev_t **vdev_child; /* array of children */
|
||||
uint64_t vdev_children; /* number of children */
|
||||
space_map_t vdev_dtl_map; /* dirty time log in-core state */
|
||||
space_map_t vdev_dtl_scrub; /* DTL for scrub repair writes */
|
||||
vdev_stat_t vdev_stat; /* virtual device statistics */
|
||||
|
||||
/*
|
||||
* Top-level vdev state.
|
||||
*/
|
||||
uint64_t vdev_ms_array; /* metaslab array object */
|
||||
uint64_t vdev_ms_shift; /* metaslab size shift */
|
||||
uint64_t vdev_ms_count; /* number of metaslabs */
|
||||
metaslab_group_t *vdev_mg; /* metaslab group */
|
||||
metaslab_t **vdev_ms; /* metaslab array */
|
||||
txg_list_t vdev_ms_list; /* per-txg dirty metaslab lists */
|
||||
txg_list_t vdev_dtl_list; /* per-txg dirty DTL lists */
|
||||
txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */
|
||||
boolean_t vdev_remove_wanted; /* async remove wanted? */
|
||||
list_node_t vdev_dirty_node; /* config dirty list */
|
||||
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
|
||||
uint64_t vdev_islog; /* is an intent log device */
|
||||
|
||||
/*
|
||||
* Leaf vdev state.
|
||||
*/
|
||||
uint64_t vdev_psize; /* physical device capacity */
|
||||
space_map_obj_t vdev_dtl; /* dirty time log on-disk state */
|
||||
txg_node_t vdev_dtl_node; /* per-txg dirty DTL linkage */
|
||||
uint64_t vdev_wholedisk; /* true if this is a whole disk */
|
||||
uint64_t vdev_offline; /* persistent offline state */
|
||||
uint64_t vdev_faulted; /* persistent faulted state */
|
||||
uint64_t vdev_degraded; /* persistent degraded state */
|
||||
uint64_t vdev_removed; /* persistent removed state */
|
||||
uint64_t vdev_nparity; /* number of parity devices for raidz */
|
||||
char *vdev_path; /* vdev path (if any) */
|
||||
char *vdev_devid; /* vdev devid (if any) */
|
||||
char *vdev_physpath; /* vdev device path (if any) */
|
||||
uint64_t vdev_fault_arg; /* fault injection paramater */
|
||||
int vdev_fault_mask; /* zio types to fault */
|
||||
uint8_t vdev_fault_mode; /* fault injection mode */
|
||||
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
|
||||
uint8_t vdev_detached; /* device detached? */
|
||||
uint64_t vdev_isspare; /* was a hot spare */
|
||||
uint64_t vdev_isl2cache; /* was a l2cache device */
|
||||
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
|
||||
vdev_cache_t vdev_cache; /* physical block cache */
|
||||
uint64_t vdev_not_present; /* not present during import */
|
||||
hrtime_t vdev_last_try; /* last reopen time */
|
||||
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
|
||||
uint64_t vdev_unspare; /* unspare when resilvering done */
|
||||
boolean_t vdev_checkremove; /* temporary online test */
|
||||
boolean_t vdev_forcefault; /* force online fault */
|
||||
boolean_t vdev_is_failing; /* device errors seen */
|
||||
|
||||
/*
|
||||
* For DTrace to work in userland (libzpool) context, these fields must
|
||||
* remain at the end of the structure. DTrace will use the kernel's
|
||||
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
|
||||
* larger in userland, the offsets for the rest fields would be
|
||||
* incorrect.
|
||||
*/
|
||||
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
|
||||
kmutex_t vdev_stat_lock; /* vdev_stat */
|
||||
};
|
||||
|
||||
/*
 * Sizes, in bytes, of the four regions of a vdev label.  They are used
 * as the array dimensions of vdev_label_t and add up to 256K.
 */
#define	VDEV_SKIP_SIZE		(8 << 10)
#define	VDEV_BOOT_HEADER_SIZE	(8 << 10)
#define	VDEV_PHYS_SIZE		(112 << 10)
#define	VDEV_UBERBLOCK_RING	(128 << 10)

/*
 * Uberblock ring geometry for the vdev `vd`.  A slot is
 * 2^VDEV_UBERBLOCK_SHIFT(vd) bytes, where the shift is the larger of the
 * top-level vdev's ashift and UBERBLOCK_SHIFT.  COUNT is the number of
 * such slots in the ring and OFFSET(vd, n) is the byte offset of slot
 * `n` within vdev_label_t.
 */
#define	VDEV_UBERBLOCK_SHIFT(vd)	\
	MAX((vd)->vdev_top->vdev_ashift, UBERBLOCK_SHIFT)
#define	VDEV_UBERBLOCK_COUNT(vd)	\
	(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
#define	VDEV_UBERBLOCK_OFFSET(vd, n)	\
	offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
#define	VDEV_UBERBLOCK_SIZE(vd)		(1ULL << VDEV_UBERBLOCK_SHIFT(vd))
|
||||
|
||||
/* ZFS boot block */
#define	VDEV_BOOT_MAGIC		0x2f5b007b10cULL
#define	VDEV_BOOT_VERSION	1	/* version number	*/
|
||||
|
||||
typedef struct vdev_boot_header {
|
||||
uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
|
||||
uint64_t vb_version; /* VDEV_BOOT_VERSION */
|
||||
uint64_t vb_offset; /* start offset (bytes) */
|
||||
uint64_t vb_size; /* size (bytes) */
|
||||
char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
|
||||
} vdev_boot_header_t;
|
||||
|
||||
typedef struct vdev_phys {
|
||||
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
|
||||
zio_block_tail_t vp_zbt;
|
||||
} vdev_phys_t;
|
||||
|
||||
typedef struct vdev_label {
|
||||
char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
|
||||
vdev_boot_header_t vl_boot_header; /* 8K */
|
||||
vdev_phys_t vl_vdev_phys; /* 112K */
|
||||
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
|
||||
} vdev_label_t; /* 256K total */
|
||||
|
||||
/*
 * vdev_dirty() flags
 *
 * Distinct bits, so they can be OR-ed together.
 */
#define	VDD_METASLAB	0x01
#define	VDD_DTL		0x02
|
||||
|
||||
/*
 * Size and offset of embedded boot loader region on each label.
 * The total size of the first two labels plus the boot area is 4MB.
 */
#define	VDEV_BOOT_OFFSET	(2 * sizeof (vdev_label_t))
#define	VDEV_BOOT_SIZE		(7ULL << 19)			/* 3.5M	*/
|
||||
|
||||
/*
 * Size of label regions at the start and end of each leaf device.
 * The start region is two labels plus the boot area; the end region is
 * two labels.
 */
#define	VDEV_LABEL_START_SIZE	(2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
#define	VDEV_LABEL_END_SIZE	(2 * sizeof (vdev_label_t))
#define	VDEV_LABELS		4
|
||||
|
||||
/*
 * Allocation types.
 * NOTE(review): presumably the values accepted as the `alloctype`
 * argument of vdev_alloc() -- confirm against its implementation.
 */
#define	VDEV_ALLOC_LOAD		0
#define	VDEV_ALLOC_ADD		1
#define	VDEV_ALLOC_SPARE	2
#define	VDEV_ALLOC_L2CACHE	3
|
||||
|
||||
/*
|
||||
* Allocate or free a vdev
|
||||
*/
|
||||
extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
|
||||
vdev_t *parent, uint_t id, int alloctype);
|
||||
extern void vdev_free(vdev_t *vd);
|
||||
|
||||
/*
|
||||
* Add or remove children and parents
|
||||
*/
|
||||
extern void vdev_add_child(vdev_t *pvd, vdev_t *cvd);
|
||||
extern void vdev_remove_child(vdev_t *pvd, vdev_t *cvd);
|
||||
extern void vdev_compact_children(vdev_t *pvd);
|
||||
extern vdev_t *vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops);
|
||||
extern void vdev_remove_parent(vdev_t *cvd);
|
||||
|
||||
/*
|
||||
* vdev sync load and sync
|
||||
*/
|
||||
extern void vdev_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
|
||||
|
||||
/*
|
||||
* Available vdev types.
|
||||
*/
|
||||
extern vdev_ops_t vdev_root_ops;
|
||||
extern vdev_ops_t vdev_mirror_ops;
|
||||
extern vdev_ops_t vdev_replacing_ops;
|
||||
extern vdev_ops_t vdev_raidz_ops;
|
||||
extern vdev_ops_t vdev_disk_ops;
|
||||
extern vdev_ops_t vdev_file_ops;
|
||||
extern vdev_ops_t vdev_missing_ops;
|
||||
extern vdev_ops_t vdev_spare_ops;
|
||||
|
||||
/*
|
||||
* Common size functions
|
||||
*/
|
||||
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
|
||||
extern uint64_t vdev_get_rsize(vdev_t *vd);
|
||||
|
||||
/*
 * zdb uses this tunable, so it must be declared here to make lint happy.
 * It is defined elsewhere.
 */
extern int zfs_vdev_cache_size;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_VDEV_IMPL_H */
|
||||
@@ -0,0 +1,410 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
#define _SYS_ZAP_H
|
||||
|
||||
#pragma ident "@(#)zap.h 1.6 07/10/25 SMI"
|
||||
|
||||
/*
|
||||
* ZAP - ZFS Attribute Processor
|
||||
*
|
||||
* The ZAP is a module which sits on top of the DMU (Data Management
|
||||
* Unit) and implements a higher-level storage primitive using DMU
|
||||
* objects. Its primary consumer is the ZPL (ZFS Posix Layer).
|
||||
*
|
||||
* A "zapobj" is a DMU object which the ZAP uses to store attributes.
|
||||
* Users should use only zap routines to access a zapobj - they should
|
||||
* not access the DMU object directly using DMU routines.
|
||||
*
|
||||
* The attributes stored in a zapobj are name-value pairs. The name is
|
||||
* a zero-terminated string of up to ZAP_MAXNAMELEN bytes (including
|
||||
* terminating NUL). The value is an array of integers, which may be
|
||||
* 1, 2, 4, or 8 bytes long. The total space used by the array (number
|
||||
* of integers * integer length) can be up to ZAP_MAXVALUELEN bytes.
|
||||
* Note that an 8-byte integer value can be used to store the location
|
||||
* (object number) of another dmu object (which may be itself a zapobj).
|
||||
* Note that you can use a zero-length attribute to store a single bit
|
||||
* of information - the attribute is present or not.
|
||||
*
|
||||
* The ZAP routines are thread-safe. However, you must observe the
|
||||
* DMU's restriction that a transaction may not be operated on
|
||||
* concurrently.
|
||||
*
|
||||
* Any of the routines that return an int may return an I/O error (EIO
|
||||
* or ECHECKSUM).
|
||||
*
|
||||
*
|
||||
* Implementation / Performance Notes:
|
||||
*
|
||||
* The ZAP is intended to operate most efficiently on attributes with
|
||||
* short (49 bytes or less) names and single 8-byte values, for which
|
||||
* the microzap will be used. The ZAP should be efficient enough so
|
||||
* that the user does not need to cache these attributes.
|
||||
*
|
||||
* The ZAP's locking scheme makes its routines thread-safe. Operations
|
||||
* on different zapobjs will be processed concurrently. Operations on
|
||||
* the same zapobj which only read data will be processed concurrently.
|
||||
* Operations on the same zapobj which modify data will be processed
|
||||
* concurrently when there are many attributes in the zapobj (because
|
||||
* the ZAP uses per-block locking - more than 128 * (number of cpus)
|
||||
* small attributes will suffice).
|
||||
*/
|
||||
|
||||
/*
|
||||
* We're using zero-terminated byte strings (ie. ASCII or UTF-8 C
|
||||
* strings) for the names of attributes, rather than a byte string
|
||||
* bounded by an explicit length. If some day we want to support names
|
||||
* in character sets which have embedded zeros (eg. UTF-16, UTF-32),
|
||||
* we'll have to add routines for using length-bounded strings.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZAP_MAXNAMELEN 256
|
||||
#define ZAP_MAXVALUELEN 1024
|
||||
|
||||
/*
|
||||
* The matchtype specifies which entry will be accessed.
|
||||
* MT_EXACT: only find an exact match (non-normalized)
|
||||
* MT_FIRST: find the "first" normalized (case and Unicode
|
||||
* form) match; the designated "first" match will not change as long
|
||||
* as the set of entries with this normalization doesn't change
|
||||
* MT_BEST: if there is an exact match, find that, otherwise find the
|
||||
* first normalized match
|
||||
*/
|
||||
typedef enum matchtype /* selects which entry a lookup or remove will access */
|
||||
{
|
||||
MT_EXACT, /* only find an exact (non-normalized) match */
|
||||
MT_BEST, /* exact match if there is one, otherwise the first normalized match */
|
||||
MT_FIRST /* the "first" normalized (case and Unicode form) match */
|
||||
} matchtype_t;
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes and return its object number.
|
||||
* MT_EXACT will cause the zap object to only support MT_EXACT lookups,
|
||||
* otherwise any matchtype can be used for lookups.
|
||||
*
|
||||
* normflags specifies what normalization will be done. values are:
|
||||
* 0: no normalization (legacy on-disk format, supports MT_EXACT matching
|
||||
* only)
|
||||
* U8_TEXTPREP_TOLOWER: case normalization will be performed.
|
||||
* MT_FIRST/MT_BEST matching will find entries that match without
|
||||
* regard to case (eg. looking for "foo" can find an entry "Foo").
|
||||
* Eventually, other flags will permit unicode normalization as well.
|
||||
*/
|
||||
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes from the given (unallocated)
|
||||
* object number.
|
||||
*/
|
||||
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
|
||||
int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* The zapobj passed in must be a valid ZAP object for all of the
|
||||
* following routines.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Destroy this zapobj and all its attributes.
|
||||
*
|
||||
* Frees the object number using dmu_object_free.
|
||||
*/
|
||||
int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Manipulate attributes.
|
||||
*
|
||||
* 'integer_size' is in bytes, and must be 1, 2, 4, or 8.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Retrieve the contents of the attribute with the given name.
|
||||
*
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*
|
||||
* If 'integer_size' is smaller than the attribute's integer size, the
|
||||
* call will fail and return EINVAL.
|
||||
*
|
||||
* If 'integer_size' is equal to or larger than the attribute's integer
|
||||
* size, the call will succeed and return 0. When converting to a
|
||||
* larger integer size, the integers will be treated as unsigned (ie. no
|
||||
* sign-extension will be performed).
|
||||
*
|
||||
* 'num_integers' is the length (in integers) of 'buf'.
|
||||
*
|
||||
* If the attribute is longer than the buffer, as many integers as will
|
||||
* fit will be transferred to 'buf'. If the entire attribute was not
|
||||
* transferred, the call will return EOVERFLOW.
|
||||
*
|
||||
* If rn_len is nonzero, realname will be set to the name of the found
|
||||
* entry (which may be different from the requested name if matchtype is
|
||||
* not MT_EXACT).
|
||||
*
|
||||
* If normalization_conflictp is not NULL, it will be set if there is
|
||||
* another name with the same case/unicode normalized form.
|
||||
*/
|
||||
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *normalization_conflictp);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
*
|
||||
* If an attribute with the given name already exists, the call will
|
||||
* fail and return EEXIST.
|
||||
*/
|
||||
int zap_add(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the attribute with the given name to the given value. If an
|
||||
* attribute with the given name does not exist, it will be created. If
|
||||
* an attribute with the given name already exists, the previous value
|
||||
* will be overwritten. The integer_size may be different from the
|
||||
* existing attribute's integer size, in which case the attribute's
|
||||
* integer size will be updated to the new value.
|
||||
*/
|
||||
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Get the length (in integers) and the integer size of the specified
|
||||
* attribute.
|
||||
*
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
|
||||
/*
|
||||
* Remove the specified attribute.
|
||||
*
|
||||
* If the specified attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
matchtype_t mt, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Returns (in *count) the number of attributes in the specified zap
|
||||
* object.
|
||||
*/
|
||||
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
|
||||
|
||||
|
||||
/*
|
||||
* Returns (in name) the name of the entry whose (value & mask)
|
||||
* (za_first_integer) is value, or ENOENT if not found. The string
|
||||
* pointed to by name must be at least 256 bytes long. If mask==0, the
|
||||
* match must be exact (ie, same as mask=-1ULL).
|
||||
*/
|
||||
int zap_value_search(objset_t *os, uint64_t zapobj,
|
||||
uint64_t value, uint64_t mask, char *name);
|
||||
|
||||
struct zap;
|
||||
struct zap_leaf;
|
||||
typedef struct zap_cursor {
|
||||
/* This structure is opaque! */
|
||||
objset_t *zc_objset;
|
||||
struct zap *zc_zap;
|
||||
struct zap_leaf *zc_leaf;
|
||||
uint64_t zc_zapobj;
|
||||
uint64_t zc_hash;
|
||||
uint32_t zc_cd;
|
||||
} zap_cursor_t;
|
||||
|
||||
typedef struct {
|
||||
int za_integer_length;
|
||||
/*
|
||||
* za_normalization_conflict will be set if there are additional
|
||||
* entries with this normalized form (eg, "foo" and "Foo").
|
||||
*/
|
||||
boolean_t za_normalization_conflict;
|
||||
uint64_t za_num_integers;
|
||||
uint64_t za_first_integer; /* no sign extension for <8byte ints */
|
||||
char za_name[MAXNAMELEN];
|
||||
} zap_attribute_t;
|
||||
|
||||
/*
|
||||
* The interface for listing all the attributes of a zapobj can be
|
||||
* thought of as cursor moving down a list of the attributes one by
|
||||
* one. The cookie returned by the zap_cursor_serialize routine is
|
||||
* persistent across system calls (and across reboot, even).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor, pointing to the "first" attribute of the
|
||||
* zapobj. You must _fini the cursor when you are done with it.
|
||||
*/
|
||||
void zap_cursor_init(zap_cursor_t *zc, objset_t *ds, uint64_t zapobj);
|
||||
void zap_cursor_fini(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Get the attribute currently pointed to by the cursor. Returns
|
||||
* ENOENT if at the end of the attributes.
|
||||
*/
|
||||
int zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za);
|
||||
|
||||
/*
|
||||
* Advance the cursor to the next attribute.
|
||||
*/
|
||||
void zap_cursor_advance(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Get a persistent cookie pointing to the current position of the zap
|
||||
* cursor. The low 4 bits in the cookie are always zero, and thus can
|
||||
* be used to differentiate a serialized cookie from a different type
|
||||
* of value. The cookie will be less than 2^32 as long as there are
|
||||
* fewer than 2^22 (4.2 million) entries in the zap object.
|
||||
*/
|
||||
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor pointing to the position recorded by
|
||||
* zap_cursor_serialize (in the "serialized" argument). You can also
|
||||
* use a "serialized" argument of 0 to start at the beginning of the
|
||||
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
|
||||
* zap_cursor_init(...).)
|
||||
*/
|
||||
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
|
||||
uint64_t zapobj, uint64_t serialized);
|
||||
|
||||
|
||||
#define ZAP_HISTOGRAM_SIZE 10
|
||||
|
||||
typedef struct zap_stats {
|
||||
/*
|
||||
* Size of the pointer table (in number of entries).
|
||||
* This is always a power of 2, or zero if it's a microzap.
|
||||
* In general, it should be considerably greater than zs_num_leafs.
|
||||
*/
|
||||
uint64_t zs_ptrtbl_len;
|
||||
|
||||
uint64_t zs_blocksize; /* size of zap blocks */
|
||||
|
||||
/*
|
||||
* The number of blocks used. Note that some blocks may be
|
||||
* wasted because old ptrtbl's and large name/value blocks are
|
||||
* not reused. (Although their space is reclaimed, we don't
|
||||
* reuse those offsets in the object.)
|
||||
*/
|
||||
uint64_t zs_num_blocks;
|
||||
|
||||
/*
|
||||
* Pointer table values from zap_ptrtbl in the zap_phys_t
|
||||
*/
|
||||
uint64_t zs_ptrtbl_nextblk; /* next (larger) copy start block */
|
||||
uint64_t zs_ptrtbl_blks_copied; /* number source blocks copied */
|
||||
uint64_t zs_ptrtbl_zt_blk; /* starting block number */
|
||||
uint64_t zs_ptrtbl_zt_numblks; /* number of blocks */
|
||||
uint64_t zs_ptrtbl_zt_shift; /* bits to index it */
|
||||
|
||||
/*
|
||||
* Values of the other members of the zap_phys_t
|
||||
*/
|
||||
uint64_t zs_block_type; /* ZBT_HEADER */
|
||||
uint64_t zs_magic; /* ZAP_MAGIC */
|
||||
uint64_t zs_num_leafs; /* The number of leaf blocks */
|
||||
uint64_t zs_num_entries; /* The number of zap entries */
|
||||
uint64_t zs_salt; /* salt to stir into hash function */
|
||||
|
||||
/*
|
||||
* Histograms. For all histograms, the last index
|
||||
* (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater
|
||||
* than what can be represented. For example
|
||||
* zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE-1] is the number
|
||||
* of leafs with 45 or more entries.
|
||||
*/
|
||||
|
||||
/*
|
||||
* zs_leafs_with_2n_pointers[n] is the number of leafs with
|
||||
* 2^n pointers to it.
|
||||
*/
|
||||
uint64_t zs_leafs_with_2n_pointers[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_blocks_with_n5_entries[n] is the number of leafs with
|
||||
* [n*5, (n+1)*5) entries. In the current implementation, there
|
||||
* can be at most 55 entries in any block, but there may be
|
||||
* fewer if the name or value is large, or the block is not
|
||||
* completely full.
|
||||
*/
|
||||
uint64_t zs_blocks_with_n5_entries[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_blocks_n_tenths_full[n] is the number of leafs whose
|
||||
* fullness is in the range [n/10, (n+1)/10).
|
||||
*/
|
||||
uint64_t zs_blocks_n_tenths_full[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_entries_using_n_chunks[n] is the number of entries which
|
||||
* consume n 24-byte chunks. (Note, large names/values only use
|
||||
* one chunk, but contribute to zs_num_blocks_large.)
|
||||
*/
|
||||
uint64_t zs_entries_using_n_chunks[ZAP_HISTOGRAM_SIZE];
|
||||
|
||||
/*
|
||||
* zs_buckets_with_n_entries[n] is the number of buckets (each
|
||||
* leaf has 64 buckets) with n entries.
|
||||
* zs_buckets_with_n_entries[1] should be very close to
|
||||
* zs_num_entries.
|
||||
*/
|
||||
uint64_t zs_buckets_with_n_entries[ZAP_HISTOGRAM_SIZE];
|
||||
} zap_stats_t;
|
||||
|
||||
/*
|
||||
* Get statistics about a ZAP object. Note: you need to be aware of the
|
||||
* internal implementation of the ZAP to correctly interpret some of the
|
||||
* statistics. This interface shouldn't be relied on unless you really
|
||||
* know what you're doing.
|
||||
*/
|
||||
int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_H */
|
||||
@@ -0,0 +1,218 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_IMPL_H
|
||||
#define _SYS_ZAP_IMPL_H
|
||||
|
||||
#pragma ident "@(#)zap_impl.h 1.9 07/10/30 SMI"
|
||||
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int fzap_default_block_shift;
|
||||
|
||||
#define ZAP_MAGIC 0x2F52AB2ABULL
|
||||
|
||||
#define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_f.zap_block_shift)
|
||||
|
||||
/*
 * All-ones 32-bit value (UINT32_MAX).  Presumably the largest usable
 * collision differentiator, since the cd fields in these headers are
 * uint32_t -- confirm against the users in zap.c / zap_leaf.c.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand next to sizeof or postfix operators.
 */
#define ZAP_MAXCD ((uint32_t)(-1))
|
||||
#define ZAP_HASHBITS 28
|
||||
#define MZAP_ENT_LEN 64
|
||||
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
|
||||
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
|
||||
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
|
||||
|
||||
typedef struct mzap_ent_phys { /* one microzap entry; 8 + 4 + 2 + MZAP_NAME_LEN = MZAP_ENT_LEN (64) bytes */
|
||||
uint64_t mze_value; /* the entry's single 8-byte value */
|
||||
uint32_t mze_cd; /* presumably the collision differentiator (cf. le_cd) -- confirm */
|
||||
uint16_t mze_pad; /* in case we want to chain them someday */
|
||||
char mze_name[MZAP_NAME_LEN]; /* name; MZAP_NAME_LEN = 64 - 8 - 4 - 2 = 50 bytes */
|
||||
} mzap_ent_phys_t;
|
||||
|
||||
typedef struct mzap_phys { /* microzap block: 8 uint64_t header words (64 bytes), then the entries */
|
||||
uint64_t mz_block_type; /* ZBT_MICRO */
|
||||
uint64_t mz_salt; /* presumably the hash salt (cf. zap_salt) -- confirm */
|
||||
uint64_t mz_normflags; /* presumably normalization flags (cf. zap_normflags) -- confirm */
|
||||
uint64_t mz_pad[5]; /* rounds the header up to 8 words, the size of one entry (MZAP_ENT_LEN) */
|
||||
mzap_ent_phys_t mz_chunk[1];
|
||||
/* actually variable size depending on block size */
|
||||
} mzap_phys_t;
|
||||
|
||||
typedef struct mzap_ent {
|
||||
avl_node_t mze_node;
|
||||
int mze_chunkid;
|
||||
uint64_t mze_hash;
|
||||
mzap_ent_phys_t mze_phys;
|
||||
} mzap_ent_t;
|
||||
|
||||
|
||||
/*
|
||||
* The (fat) zap is stored in one object. It is an array of
|
||||
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
|
||||
*
|
||||
* ptrtbl fits in first block:
|
||||
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
|
||||
*
|
||||
* ptrtbl too big for first block:
|
||||
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
|
||||
*
|
||||
*/
|
||||
|
||||
struct dmu_buf;
|
||||
struct zap_leaf;
|
||||
|
||||
#define ZBT_LEAF ((1ULL << 63) + 0)
|
||||
#define ZBT_HEADER ((1ULL << 63) + 1)
|
||||
#define ZBT_MICRO ((1ULL << 63) + 3)
|
||||
/* any other values are ptrtbl blocks */
|
||||
|
||||
/*
|
||||
* the embedded pointer table takes up half a block:
|
||||
* block size / entry size (2^3) / 2
|
||||
*/
|
||||
#define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
|
||||
|
||||
/*
|
||||
* The embedded pointer table starts half-way through the block. Since
|
||||
* the pointer table itself is half the block, it starts at (64-bit)
|
||||
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
|
||||
*/
|
||||
#define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
|
||||
((uint64_t *)(zap)->zap_f.zap_phys) \
|
||||
[(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
|
||||
|
||||
/*
|
||||
* TAKE NOTE:
|
||||
* If zap_phys_t is modified, zap_byteswap() must be modified.
|
||||
*/
|
||||
typedef struct zap_phys {
|
||||
uint64_t zap_block_type; /* ZBT_HEADER */
|
||||
uint64_t zap_magic; /* ZAP_MAGIC */
|
||||
|
||||
struct zap_table_phys {
|
||||
uint64_t zt_blk; /* starting block number */
|
||||
uint64_t zt_numblks; /* number of blocks */
|
||||
uint64_t zt_shift; /* bits to index it */
|
||||
uint64_t zt_nextblk; /* next (larger) copy start block */
|
||||
uint64_t zt_blks_copied; /* number source blocks copied */
|
||||
} zap_ptrtbl;
|
||||
|
||||
uint64_t zap_freeblk; /* the next free block */
|
||||
uint64_t zap_num_leafs; /* number of leafs */
|
||||
uint64_t zap_num_entries; /* number of entries */
|
||||
uint64_t zap_salt; /* salt to stir into hash function */
|
||||
uint64_t zap_normflags; /* flags for u8_textprep_str() */
|
||||
/*
|
||||
* This structure is followed by padding, and then the embedded
|
||||
* pointer table. The embedded pointer table takes up second
|
||||
* half of the block. It is accessed using the
|
||||
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
|
||||
*/
|
||||
} zap_phys_t;
|
||||
|
||||
typedef struct zap_table_phys zap_table_phys_t;
|
||||
|
||||
typedef struct zap {
|
||||
objset_t *zap_objset;
|
||||
uint64_t zap_object;
|
||||
struct dmu_buf *zap_dbuf;
|
||||
krwlock_t zap_rwlock;
|
||||
boolean_t zap_ismicro;
|
||||
int zap_normflags;
|
||||
uint64_t zap_salt;
|
||||
union {
|
||||
struct {
|
||||
zap_phys_t *zap_phys;
|
||||
|
||||
/*
|
||||
* zap_num_entries_mtx protects
|
||||
* zap_num_entries
|
||||
*/
|
||||
kmutex_t zap_num_entries_mtx;
|
||||
int zap_block_shift;
|
||||
} zap_fat;
|
||||
struct {
|
||||
mzap_phys_t *zap_phys;
|
||||
int16_t zap_num_entries;
|
||||
int16_t zap_num_chunks;
|
||||
int16_t zap_alloc_next;
|
||||
avl_tree_t zap_avl;
|
||||
} zap_micro;
|
||||
} zap_u;
|
||||
} zap_t;
|
||||
|
||||
typedef struct zap_name {
|
||||
zap_t *zn_zap;
|
||||
const char *zn_name_orij;
|
||||
uint64_t zn_hash;
|
||||
matchtype_t zn_matchtype;
|
||||
const char *zn_name_norm;
|
||||
char zn_normbuf[ZAP_MAXNAMELEN];
|
||||
} zap_name_t;
|
||||
|
||||
#define zap_f zap_u.zap_fat
|
||||
#define zap_m zap_u.zap_micro
|
||||
|
||||
boolean_t zap_match(zap_name_t *zn, const char *matchname);
|
||||
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
|
||||
void zap_unlockdir(zap_t *zap);
|
||||
void zap_evict(dmu_buf_t *db, void *vmzap);
|
||||
zap_name_t *zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt);
|
||||
void zap_name_free(zap_name_t *zn);
|
||||
|
||||
/*
 * Yield the top n bits of hash (assumes hash is a 64-bit value).
 * n == 0 is special-cased to 0 because shifting a 64-bit value by 64
 * is undefined.  Note that n is evaluated twice.
 */
#define ZAP_HASH_IDX(hash, n) ((n) != 0 ? (hash) >> (64 - (n)) : 0)
|
||||
|
||||
void fzap_byteswap(void *buf, size_t size);
|
||||
int fzap_count(zap_t *zap, uint64_t *count);
|
||||
int fzap_lookup(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
char *realname, int rn_len, boolean_t *normalization_conflictp);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
int fzap_length(zap_name_t *zn,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
|
||||
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
|
||||
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
|
||||
void zap_put_leaf(struct zap_leaf *l);
|
||||
|
||||
int fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, dmu_tx_t *tx);
|
||||
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_IMPL_H */
|
||||
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_LEAF_H
|
||||
#define _SYS_ZAP_LEAF_H
|
||||
|
||||
#pragma ident "@(#)zap_leaf.h 1.5 07/11/16 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct zap;
|
||||
|
||||
#define ZAP_LEAF_MAGIC 0x2AB1EAF
|
||||
|
||||
/* chunk size = 24 bytes */
|
||||
#define ZAP_LEAF_CHUNKSIZE 24
|
||||
|
||||
/*
|
||||
* The amount of space available for chunks is:
|
||||
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
|
||||
* entries - header space (2*chunksize)
|
||||
*/
|
||||
#define ZAP_LEAF_NUMCHUNKS(l) \
|
||||
(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
|
||||
ZAP_LEAF_CHUNKSIZE - 2)
|
||||
|
||||
/*
|
||||
* The amount of space within the chunk available for the array is:
|
||||
* chunk size - space for type (1) - space for next pointer (2)
|
||||
*/
|
||||
#define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
|
||||
|
||||
#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
|
||||
(((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES)
|
||||
|
||||
/*
|
||||
* Low water mark: when there are only this many chunks free, start
|
||||
* growing the ptrtbl. Ideally, this should be larger than a
|
||||
* "reasonably-sized" entry. 20 chunks is more than enough for the
|
||||
* largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
|
||||
* while still being only around 3% for 16k blocks.
|
||||
*/
|
||||
#define ZAP_LEAF_LOW_WATER (20)
|
||||
|
||||
/*
|
||||
* The leaf hash table has block size / 2^5 (32) number of entries,
|
||||
* which should be more than enough for the maximum number of entries,
|
||||
* which is less than block size / CHUNKSIZE (24) / minimum number of
|
||||
* chunks per entry (3).
|
||||
*/
|
||||
#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
|
||||
#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
|
||||
|
||||
/*
|
||||
* The chunks start immediately after the hash table. The end of the
|
||||
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
|
||||
* chunk_t.
|
||||
*/
|
||||
#define ZAP_LEAF_CHUNK(l, idx) \
|
||||
((zap_leaf_chunk_t *) \
|
||||
((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx]
|
||||
#define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry)
|
||||
|
||||
typedef enum zap_chunk_type { /* values stored in the first (type) byte of a zap_leaf_chunk */
|
||||
ZAP_CHUNK_FREE = 253, /* lf_type of a zap_leaf_free chunk */
|
||||
ZAP_CHUNK_ENTRY = 252, /* le_type of a zap_leaf_entry chunk */
|
||||
ZAP_CHUNK_ARRAY = 251, /* la_type of a zap_leaf_array chunk */
|
||||
ZAP_CHUNK_TYPE_MAX = 250 /* NOTE(review): sits below the three real types; exact role not visible here */
|
||||
} zap_chunk_type_t;
|
||||
|
||||
#define ZLF_ENTRIES_CDSORTED (1<<0)
|
||||
|
||||
/*
|
||||
* TAKE NOTE:
|
||||
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
|
||||
*/
|
||||
typedef struct zap_leaf_phys {
|
||||
struct zap_leaf_header {
|
||||
uint64_t lh_block_type; /* ZBT_LEAF */
|
||||
uint64_t lh_pad1;
|
||||
uint64_t lh_prefix; /* hash prefix of this leaf */
|
||||
uint32_t lh_magic; /* ZAP_LEAF_MAGIC */
|
||||
uint16_t lh_nfree; /* number free chunks */
|
||||
uint16_t lh_nentries; /* number of entries */
|
||||
uint16_t lh_prefix_len; /* num bits used to id this */
|
||||
|
||||
/* above is accessible to zap, below is zap_leaf private */
|
||||
|
||||
uint16_t lh_freelist; /* chunk head of free list */
|
||||
uint8_t lh_flags; /* ZLF_* flags */
|
||||
uint8_t lh_pad2[11];
|
||||
} l_hdr; /* 2 24-byte chunks */
|
||||
|
||||
/*
|
||||
* The header is followed by a hash table with
|
||||
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
|
||||
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
|
||||
* zap_leaf_chunk structures. These structures are accessed
|
||||
* with the ZAP_LEAF_CHUNK() macro.
|
||||
*/
|
||||
|
||||
uint16_t l_hash[1];
|
||||
} zap_leaf_phys_t;
|
||||
|
||||
typedef union zap_leaf_chunk { /* one leaf chunk; every variant below is 24 bytes (ZAP_LEAF_CHUNKSIZE) */
|
||||
struct zap_leaf_entry { /* head chunk of an entry; name and value live in separate chunks */
|
||||
uint8_t le_type; /* always ZAP_CHUNK_ENTRY */
|
||||
uint8_t le_int_size; /* size of ints */
|
||||
uint16_t le_next; /* next entry in hash chain */
|
||||
uint16_t le_name_chunk; /* first chunk of the name */
|
||||
uint16_t le_name_length; /* bytes in name, incl null */
|
||||
uint16_t le_value_chunk; /* first chunk of the value */
|
||||
uint16_t le_value_length; /* value length in ints */
|
||||
uint32_t le_cd; /* collision differentiator */
|
||||
uint64_t le_hash; /* hash value of the name */
|
||||
} l_entry; /* 1 + 1 + 2 + 2 + 2 + 2 + 2 + 4 + 8 = 24 bytes */
|
||||
struct zap_leaf_array { /* one link in a chain of name or value bytes */
|
||||
uint8_t la_type; /* always ZAP_CHUNK_ARRAY */
|
||||
uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; /* payload: ZAP_LEAF_CHUNKSIZE - 3 = 21 bytes */
|
||||
uint16_t la_next; /* next blk or CHAIN_END */
|
||||
} l_array; /* 1 + 21 + 2 = 24 bytes */
|
||||
struct zap_leaf_free { /* a chunk on the free list */
|
||||
uint8_t lf_type; /* always ZAP_CHUNK_FREE */
|
||||
uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; /* filler; puts lf_next at the same offset as la_next */
|
||||
uint16_t lf_next; /* next in free list, or CHAIN_END */
|
||||
} l_free; /* 1 + 21 + 2 = 24 bytes */
|
||||
} zap_leaf_chunk_t;
|
||||
|
||||
typedef struct zap_leaf {
|
||||
krwlock_t l_rwlock; /* only used on head of chain */
|
||||
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
|
||||
int l_bs; /* block size shift */
|
||||
dmu_buf_t *l_dbuf;
|
||||
zap_leaf_phys_t *l_phys;
|
||||
} zap_leaf_t;
|
||||
|
||||
|
||||
typedef struct zap_entry_handle {
|
||||
/* below is set by zap_leaf.c and is public to zap.c */
|
||||
uint64_t zeh_num_integers;
|
||||
uint64_t zeh_hash;
|
||||
uint32_t zeh_cd;
|
||||
uint8_t zeh_integer_size;
|
||||
|
||||
/* below is private to zap_leaf.c */
|
||||
uint16_t zeh_fakechunk;
|
||||
uint16_t *zeh_chunkp;
|
||||
zap_leaf_t *zeh_leaf;
|
||||
} zap_entry_handle_t;
|
||||
|
||||
/*
|
||||
* Return a handle to the named entry, or ENOENT if not found. The hash
|
||||
* value must equal zap_hash(name).
|
||||
*/
|
||||
extern int zap_leaf_lookup(zap_leaf_t *l,
|
||||
zap_name_t *zn, zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Return a handle to the entry with this hash+cd, or the entry with the
|
||||
* next closest hash+cd.
|
||||
*/
|
||||
extern int zap_leaf_lookup_closest(zap_leaf_t *l,
|
||||
uint64_t hash, uint32_t cd, zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Read the first num_integers in the attribute. Integer size
|
||||
* conversion will be done without sign extension. Return EINVAL if
|
||||
* integer_size is too small. Return EOVERFLOW if there are more than
|
||||
* num_integers in the attribute.
|
||||
*/
|
||||
extern int zap_entry_read(const zap_entry_handle_t *zeh,
|
||||
uint8_t integer_size, uint64_t num_integers, void *buf);
|
||||
|
||||
extern int zap_entry_read_name(const zap_entry_handle_t *zeh,
|
||||
uint16_t buflen, char *buf);
|
||||
|
||||
/*
|
||||
* Replace the value of an existing entry.
|
||||
*
|
||||
* zap_entry_update may fail if it runs out of space (ENOSPC).
|
||||
*/
|
||||
extern int zap_entry_update(zap_entry_handle_t *zeh,
|
||||
uint8_t integer_size, uint64_t num_integers, const void *buf);
|
||||
|
||||
/*
|
||||
* Remove an entry.
|
||||
*/
|
||||
extern void zap_entry_remove(zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Create an entry. An equal entry must not exist, and this entry must
|
||||
* belong in this leaf (according to its hash value). Fills in the
|
||||
* entry handle on success. Returns 0 on success or ENOSPC on failure.
|
||||
*/
|
||||
extern int zap_entry_create(zap_leaf_t *l,
|
||||
const char *name, uint64_t h, uint32_t cd,
|
||||
uint8_t integer_size, uint64_t num_integers, const void *buf,
|
||||
zap_entry_handle_t *zeh);
|
||||
|
||||
/*
|
||||
* Return true if there are additional entries with the same normalized
|
||||
* form.
|
||||
*/
|
||||
extern boolean_t zap_entry_normalization_conflict(zap_entry_handle_t *zeh,
|
||||
zap_name_t *zn, const char *name, zap_t *zap);
|
||||
|
||||
/*
|
||||
* Other stuff.
|
||||
*/
|
||||
|
||||
extern void zap_leaf_init(zap_leaf_t *l, boolean_t sort);
|
||||
extern void zap_leaf_byteswap(zap_leaf_phys_t *buf, int len);
|
||||
extern void zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort);
|
||||
extern void zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZAP_LEAF_H */
|
||||
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_ACL_H
|
||||
#define _SYS_FS_ZFS_ACL_H
|
||||
|
||||
#pragma ident "@(#)zfs_acl.h 1.9 08/04/08 SMI"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#endif
|
||||
#include <sys/acl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_fuid.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct znode_phys;
|
||||
|
||||
#define ACE_SLOT_CNT 6
|
||||
#define ZFS_ACL_VERSION_INITIAL 0ULL
|
||||
#define ZFS_ACL_VERSION_FUID 1ULL
|
||||
#define ZFS_ACL_VERSION ZFS_ACL_VERSION_FUID
|
||||
|
||||
/*
|
||||
* ZFS ACLs are stored in various forms.
|
||||
* Files created with ACL version ZFS_ACL_VERSION_INITIAL
|
||||
* will all be created with fixed length ACEs of type
|
||||
* zfs_oldace_t.
|
||||
*
|
||||
* Files with ACL version ZFS_ACL_VERSION_FUID will be created
|
||||
* with various sized ACEs. The abstraction entries will utilize
|
||||
* zfs_ace_hdr_t, normal user/group entries will use zfs_ace_t
|
||||
* and some specialized CIFS ACEs will use zfs_object_ace_t.
|
||||
*/
|
||||
|
||||
/*
|
||||
* All ACEs have a common hdr. For
|
||||
* owner@, group@, and everyone@ this is all
|
||||
* that's needed.
|
||||
*/
|
||||
typedef struct zfs_ace_hdr {
|
||||
uint16_t z_type;
|
||||
uint16_t z_flags;
|
||||
uint32_t z_access_mask;
|
||||
} zfs_ace_hdr_t;
|
||||
|
||||
typedef zfs_ace_hdr_t zfs_ace_abstract_t;
|
||||
|
||||
/*
|
||||
* Standard ACE
|
||||
*/
|
||||
typedef struct zfs_ace {
|
||||
zfs_ace_hdr_t z_hdr;
|
||||
uint64_t z_fuid;
|
||||
} zfs_ace_t;
|
||||
|
||||
/*
|
||||
* The following type only applies to ACE_ACCESS_ALLOWED|DENIED_OBJECT_ACE_TYPE
|
||||
* and will only be set/retrieved in a CIFS context.
|
||||
*/
|
||||
|
||||
typedef struct zfs_object_ace {
|
||||
zfs_ace_t z_ace;
|
||||
uint8_t z_object_type[16]; /* object type */
|
||||
uint8_t z_inherit_type[16]; /* inherited object type */
|
||||
} zfs_object_ace_t;
|
||||
|
||||
typedef struct zfs_oldace {
|
||||
uint32_t z_fuid; /* "who" */
|
||||
uint32_t z_access_mask; /* access mask */
|
||||
uint16_t z_flags; /* flags, i.e inheritance */
|
||||
uint16_t z_type; /* type of entry allow/deny */
|
||||
} zfs_oldace_t;
|
||||
|
||||
typedef struct zfs_acl_phys_v0 {
|
||||
uint64_t z_acl_extern_obj; /* ext acl pieces */
|
||||
uint32_t z_acl_count; /* Number of ACEs */
|
||||
uint16_t z_acl_version; /* acl version */
|
||||
uint16_t z_acl_pad; /* pad */
|
||||
zfs_oldace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */
|
||||
} zfs_acl_phys_v0_t;
|
||||
|
||||
/*
 * Bytes reserved for embedded ACEs: ACE_SLOT_CNT entries of
 * sizeof (zfs_oldace_t) each, i.e. the same size as the
 * z_ace_data[] array in zfs_acl_phys_v0_t.
 */
#define ZFS_ACE_SPACE (sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
|
||||
|
||||
typedef struct zfs_acl_phys {
|
||||
uint64_t z_acl_extern_obj; /* ext acl pieces */
|
||||
uint32_t z_acl_size; /* Number of bytes in ACL */
|
||||
uint16_t z_acl_version; /* acl version */
|
||||
uint16_t z_acl_count; /* ace count */
|
||||
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
|
||||
} zfs_acl_phys_t;
|
||||
|
||||
|
||||
|
||||
typedef struct acl_ops {
|
||||
uint32_t (*ace_mask_get) (void *acep); /* get access mask */
|
||||
void (*ace_mask_set) (void *acep,
|
||||
uint32_t mask); /* set access mask */
|
||||
uint16_t (*ace_flags_get) (void *acep); /* get flags */
|
||||
void (*ace_flags_set) (void *acep,
|
||||
uint16_t flags); /* set flags */
|
||||
uint16_t (*ace_type_get)(void *acep); /* get type */
|
||||
void (*ace_type_set)(void *acep,
|
||||
uint16_t type); /* set type */
|
||||
uint64_t (*ace_who_get)(void *acep); /* get who/fuid */
|
||||
void (*ace_who_set)(void *acep,
|
||||
uint64_t who); /* set who/fuid */
|
||||
size_t (*ace_size)(void *acep); /* how big is this ace */
|
||||
size_t (*ace_abstract_size)(void); /* sizeof abstract entry */
|
||||
int (*ace_mask_off)(void); /* off of access mask in ace */
|
||||
int (*ace_data)(void *acep, void **datap);
|
||||
/* ptr to data if any */
|
||||
} acl_ops_t;
|
||||
|
||||
/*
|
||||
* A zfs_acl_t structure is composed of a list of zfs_acl_node_t's.
|
||||
* Each node will have one or more ACEs associated with it. You will
|
||||
* only have multiple nodes during a chmod operation. Normally only
|
||||
* one node is required.
|
||||
*/
|
||||
typedef struct zfs_acl_node {
|
||||
list_node_t z_next; /* Next chunk of ACEs */
|
||||
void *z_acldata; /* pointer into actual ACE(s) */
|
||||
void *z_allocdata; /* pointer to kmem allocated memory */
|
||||
size_t z_allocsize; /* Size of blob in bytes */
|
||||
size_t z_size; /* length of ACL data */
|
||||
int z_ace_count; /* number of ACEs in this acl node */
|
||||
int z_ace_idx; /* ace iterator positioned on */
|
||||
} zfs_acl_node_t;
|
||||
|
||||
typedef struct zfs_acl {
|
||||
int z_acl_count; /* Number of ACEs */
|
||||
size_t z_acl_bytes; /* Number of bytes in ACL */
|
||||
uint_t z_version; /* version of ACL */
|
||||
void *z_next_ace; /* pointer to next ACE */
|
||||
int z_hints; /* ACL hints (ZFS_INHERIT_ACE ...) */
|
||||
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
|
||||
list_t z_acl; /* chunks of ACE data */
|
||||
acl_ops_t z_ops; /* ACL operations */
|
||||
boolean_t z_has_fuids; /* FUIDs present in ACL? */
|
||||
} zfs_acl_t;
|
||||
|
||||
#define ACL_DATA_ALLOCED 0x1
|
||||
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
|
||||
|
||||
/*
|
||||
* Property values for acl_mode and acl_inherit.
|
||||
*
|
||||
* acl_mode can take discard, noallow, groupmask and passthrough,
|
||||
* whereas acl_inherit has secure instead of groupmask.
|
||||
*/
|
||||
|
||||
#define ZFS_ACL_DISCARD 0
|
||||
#define ZFS_ACL_NOALLOW 1
|
||||
#define ZFS_ACL_GROUPMASK 2
|
||||
#define ZFS_ACL_PASSTHROUGH 3
|
||||
#define ZFS_ACL_RESTRICTED 4
|
||||
|
||||
struct znode;
|
||||
struct zfsvfs;
|
||||
struct zfs_fuid_info;
|
||||
|
||||
#ifdef _KERNEL
|
||||
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
|
||||
dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
void zfs_acl_rele(void *);
|
||||
void zfs_oldace_byteswap(ace_t *, int);
|
||||
void zfs_ace_byteswap(void *, size_t, boolean_t);
|
||||
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
|
||||
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
|
||||
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
|
||||
extern int zfs_acl_access(struct znode *, int, cred_t *);
|
||||
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
|
||||
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
|
||||
int zfs_zaccess_rename(struct znode *, struct znode *,
|
||||
struct znode *, struct znode *, cred_t *cr);
|
||||
void zfs_acl_free(zfs_acl_t *);
|
||||
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
|
||||
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
|
||||
struct zfs_fuid_info **, dmu_tx_t *);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* _SYS_FS_ZFS_ACL_H */
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
|
||||
#pragma ident "@(#)zfs_context.h 1.3 07/10/24 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/note.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/t_lock.h>
|
||||
#include <sys/atomic.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/bitmap.h>
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/cpuvar.h>
|
||||
#include <sys/kobj.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/disp.h>
|
||||
#include <sys/debug.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/dirent.h>
|
||||
#include <sys/time.h>
|
||||
#include <vm/seg_kmem.h>
|
||||
#include <sys/zone.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/zfs_debug.h>
|
||||
#include <sys/sysevent.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
#include <sys/fm/util.h>
|
||||
|
||||
#define CPU_SEQID (CPU->cpu_seqid)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_CONTEXT_H */
|
||||
@@ -0,0 +1,538 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define _SYS_MUTEX_H
|
||||
#define _SYS_RWLOCK_H
|
||||
#define _SYS_CONDVAR_H
|
||||
#define _SYS_SYSTM_H
|
||||
#define _SYS_DEBUG_H
|
||||
#define _SYS_T_LOCK_H
|
||||
#define _SYS_VNODE_H
|
||||
#define _SYS_VFS_H
|
||||
#define _SYS_SUNDDI_H
|
||||
#define _SYS_CALLB_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <synch.h>
|
||||
#include <thread.h>
|
||||
#include <assert.h>
|
||||
#include <alloca.h>
|
||||
#include <umem.h>
|
||||
#include <limits.h>
|
||||
#include <atomic.h>
|
||||
#include <dirent.h>
|
||||
#include <time.h>
|
||||
#include <sys/note.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/cred.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/bitmap.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/zfs_debug.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/u8_textprep.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
|
||||
/*
|
||||
* Debugging
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note that we are not using the debugging levels.
|
||||
*/
|
||||
|
||||
#define CE_CONT 0 /* continuation */
|
||||
#define CE_NOTE 1 /* notice */
|
||||
#define CE_WARN 2 /* warning */
|
||||
#define CE_PANIC 3 /* panic */
|
||||
#define CE_IGNORE 4 /* print nothing */
|
||||
|
||||
/*
|
||||
* ZFS debugging
|
||||
*/
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
extern void dprintf_setup(int *argc, char **argv);
|
||||
#endif /* ZFS_DEBUG */
|
||||
|
||||
extern void cmn_err(int, const char *, ...);
|
||||
extern void vcmn_err(int, const char *, __va_list);
|
||||
extern void panic(const char *, ...);
|
||||
extern void vpanic(const char *, __va_list);
|
||||
|
||||
#define fm_panic panic
|
||||
|
||||
/* This definition is copied from assert.h. */
|
||||
#if defined(__STDC__)
|
||||
#if __STDC_VERSION__ - 0 >= 199901L
|
||||
#define verify(EX) (void)((EX) || \
|
||||
(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
|
||||
#else
|
||||
#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
|
||||
#endif /* __STDC_VERSION__ - 0 >= 199901L */
|
||||
#else
|
||||
#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
|
||||
#endif /* __STDC__ */
|
||||
|
||||
|
||||
#define VERIFY verify
|
||||
#define ASSERT assert
|
||||
|
||||
extern void __assert(const char *, const char *, int);
|
||||
|
||||
#ifdef lint
|
||||
#define VERIFY3_IMPL(x, y, z, t) if (x == z) ((void)0)
|
||||
#else
|
||||
/* BEGIN CSTYLED */
|
||||
#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
|
||||
const TYPE __left = (TYPE)(LEFT); \
|
||||
const TYPE __right = (TYPE)(RIGHT); \
|
||||
if (!(__left OP __right)) { \
|
||||
char *__buf = alloca(256); \
|
||||
(void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
|
||||
#LEFT, #OP, #RIGHT, \
|
||||
(u_longlong_t)__left, #OP, (u_longlong_t)__right); \
|
||||
__assert(__buf, __FILE__, __LINE__); \
|
||||
} \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
/* END CSTYLED */
|
||||
#endif /* lint */
|
||||
|
||||
#define VERIFY3S(x, y, z) VERIFY3_IMPL(x, y, z, int64_t)
|
||||
#define VERIFY3U(x, y, z) VERIFY3_IMPL(x, y, z, uint64_t)
|
||||
#define VERIFY3P(x, y, z) VERIFY3_IMPL(x, y, z, uintptr_t)
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define ASSERT3S(x, y, z) ((void)0)
|
||||
#define ASSERT3U(x, y, z) ((void)0)
|
||||
#define ASSERT3P(x, y, z) ((void)0)
|
||||
#else
|
||||
#define ASSERT3S(x, y, z) VERIFY3S(x, y, z)
|
||||
#define ASSERT3U(x, y, z) VERIFY3U(x, y, z)
|
||||
#define ASSERT3P(x, y, z) VERIFY3P(x, y, z)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* DTrace SDT probes have different signatures in userland than they do in
|
||||
* kernel. If they're being used in kernel code, re-define them out of
|
||||
* existence for their counterparts in libzpool.
|
||||
*/
|
||||
|
||||
#ifdef DTRACE_PROBE1
|
||||
#undef DTRACE_PROBE1
|
||||
#define DTRACE_PROBE1(a, b, c) ((void)0)
|
||||
#endif /* DTRACE_PROBE1 */
|
||||
|
||||
#ifdef DTRACE_PROBE2
|
||||
#undef DTRACE_PROBE2
|
||||
#define DTRACE_PROBE2(a, b, c, d, e) ((void)0)
|
||||
#endif /* DTRACE_PROBE2 */
|
||||
|
||||
#ifdef DTRACE_PROBE3
|
||||
#undef DTRACE_PROBE3
|
||||
#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0)
|
||||
#endif /* DTRACE_PROBE3 */
|
||||
|
||||
#ifdef DTRACE_PROBE4
|
||||
#undef DTRACE_PROBE4
|
||||
#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0)
|
||||
#endif /* DTRACE_PROBE4 */
|
||||
|
||||
/*
|
||||
* Threads
|
||||
*/
|
||||
/*
 * The value returned by thr_self(), converted through uintptr_t to an
 * opaque void pointer.
 */
#define curthread ((void *)(uintptr_t)thr_self())
|
||||
|
||||
typedef struct kthread kthread_t;
|
||||
|
||||
/*
 * Only func and arg are forwarded to zk_thread_create(); the stk,
 * stksize, len, pp, state and pri arguments are discarded.
 */
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
|
||||
zk_thread_create(func, arg)
|
||||
#define thread_exit() thr_exit(NULL)
|
||||
|
||||
extern kthread_t *zk_thread_create(void (*func)(), void *arg);
|
||||
|
||||
#define issig(why) (FALSE)
|
||||
#define ISSIG(thr, why) (FALSE)
|
||||
|
||||
/*
|
||||
* Mutexes
|
||||
*/
|
||||
typedef struct kmutex {
|
||||
void *m_owner;
|
||||
boolean_t initialized;
|
||||
mutex_t m_lock;
|
||||
} kmutex_t;
|
||||
|
||||
#define MUTEX_DEFAULT USYNC_THREAD
|
||||
#undef MUTEX_HELD
|
||||
#define MUTEX_HELD(m) _mutex_held(&(m)->m_lock)
|
||||
|
||||
/*
|
||||
* Argh -- we have to get cheesy here because the kernel and userland
|
||||
* have different signatures for the same routine.
|
||||
*/
|
||||
extern int _mutex_init(mutex_t *mp, int type, void *arg);
|
||||
extern int _mutex_destroy(mutex_t *mp);
|
||||
|
||||
/*
 * Both wrappers cast mp to kmutex_t * and hand it to the zmutex_*
 * routine.  mutex_init() discards its b, c and d arguments.
 */
#define mutex_init(mp, b, c, d) zmutex_init((kmutex_t *)(mp))
|
||||
#define mutex_destroy(mp) zmutex_destroy((kmutex_t *)(mp))
|
||||
|
||||
extern void zmutex_init(kmutex_t *mp);
|
||||
extern void zmutex_destroy(kmutex_t *mp);
|
||||
extern void mutex_enter(kmutex_t *mp);
|
||||
extern void mutex_exit(kmutex_t *mp);
|
||||
extern int mutex_tryenter(kmutex_t *mp);
|
||||
extern void *mutex_owner(kmutex_t *mp);
|
||||
|
||||
/*
|
||||
* RW locks
|
||||
*/
|
||||
typedef struct krwlock {
|
||||
void *rw_owner;
|
||||
boolean_t initialized;
|
||||
rwlock_t rw_lock;
|
||||
} krwlock_t;
|
||||
|
||||
typedef int krw_t;
|
||||
|
||||
#define RW_READER 0
|
||||
#define RW_WRITER 1
|
||||
#define RW_DEFAULT USYNC_THREAD
|
||||
|
||||
#undef RW_READ_HELD
|
||||
#define RW_READ_HELD(x) _rw_read_held(&(x)->rw_lock)
|
||||
|
||||
#undef RW_WRITE_HELD
|
||||
#define RW_WRITE_HELD(x) _rw_write_held(&(x)->rw_lock)
|
||||
|
||||
extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
|
||||
extern void rw_destroy(krwlock_t *rwlp);
|
||||
extern void rw_enter(krwlock_t *rwlp, krw_t rw);
|
||||
extern int rw_tryenter(krwlock_t *rwlp, krw_t rw);
|
||||
extern int rw_tryupgrade(krwlock_t *rwlp);
|
||||
extern void rw_exit(krwlock_t *rwlp);
|
||||
/*
 * Expands to an empty statement: rwlp is not evaluated and no lock
 * routine is called.
 */
#define rw_downgrade(rwlp) do { } while (0)
|
||||
|
||||
extern uid_t crgetuid(cred_t *cr);
|
||||
extern gid_t crgetgid(cred_t *cr);
|
||||
extern int crgetngroups(cred_t *cr);
|
||||
extern gid_t *crgetgroups(cred_t *cr);
|
||||
|
||||
/*
|
||||
* Condition variables
|
||||
*/
|
||||
typedef cond_t kcondvar_t;
|
||||
|
||||
#define CV_DEFAULT USYNC_THREAD
|
||||
|
||||
extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
|
||||
extern void cv_destroy(kcondvar_t *cv);
|
||||
extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
|
||||
extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
|
||||
extern void cv_signal(kcondvar_t *cv);
|
||||
extern void cv_broadcast(kcondvar_t *cv);
|
||||
|
||||
/*
|
||||
* kstat creation, installation and deletion
|
||||
*/
|
||||
extern kstat_t *kstat_create(char *, int,
|
||||
char *, char *, uchar_t, ulong_t, uchar_t);
|
||||
extern void kstat_install(kstat_t *);
|
||||
extern void kstat_delete(kstat_t *);
|
||||
|
||||
/*
|
||||
* Kernel memory
|
||||
*/
|
||||
#define KM_SLEEP UMEM_NOFAIL
|
||||
#define KM_NOSLEEP UMEM_DEFAULT
|
||||
#define KMC_NODEBUG UMC_NODEBUG
|
||||
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
|
||||
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
|
||||
#define kmem_free(_b, _s) umem_free(_b, _s)
|
||||
#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
|
||||
umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
|
||||
#define kmem_cache_destroy(_c) umem_cache_destroy(_c)
|
||||
#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
|
||||
#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
|
||||
#define kmem_debugging() 0
|
||||
/* Expands to nothing; the argument is discarded. */
#define kmem_cache_reap_now(c)
|
||||
|
||||
typedef umem_cache_t kmem_cache_t;
|
||||
|
||||
/*
|
||||
* Task queues
|
||||
*/
|
||||
typedef struct taskq taskq_t;
|
||||
typedef uintptr_t taskqid_t;
|
||||
typedef void (task_func_t)(void *);
|
||||
|
||||
#define TASKQ_PREPOPULATE 0x0001
|
||||
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
|
||||
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
|
||||
|
||||
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
|
||||
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
|
||||
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
|
||||
|
||||
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
||||
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
|
||||
extern void taskq_destroy(taskq_t *);
|
||||
extern void taskq_wait(taskq_t *);
|
||||
extern int taskq_member(taskq_t *, void *);
|
||||
|
||||
#define XVA_MAPSIZE 3
|
||||
#define XVA_MAGIC 0x78766174
|
||||
|
||||
/*
|
||||
* vnodes
|
||||
*/
|
||||
/*
 * Minimal vnode used by this userland context.  The VOP_* macros in this
 * header read v_size and v_fd directly.
 */
typedef struct vnode {
|
||||
uint64_t v_size; /* copied into va_size by VOP_GETATTR() */
|
||||
int v_fd; /* descriptor handed to fsync() by VOP_FSYNC() */
|
||||
char *v_path; /* NOTE(review): presumably the path given to vn_open() -- confirm */
|
||||
} vnode_t;
|
||||
|
||||
|
||||
typedef struct xoptattr {
|
||||
timestruc_t xoa_createtime; /* Create time of file */
|
||||
uint8_t xoa_archive;
|
||||
uint8_t xoa_system;
|
||||
uint8_t xoa_readonly;
|
||||
uint8_t xoa_hidden;
|
||||
uint8_t xoa_nounlink;
|
||||
uint8_t xoa_immutable;
|
||||
uint8_t xoa_appendonly;
|
||||
uint8_t xoa_nodump;
|
||||
uint8_t xoa_settable;
|
||||
uint8_t xoa_opaque;
|
||||
uint8_t xoa_av_quarantined;
|
||||
uint8_t xoa_av_modified;
|
||||
} xoptattr_t;
|
||||
|
||||
typedef struct vattr {
|
||||
uint_t va_mask; /* bit-mask of attributes */
|
||||
u_offset_t va_size; /* file size in bytes */
|
||||
} vattr_t;
|
||||
|
||||
|
||||
typedef struct xvattr {
|
||||
vattr_t xva_vattr; /* Embedded vattr structure */
|
||||
uint32_t xva_magic; /* Magic Number */
|
||||
uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */
|
||||
uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */
|
||||
uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */
|
||||
uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */
|
||||
xoptattr_t xva_xoptattrs; /* Optional attributes */
|
||||
} xvattr_t;
|
||||
|
||||
typedef struct vsecattr {
|
||||
uint_t vsa_mask; /* See below */
|
||||
int vsa_aclcnt; /* ACL entry count */
|
||||
void *vsa_aclentp; /* pointer to ACL entries */
|
||||
int vsa_dfaclcnt; /* default ACL entry count */
|
||||
void *vsa_dfaclentp; /* pointer to default ACL entries */
|
||||
size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */
|
||||
} vsecattr_t;
|
||||
|
||||
#define AT_TYPE 0x00001
|
||||
#define AT_MODE 0x00002
|
||||
#define AT_UID 0x00004
|
||||
#define AT_GID 0x00008
|
||||
#define AT_FSID 0x00010
|
||||
#define AT_NODEID 0x00020
|
||||
#define AT_NLINK 0x00040
|
||||
#define AT_SIZE 0x00080
|
||||
#define AT_ATIME 0x00100
|
||||
#define AT_MTIME 0x00200
|
||||
#define AT_CTIME 0x00400
|
||||
#define AT_RDEV 0x00800
|
||||
#define AT_BLKSIZE 0x01000
|
||||
#define AT_NBLOCKS 0x02000
|
||||
#define AT_SEQ 0x08000
|
||||
#define AT_XVATTR 0x10000
|
||||
|
||||
#define CRCREAT 0
|
||||
|
||||
/* VOP_CLOSE and VOP_PUTPAGE ignore every argument and evaluate to 0. */
#define VOP_CLOSE(vp, f, c, o, cr, ct) 0
|
||||
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
|
||||
/* Stores vp->v_size in vap->va_size and evaluates to 0; fl, cr, ct unused. */
#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0)
|
||||
|
||||
/* Evaluates to the result of fsync() on vp->v_fd; f, cr and ct are unused. */
#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
|
||||
|
||||
#define VN_RELE(vp) vn_close(vp)
|
||||
|
||||
extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
|
||||
int x2, int x3);
|
||||
extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
|
||||
int x2, int x3, vnode_t *vp, int fd);
|
||||
extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
|
||||
offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
|
||||
extern void vn_close(vnode_t *vp);
|
||||
|
||||
#define vn_remove(path, x1, x2) remove(path)
|
||||
#define vn_rename(from, to, seg) rename((from), (to))
|
||||
#define vn_is_readonly(vp) B_FALSE
|
||||
|
||||
extern vnode_t *rootdir;
|
||||
|
||||
#include <sys/file.h> /* for FREAD, FWRITE, etc */
|
||||
|
||||
/*
|
||||
* Random stuff
|
||||
*/
|
||||
#define lbolt (gethrtime() >> 23)
|
||||
#define lbolt64 (gethrtime() >> 23)
|
||||
#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
|
||||
|
||||
extern void delay(clock_t ticks);
|
||||
|
||||
#define gethrestime_sec() time(NULL)
|
||||
|
||||
/* Keep this a power of two: CPU_SEQID masks with (max_ncpus - 1). */
#define max_ncpus 64
|
||||
|
||||
#define minclsyspri 60
|
||||
#define maxclsyspri 99
|
||||
|
||||
/*
 * Folds the thr_self() value into the range [0, max_ncpus) with a bit
 * mask; the mask only covers that whole range while max_ncpus is a
 * power of two.
 */
#define CPU_SEQID (thr_self() & (max_ncpus - 1))
|
||||
|
||||
#define kcred NULL
|
||||
#define CRED() NULL
|
||||
|
||||
extern uint64_t physmem;
|
||||
|
||||
extern int highbit(ulong_t i);
|
||||
extern int random_get_bytes(uint8_t *ptr, size_t len);
|
||||
extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
|
||||
|
||||
extern void kernel_init(int);
|
||||
extern void kernel_fini(void);
|
||||
|
||||
struct spa;
|
||||
extern void nicenum(uint64_t num, char *buf);
|
||||
extern void show_pool_stats(struct spa *);
|
||||
|
||||
typedef struct callb_cpr {
|
||||
kmutex_t *cc_lockp;
|
||||
} callb_cpr_t;
|
||||
|
||||
/*
 * Stand-ins for the CALLB_CPR_* callback macros.  Here they only record
 * the lock in the callb_cpr_t and assert that it is held.
 *
 * Each macro is wrapped in do { } while (0) so that "MACRO(...);" is a
 * single statement and can be used as the unbraced body of an if/else.
 * Callers must terminate every use with a semicolon.
 *
 * CALLB_CPR_INIT(cp, lockp, func, name)
 *	Store lockp in cp->cc_lockp.  func and name are ignored.
 * CALLB_CPR_SAFE_BEGIN(cp)
 *	Assert that cp->cc_lockp is held.
 * CALLB_CPR_SAFE_END(cp, lockp)
 *	Assert that cp->cc_lockp is held.  The lockp argument is ignored;
 *	the lock recorded by CALLB_CPR_INIT is the one checked.
 * CALLB_CPR_EXIT(cp)
 *	Assert that cp->cc_lockp is held, then release it with mutex_exit().
 */
#define	CALLB_CPR_INIT(cp, lockp, func, name)	do {		\
	(cp)->cc_lockp = (lockp);				\
} while (0)

#define	CALLB_CPR_SAFE_BEGIN(cp)	do {			\
	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
} while (0)

#define	CALLB_CPR_SAFE_END(cp, lockp)	do {			\
	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
} while (0)

#define	CALLB_CPR_EXIT(cp)	do {				\
	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
	mutex_exit((cp)->cc_lockp);				\
} while (0)
|
||||
|
||||
#define zone_dataset_visible(x, y) (1)
|
||||
#define INGLOBALZONE(z) (1)
|
||||
|
||||
/*
|
||||
* Hostname information
|
||||
*/
|
||||
extern char hw_serial[];
|
||||
extern int ddi_strtoul(const char *str, char **nptr, int base,
|
||||
unsigned long *result);
|
||||
|
||||
/* ZFS Boot Related stuff. */
|
||||
|
||||
struct _buf {
|
||||
intptr_t _fd;
|
||||
};
|
||||
|
||||
struct bootstat {
|
||||
uint64_t st_size;
|
||||
};
|
||||
|
||||
typedef struct ace_object {
|
||||
uid_t a_who;
|
||||
uint32_t a_access_mask;
|
||||
uint16_t a_flags;
|
||||
uint16_t a_type;
|
||||
uint8_t a_obj_type[16];
|
||||
uint8_t a_inherit_obj_type[16];
|
||||
} ace_object_t;
|
||||
|
||||
|
||||
#define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
|
||||
#define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06
|
||||
#define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07
|
||||
#define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08
|
||||
|
||||
extern struct _buf *kobj_open_file(char *name);
|
||||
extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
|
||||
unsigned off);
|
||||
extern void kobj_close_file(struct _buf *file);
|
||||
extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
|
||||
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
|
||||
cred_t *cr);
|
||||
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
|
||||
extern zoneid_t getzoneid(void);
|
||||
|
||||
/* SID stuff */
|
||||
typedef struct ksiddomain {
|
||||
uint_t kd_ref;
|
||||
uint_t kd_len;
|
||||
char *kd_name;
|
||||
} ksiddomain_t;
|
||||
|
||||
ksiddomain_t *ksid_lookupdomain(const char *);
|
||||
void ksiddomain_rele(ksiddomain_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_CONTEXT_H */
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_CTLDIR_H
|
||||
#define _ZFS_CTLDIR_H
|
||||
|
||||
#pragma ident "@(#)zfs_ctldir.h 1.4 08/02/22 SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZFS_CTLDIR_NAME ".zfs"
|
||||
|
||||
/*
 * zfs_has_ctldir(zdp) is nonzero when zdp->z_id equals
 * zdp->z_zfsvfs->z_root and zdp->z_zfsvfs->z_ctldir is non-NULL.
 * The id comparison is evaluated first; z_ctldir is only inspected
 * when the ids match.
 */
#define	zfs_has_ctldir(zdp)	\
	(!((zdp)->z_id != (zdp)->z_zfsvfs->z_root || \
	(zdp)->z_zfsvfs->z_ctldir == NULL))

/*
 * zfs_show_ctldir(zdp) is nonzero when zfs_has_ctldir(zdp) holds and
 * zdp->z_zfsvfs->z_show_ctldir is also nonzero.
 */
#define	zfs_show_ctldir(zdp)	\
	(zfs_has_ctldir(zdp) ? \
	((zdp)->z_zfsvfs->z_show_ctldir) != 0 : 0)
|
||||
|
||||
void zfsctl_create(zfsvfs_t *);
|
||||
void zfsctl_destroy(zfsvfs_t *);
|
||||
vnode_t *zfsctl_root(znode_t *);
|
||||
void zfsctl_init(void);
|
||||
void zfsctl_fini(void);
|
||||
|
||||
int zfsctl_rename_snapshot(const char *from, const char *to);
|
||||
int zfsctl_destroy_snapshot(const char *snapname, int force);
|
||||
int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
|
||||
|
||||
int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
|
||||
int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
|
||||
int *direntflags, pathname_t *realpnp);
|
||||
|
||||
int zfsctl_make_fid(zfsvfs_t *zfsvfsp, uint64_t object, uint32_t gen,
|
||||
fid_t *fidp);
|
||||
int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
|
||||
|
||||
#define ZFSCTL_INO_ROOT 0x1
|
||||
#define ZFSCTL_INO_SNAPDIR 0x2
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFS_CTLDIR_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_DEBUG_H
|
||||
#define _SYS_ZFS_DEBUG_H
|
||||
|
||||
#pragma ident "@(#)zfs_debug.h 1.3 07/02/25 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ZFS debugging
|
||||
*/
|
||||
|
||||
#if defined(DEBUG) || !defined(_KERNEL)
|
||||
#define ZFS_DEBUG
|
||||
#endif
|
||||
|
||||
extern int zfs_flags;
|
||||
|
||||
#define ZFS_DEBUG_DPRINTF 0x0001
|
||||
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
|
||||
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
|
||||
#define ZFS_DEBUG_SNAPNAMES 0x0008
|
||||
#define ZFS_DEBUG_MODIFY 0x0010
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
extern void __dprintf(const char *file, const char *func,
|
||||
int line, const char *fmt, ...);
|
||||
/*
 * Emit a debug message via __dprintf() when ZFS_DEBUG_DPRINTF is set in
 * zfs_flags.  The body is wrapped in do/while (0) so the expansion is one
 * complete statement: a caller's "else" following dprintf(...); can no
 * longer attach to the macro's internal "if".
 */
#define dprintf(...) \
	do { \
		if (zfs_flags & ZFS_DEBUG_DPRINTF) \
			__dprintf(__FILE__, __func__, __LINE__, \
			    __VA_ARGS__); \
	} while (0)
|
||||
#else
|
||||
#define dprintf(...) ((void)0)
|
||||
#endif /* ZFS_DEBUG */
|
||||
|
||||
extern void zfs_panic_recover(const char *fmt, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_DEBUG_H */
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_DIR_H
|
||||
#define _SYS_FS_ZFS_DIR_H
|
||||
|
||||
#pragma ident "@(#)zfs_dir.h 1.5 07/11/09 SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* zfs_dirent_lock() flags */
|
||||
#define ZNEW 0x0001 /* entry should not exist */
|
||||
#define ZEXISTS 0x0002 /* entry should exist */
|
||||
#define ZSHARED 0x0004 /* shared access (zfs_dirlook()) */
|
||||
#define ZXATTR 0x0008 /* we want the xattr dir */
|
||||
#define ZRENAMING 0x0010 /* znode is being renamed */
|
||||
#define ZCILOOK 0x0020 /* case-insensitive lookup requested */
|
||||
#define ZCIEXACT 0x0040 /* c-i requires c-s match (rename) */
|
||||
|
||||
/* mknode flags */
|
||||
#define IS_ROOT_NODE 0x01 /* create a root node */
|
||||
#define IS_XATTR 0x02 /* create an extended attribute node */
|
||||
#define IS_REPLAY 0x04 /* we are replaying intent log */
|
||||
|
||||
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
|
||||
int, int *, pathname_t *);
|
||||
extern void zfs_dirent_unlock(zfs_dirlock_t *);
|
||||
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
|
||||
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
|
||||
boolean_t *);
|
||||
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
|
||||
pathname_t *);
|
||||
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
|
||||
uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_rmnode(znode_t *);
|
||||
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
|
||||
extern boolean_t zfs_dirempty(znode_t *);
|
||||
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
|
||||
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
|
||||
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
|
||||
extern int zfs_get_xattrdir(znode_t *, vnode_t **, cred_t *, int);
|
||||
extern int zfs_make_xattrdir(znode_t *, vattr_t *, vnode_t **, cred_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_DIR_H */
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_FUID_H
|
||||
#define _SYS_FS_ZFS_FUID_H
|
||||
|
||||
#pragma ident "@(#)zfs_fuid.h 1.4 08/01/31 SMI"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kidmap.h>
|
||||
#include <sys/sid.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#endif
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
ZFS_OWNER,
|
||||
ZFS_GROUP,
|
||||
ZFS_ACE_USER,
|
||||
ZFS_ACE_GROUP
|
||||
} zfs_fuid_type_t;
|
||||
|
||||
/*
|
||||
* Estimate space needed for one more fuid table entry.
|
||||
* for now assume its current size + 1K
|
||||
*/
|
||||
/* (z) is parenthesized so any pointer-valued expression may be passed. */
#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
|
||||
|
||||
/* Bits 32 and up of a FUID; argument parenthesized for expression safety. */
#define FUID_INDEX(x) ((x) >> 32)
|
||||
/* Low 32 bits of a FUID; argument parenthesized for expression safety. */
#define FUID_RID(x) ((x) & 0xffffffff)
|
||||
/*
 * Combine idx (placed in bits 32 and up) with rid (low bits).  idx is
 * widened to 64 bits before shifting: shifting a 32-bit value by 32 is
 * undefined behavior.
 */
#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
|
||||
/*
|
||||
* FUIDs cause problems for the intent log
|
||||
* we need to replay the creation of the FUID,
|
||||
* but we can't count on the idmapper to be around
|
||||
* and during replay the FUID index may be different than
|
||||
* before. Also, if an ACL has 100 ACEs and 12 different
|
||||
* domains we don't want to log 100 domain strings, but rather
|
||||
* just the unique 12.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The FUIDs in the log will index into
|
||||
* domain string table and the bottom half will be the rid.
|
||||
* Used for mapping ephemeral uid/gid during ACL setting to FUIDs
|
||||
*/
|
||||
typedef struct zfs_fuid {
|
||||
list_node_t z_next;
|
||||
uint64_t z_id; /* uid/gid being converted to fuid */
|
||||
uint64_t z_domidx; /* index in AVL domain table */
|
||||
uint64_t z_logfuid; /* index for domain in log */
|
||||
} zfs_fuid_t;
|
||||
|
||||
/* list of unique domains */
|
||||
typedef struct zfs_fuid_domain {
|
||||
list_node_t z_next;
|
||||
uint64_t z_domidx; /* AVL tree idx */
|
||||
const char *z_domain; /* domain string */
|
||||
} zfs_fuid_domain_t;
|
||||
|
||||
/*
|
||||
* FUID information necessary for logging create, setattr, and setacl.
|
||||
*/
|
||||
typedef struct zfs_fuid_info {
|
||||
list_t z_fuids;
|
||||
list_t z_domains;
|
||||
uint64_t z_fuid_owner;
|
||||
uint64_t z_fuid_group;
|
||||
char **z_domain_table; /* Used during replay */
|
||||
uint32_t z_fuid_cnt; /* How many fuids in z_fuids */
|
||||
uint32_t z_domain_cnt; /* How many domains */
|
||||
size_t z_domain_str_sz; /* len of domain strings in z_domains list */
|
||||
} zfs_fuid_info_t;
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct znode;
|
||||
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
|
||||
extern void zfs_fuid_destroy(zfsvfs_t *);
|
||||
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
|
||||
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
|
||||
uid_t *gid);
|
||||
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
|
||||
/*
 * Full prototype: an empty "()" gives the compiler no parameter
 * information, so calls would go unchecked.
 * NOTE(review): parameter type assumed to be the zfs_fuid_info_t returned
 * by zfs_fuid_info_alloc() -- confirm against the definition.
 */
extern void zfs_fuid_info_free(struct zfs_fuid_info *);
|
||||
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
|
||||
#endif
|
||||
|
||||
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
|
||||
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
|
||||
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_FUID_H */
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_I18N_H
|
||||
#define _SYS_ZFS_I18N_H
|
||||
|
||||
|
||||
|
||||
#include <sys/sunddi.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* z_case behaviors
|
||||
* The first two describe the extent of case insensitivity.
|
||||
* The third describes matching behavior when mixed sensitivity
|
||||
* is allowed.
|
||||
*/
|
||||
#define ZFS_CI_ONLY 0x01 /* all lookups case-insensitive */
|
||||
#define ZFS_CI_MIXD 0x02 /* some lookups case-insensitive */
|
||||
|
||||
/*
|
||||
* ZFS_UTF8_ONLY
|
||||
* If set, the file system should reject non-utf8 characters in names.
|
||||
*/
|
||||
#define ZFS_UTF8_ONLY 0x04
|
||||
|
||||
enum zfs_case {
|
||||
ZFS_CASE_SENSITIVE,
|
||||
ZFS_CASE_INSENSITIVE,
|
||||
ZFS_CASE_MIXED
|
||||
};
|
||||
|
||||
enum zfs_normal {
|
||||
ZFS_NORMALIZE_NONE,
|
||||
ZFS_NORMALIZE_D,
|
||||
ZFS_NORMALIZE_KC,
|
||||
ZFS_NORMALIZE_C,
|
||||
ZFS_NORMALIZE_KD
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_I18N_H */
|
||||
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_IOCTL_H
|
||||
#define _SYS_ZFS_IOCTL_H
|
||||
|
||||
#pragma ident "@(#)zfs_ioctl.h 1.19 08/04/27 SMI"
|
||||
|
||||
#include <sys/cred.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/nvpair.h>
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Property values for snapdir
|
||||
*/
|
||||
#define ZFS_SNAPDIR_HIDDEN 0
|
||||
#define ZFS_SNAPDIR_VISIBLE 1
|
||||
|
||||
#define DMU_BACKUP_STREAM_VERSION (1ULL)
|
||||
#define DMU_BACKUP_HEADER_VERSION (2ULL)
|
||||
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
|
||||
|
||||
#define DRR_FLAG_CLONE (1<<0)
|
||||
#define DRR_FLAG_CI_DATA (1<<1)
|
||||
|
||||
/*
|
||||
* DMU replay (backup stream) record; the ioctl command structure is zfs_cmd_t
|
||||
*/
|
||||
typedef struct dmu_replay_record {
|
||||
enum {
|
||||
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
|
||||
DRR_WRITE, DRR_FREE, DRR_END,
|
||||
} drr_type;
|
||||
uint32_t drr_payloadlen;
|
||||
union {
|
||||
struct drr_begin {
|
||||
uint64_t drr_magic;
|
||||
uint64_t drr_version;
|
||||
uint64_t drr_creation_time;
|
||||
dmu_objset_type_t drr_type;
|
||||
uint32_t drr_flags;
|
||||
uint64_t drr_toguid;
|
||||
uint64_t drr_fromguid;
|
||||
char drr_toname[MAXNAMELEN];
|
||||
} drr_begin;
|
||||
struct drr_end {
|
||||
zio_cksum_t drr_checksum;
|
||||
} drr_end;
|
||||
struct drr_object {
|
||||
uint64_t drr_object;
|
||||
dmu_object_type_t drr_type;
|
||||
dmu_object_type_t drr_bonustype;
|
||||
uint32_t drr_blksz;
|
||||
uint32_t drr_bonuslen;
|
||||
uint8_t drr_checksum;
|
||||
uint8_t drr_compress;
|
||||
uint8_t drr_pad[6];
|
||||
/* bonus content follows */
|
||||
} drr_object;
|
||||
struct drr_freeobjects {
|
||||
uint64_t drr_firstobj;
|
||||
uint64_t drr_numobjs;
|
||||
} drr_freeobjects;
|
||||
struct drr_write {
|
||||
uint64_t drr_object;
|
||||
dmu_object_type_t drr_type;
|
||||
uint32_t drr_pad;
|
||||
uint64_t drr_offset;
|
||||
uint64_t drr_length;
|
||||
/* content follows */
|
||||
} drr_write;
|
||||
struct drr_free {
|
||||
uint64_t drr_object;
|
||||
uint64_t drr_offset;
|
||||
uint64_t drr_length;
|
||||
} drr_free;
|
||||
} drr_u;
|
||||
} dmu_replay_record_t;
|
||||
|
||||
typedef struct zinject_record {
|
||||
uint64_t zi_objset;
|
||||
uint64_t zi_object;
|
||||
uint64_t zi_start;
|
||||
uint64_t zi_end;
|
||||
uint64_t zi_guid;
|
||||
uint32_t zi_level;
|
||||
uint32_t zi_error;
|
||||
uint64_t zi_type;
|
||||
uint32_t zi_freq;
|
||||
uint32_t zi_pad; /* pad out to 64 bit alignment */
|
||||
} zinject_record_t;
|
||||
|
||||
#define ZINJECT_NULL 0x1
|
||||
#define ZINJECT_FLUSH_ARC 0x2
|
||||
#define ZINJECT_UNLOAD_SPA 0x4
|
||||
|
||||
typedef struct zfs_share {
|
||||
uint64_t z_exportdata;
|
||||
uint64_t z_sharedata;
|
||||
uint64_t z_sharetype; /* 0 = share, 1 = unshare */
|
||||
uint64_t z_sharemax; /* max length of share string */
|
||||
} zfs_share_t;
|
||||
|
||||
/*
|
||||
* ZFS file systems may behave the usual, POSIX-compliant way, where
|
||||
* name lookups are case-sensitive. They may also be set up so that
|
||||
* all the name lookups are case-insensitive, or so that only some
|
||||
* lookups, the ones that set an FIGNORECASE flag, are case-insensitive.
|
||||
*/
|
||||
typedef enum zfs_case {
|
||||
ZFS_CASE_SENSITIVE,
|
||||
ZFS_CASE_INSENSITIVE,
|
||||
ZFS_CASE_MIXED
|
||||
} zfs_case_t;
|
||||
|
||||
typedef struct zfs_cmd {
|
||||
char zc_name[MAXPATHLEN];
|
||||
char zc_value[MAXPATHLEN * 2];
|
||||
char zc_string[MAXNAMELEN];
|
||||
uint64_t zc_guid;
|
||||
uint64_t zc_nvlist_conf; /* really (char *) */
|
||||
uint64_t zc_nvlist_conf_size;
|
||||
uint64_t zc_nvlist_src; /* really (char *) */
|
||||
uint64_t zc_nvlist_src_size;
|
||||
uint64_t zc_nvlist_dst; /* really (char *) */
|
||||
uint64_t zc_nvlist_dst_size;
|
||||
uint64_t zc_cookie;
|
||||
uint64_t zc_objset_type;
|
||||
uint64_t zc_perm_action;
|
||||
uint64_t zc_history; /* really (char *) */
|
||||
uint64_t zc_history_len;
|
||||
uint64_t zc_history_offset;
|
||||
uint64_t zc_obj;
|
||||
zfs_share_t zc_share;
|
||||
dmu_objset_stats_t zc_objset_stats;
|
||||
struct drr_begin zc_begin_record;
|
||||
zinject_record_t zc_inject_record;
|
||||
} zfs_cmd_t;
|
||||
|
||||
#define ZVOL_MAX_MINOR (1 << 16)
|
||||
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
typedef struct zfs_creat {
|
||||
nvlist_t *zct_zplprops;
|
||||
nvlist_t *zct_props;
|
||||
} zfs_creat_t;
|
||||
|
||||
extern dev_info_t *zfs_dip;
|
||||
|
||||
extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_secpolicy_rename_perms(const char *from,
|
||||
const char *to, cred_t *cr);
|
||||
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
|
||||
extern int zfs_busy(void);
|
||||
extern int zfs_unmount_snap(char *, void *);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFS_IOCTL_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_RLOCK_H
|
||||
#define _SYS_FS_ZFS_RLOCK_H
|
||||
|
||||
#pragma ident "@(#)zfs_rlock.h 1.2 06/06/19 SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
typedef enum {
|
||||
RL_READER,
|
||||
RL_WRITER,
|
||||
RL_APPEND
|
||||
} rl_type_t;
|
||||
|
||||
typedef struct rl {
|
||||
znode_t *r_zp; /* znode this lock applies to */
|
||||
avl_node_t r_node; /* avl node link */
|
||||
uint64_t r_off; /* file range offset */
|
||||
uint64_t r_len; /* file range length */
|
||||
uint_t r_cnt; /* range reference count in tree */
|
||||
rl_type_t r_type; /* range type */
|
||||
kcondvar_t r_wr_cv; /* cv for waiting writers */
|
||||
kcondvar_t r_rd_cv; /* cv for waiting readers */
|
||||
uint8_t r_proxy; /* acting for original range */
|
||||
uint8_t r_write_wanted; /* writer wants to lock this range */
|
||||
uint8_t r_read_wanted; /* reader wants to lock this range */
|
||||
} rl_t;
|
||||
|
||||
/*
|
||||
* Lock a range (offset, length) as either shared (READER)
|
||||
* or exclusive (WRITER or APPEND). APPEND is a special type that
|
||||
* is converted to WRITER and specifies that the lock starts at the
|
||||
* end of file. zfs_range_lock() returns the range lock structure.
|
||||
*/
|
||||
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
|
||||
|
||||
/*
|
||||
* Unlock range and destroy range lock structure.
|
||||
*/
|
||||
void zfs_range_unlock(rl_t *rl);
|
||||
|
||||
/*
|
||||
* Reduce range locked as RL_WRITER from whole file to specified range.
|
||||
* Asserts the whole file was previously locked.
|
||||
*/
|
||||
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
|
||||
|
||||
/*
|
||||
* AVL comparison function used to compare range locks
|
||||
*/
|
||||
int zfs_range_compare(const void *arg1, const void *arg2);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_RLOCK_H */
|
||||
@@ -0,0 +1,140 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_VFSOPS_H
|
||||
#define _SYS_FS_ZFS_VFSOPS_H
|
||||
|
||||
#pragma ident "@(#)zfs_vfsops.h 1.11 08/02/22 SMI"
|
||||
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct zfsvfs zfsvfs_t;
|
||||
|
||||
struct zfsvfs {
|
||||
vfs_t *z_vfs; /* generic fs struct */
|
||||
zfsvfs_t *z_parent; /* parent fs */
|
||||
objset_t *z_os; /* objset reference */
|
||||
uint64_t z_root; /* id of root znode */
|
||||
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
|
||||
uint64_t z_max_blksz; /* maximum block size for files */
|
||||
uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */
|
||||
uint64_t z_fuid_obj; /* fuid table object number */
|
||||
uint64_t z_fuid_size; /* fuid table size */
|
||||
avl_tree_t z_fuid_idx; /* fuid tree keyed by index */
|
||||
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
|
||||
krwlock_t z_fuid_lock; /* fuid lock */
|
||||
boolean_t z_fuid_loaded; /* fuid tables are loaded */
|
||||
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
|
||||
zilog_t *z_log; /* intent log pointer */
|
||||
uint_t z_acl_mode; /* acl chmod/mode behavior */
|
||||
uint_t z_acl_inherit; /* acl inheritance behavior */
|
||||
zfs_case_t z_case; /* case-sense */
|
||||
boolean_t z_utf8; /* utf8-only */
|
||||
int z_norm; /* normalization flags */
|
||||
boolean_t z_atime; /* enable atimes mount option */
|
||||
boolean_t z_unmounted; /* unmounted */
|
||||
rrwlock_t z_teardown_lock;
|
||||
krwlock_t z_teardown_inactive_lock;
|
||||
list_t z_all_znodes; /* all vnodes in the fs */
|
||||
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
|
||||
vnode_t *z_ctldir; /* .zfs directory pointer */
|
||||
boolean_t z_show_ctldir; /* expose .zfs in the root dir */
|
||||
boolean_t z_issnap; /* true if this is a snapshot */
|
||||
boolean_t z_vscan; /* virus scan on/off */
|
||||
boolean_t z_use_fuids; /* version allows fuids */
|
||||
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
|
||||
uint64_t z_version; /* ZPL version */
|
||||
#define ZFS_OBJ_MTX_SZ 64
|
||||
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
|
||||
};
|
||||
|
||||
/*
|
||||
* Normal filesystems (those not under .zfs/snapshot) have a total
|
||||
* file ID size limited to 12 bytes (including the length field) due to
|
||||
* NFSv2 protocol's limitation of 32 bytes for a filehandle. For historical
|
||||
* reasons, this same limit is being imposed by the Solaris NFSv3 implementation
|
||||
* (although the NFSv3 protocol actually permits a maximum of 64 bytes). It
|
||||
* is not possible to expand beyond 12 bytes without abandoning support
|
||||
* of NFSv2.
|
||||
*
|
||||
* For normal filesystems, we partition up the available space as follows:
|
||||
* 2 bytes fid length (required)
|
||||
* 6 bytes object number (48 bits)
|
||||
* 4 bytes generation number (32 bits)
|
||||
*
|
||||
* We reserve only 48 bits for the object number, as this is the limit
|
||||
* currently defined and imposed by the DMU.
|
||||
*/
|
||||
typedef struct zfid_short {
|
||||
uint16_t zf_len;
|
||||
uint8_t zf_object[6]; /* obj[i] = obj >> (8 * i) */
|
||||
uint8_t zf_gen[4]; /* gen[i] = gen >> (8 * i) */
|
||||
} zfid_short_t;
|
||||
|
||||
/*
|
||||
* Filesystems under .zfs/snapshot have a total file ID size of 22 bytes
|
||||
* (including the length field). This makes files under .zfs/snapshot
|
||||
* accessible by NFSv3 and NFSv4, but not NFSv2.
|
||||
*
|
||||
* For files under .zfs/snapshot, we partition up the available space
|
||||
* as follows:
|
||||
* 2 bytes fid length (required)
|
||||
* 6 bytes object number (48 bits)
|
||||
* 4 bytes generation number (32 bits)
|
||||
* 6 bytes objset id (48 bits)
|
||||
* 4 bytes currently just zero (32 bits)
|
||||
*
|
||||
* We reserve only 48 bits for the object number and objset id, as these are
|
||||
* the limits currently defined and imposed by the DMU.
|
||||
*/
|
||||
typedef struct zfid_long {
|
||||
zfid_short_t z_fid;
|
||||
uint8_t zf_setid[6]; /* setid[i] = objsetid >> (8 * i) */
|
||||
uint8_t zf_setgen[4]; /* currently just zero (see layout above) */
|
||||
} zfid_long_t;
|
||||
|
||||
#define SHORT_FID_LEN (sizeof (zfid_short_t) - sizeof (uint16_t))
|
||||
#define LONG_FID_LEN (sizeof (zfid_long_t) - sizeof (uint16_t))
|
||||
|
||||
extern uint_t zfs_fsyncer_key;
|
||||
|
||||
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
|
||||
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_VFSOPS_H */
|
||||
@@ -0,0 +1,353 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_ZNODE_H
|
||||
#define _SYS_FS_ZFS_ZNODE_H
|
||||
|
||||
#pragma ident "@(#)zfs_znode.h 1.25 07/12/07 SMI"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/isa_defs.h>
|
||||
#include <sys/types32.h>
|
||||
#include <sys/attr.h>
|
||||
#include <sys/list.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#endif
|
||||
#include <sys/zfs_acl.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Additional file level attributes, that are stored
|
||||
* in the upper half of zp_flags
|
||||
*/
|
||||
#define ZFS_READONLY 0x0000000100000000
|
||||
#define ZFS_HIDDEN 0x0000000200000000
|
||||
#define ZFS_SYSTEM 0x0000000400000000
|
||||
#define ZFS_ARCHIVE 0x0000000800000000
|
||||
#define ZFS_IMMUTABLE 0x0000001000000000
|
||||
#define ZFS_NOUNLINK 0x0000002000000000
|
||||
#define ZFS_APPENDONLY 0x0000004000000000
|
||||
#define ZFS_NODUMP 0x0000008000000000
|
||||
#define ZFS_OPAQUE 0x0000010000000000
|
||||
#define ZFS_AV_QUARANTINED 0x0000020000000000
|
||||
#define ZFS_AV_MODIFIED 0x0000040000000000
|
||||
|
||||
/*
 * Set the bits in "attr" in zp->z_phys->zp_flags when "value" is non-zero,
 * otherwise clear them.  Arguments are parenthesized so a compound attr
 * (e.g. A | B) is complemented as a whole, and the body is wrapped in
 * do/while (0) so the macro is a single statement that is safe in an
 * unbraced if/else.
 */
#define ZFS_ATTR_SET(zp, attr, value) \
	do { \
		if (value) \
			(zp)->z_phys->zp_flags |= (attr); \
		else \
			(zp)->z_phys->zp_flags &= ~(attr); \
	} while (0)
|
||||
|
||||
/*
|
||||
* Define special zfs pflags
|
||||
*/
|
||||
#define ZFS_XATTR 0x1 /* is an extended attribute */
|
||||
#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
|
||||
#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
|
||||
#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */
|
||||
#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */
|
||||
#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
|
||||
#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
|
||||
#define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */
|
||||
|
||||
/*
|
||||
* Is ID ephemeral?
|
||||
*/
|
||||
/* True when x exceeds MAXUID; argument parenthesized for expression safety. */
#define IS_EPHEMERAL(x) ((x) > MAXUID)
|
||||
|
||||
/*
|
||||
* Should we use FUIDs?
|
||||
*/
|
||||
/*
 * True when both the given ZPL version and the SPA version of the objset's
 * pool are at least the respective FUID versions.  "version" is
 * parenthesized so expression arguments compare as a whole.
 */
#define USE_FUIDS(version, os) ((version) >= ZPL_VERSION_FUID && \
	spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
|
||||
|
||||
#define MASTER_NODE_OBJ 1
|
||||
|
||||
/*
|
||||
* Special attributes for master node.
|
||||
*/
|
||||
#define ZFS_FSID "FSID"
|
||||
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
|
||||
#define ZFS_ROOT_OBJ "ROOT"
|
||||
#define ZPL_VERSION_STR "VERSION"
|
||||
#define ZFS_FUID_TABLES "FUID"
|
||||
|
||||
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
|
||||
|
||||
/* Path component length */
|
||||
/*
|
||||
* The generic fs code uses MAXNAMELEN to represent
|
||||
* what the largest component length is. Unfortunately,
|
||||
* this length includes the terminating NULL. ZFS needs
|
||||
* to tell the users via pathconf() and statvfs() what the
|
||||
* true maximum length of a component is, excluding the NULL.
|
||||
*/
|
||||
#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
|
||||
|
||||
/*
|
||||
* Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
|
||||
* the directory entries.
|
||||
*/
|
||||
#define IFTODT(mode) (((mode) & S_IFMT) >> 12)
|
||||
|
||||
/*
|
||||
* The directory entry has the type (currently unused on Solaris) in the
|
||||
* top 4 bits, and the object number in the low 48 bits. The "middle"
|
||||
* 12 bits are unused.
|
||||
*/
|
||||
#define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
|
||||
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
|
||||
|
||||
/*
|
||||
* This is the persistent portion of the znode. It is stored
|
||||
* in the "bonus buffer" of the file. Short symbolic links
|
||||
* are also stored in the bonus buffer.
|
||||
*/
|
||||
typedef struct znode_phys {
|
||||
uint64_t zp_atime[2]; /* 0 - last file access time */
|
||||
uint64_t zp_mtime[2]; /* 16 - last file modification time */
|
||||
uint64_t zp_ctime[2]; /* 32 - last file change time */
|
||||
uint64_t zp_crtime[2]; /* 48 - creation time */
|
||||
uint64_t zp_gen; /* 64 - generation (txg of creation) */
|
||||
uint64_t zp_mode; /* 72 - file mode bits */
|
||||
uint64_t zp_size; /* 80 - size of file */
|
||||
uint64_t zp_parent; /* 88 - directory parent (`..') */
|
||||
uint64_t zp_links; /* 96 - number of links to file */
|
||||
uint64_t zp_xattr; /* 104 - DMU object for xattrs */
|
||||
uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */
|
||||
uint64_t zp_flags; /* 120 - persistent flags */
|
||||
uint64_t zp_uid; /* 128 - file owner */
|
||||
uint64_t zp_gid; /* 136 - owning group */
|
||||
uint64_t zp_zap; /* 144 - extra attributes */
|
||||
uint64_t zp_pad[3]; /* 152 - future */
|
||||
zfs_acl_phys_t zp_acl; /* 176 - 263 ACL */
|
||||
/*
|
||||
* Data may pad out any remaining bytes in the znode buffer, eg:
|
||||
*
|
||||
* |<---------------------- dnode_phys (512) ------------------------>|
|
||||
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
|
||||
* |<---- znode (264) ---->|<---- data (56) ---->|
|
||||
*
|
||||
* At present, we use this space for the following:
|
||||
* - symbolic links
|
||||
* - 32-byte anti-virus scanstamp (regular files only)
|
||||
*/
|
||||
} znode_phys_t;
|
||||
|
||||
/*
|
||||
* Directory entry locks control access to directory entries.
|
||||
* They are used to protect creates, deletes, and renames.
|
||||
* Each directory znode has a mutex and a list of locked names.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
typedef struct zfs_dirlock {
|
||||
char *dl_name; /* directory entry being locked */
|
||||
uint32_t dl_sharecnt; /* 0 if exclusive, > 0 if shared */
|
||||
uint16_t dl_namesize; /* set if dl_name was allocated */
|
||||
kcondvar_t dl_cv; /* wait for entry to be unlocked */
|
||||
struct znode *dl_dzp; /* directory znode */
|
||||
struct zfs_dirlock *dl_next; /* next in z_dirlocks list */
|
||||
} zfs_dirlock_t;
|
||||
|
||||
typedef struct znode {
|
||||
struct zfsvfs *z_zfsvfs;
|
||||
vnode_t *z_vnode;
|
||||
uint64_t z_id; /* object ID for this znode */
|
||||
kmutex_t z_lock; /* znode modification lock */
|
||||
krwlock_t z_map_lock; /* page map lock */
|
||||
krwlock_t z_parent_lock; /* parent lock for directories */
|
||||
krwlock_t z_name_lock; /* "master" lock for dirent locks */
|
||||
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
|
||||
kmutex_t z_range_lock; /* protects changes to z_range_avl */
|
||||
avl_tree_t z_range_avl; /* avl tree of file range locks */
|
||||
uint8_t z_unlinked; /* file has been unlinked */
|
||||
uint8_t z_atime_dirty; /* atime needs to be synced */
|
||||
uint8_t z_zn_prefetch; /* Prefetch znodes? */
|
||||
uint_t z_blksz; /* block size in bytes */
|
||||
uint_t z_seq; /* modification sequence number */
|
||||
uint64_t z_mapcnt; /* number of pages mapped to file */
|
||||
uint64_t z_last_itx; /* last ZIL itx on this znode */
|
||||
uint64_t z_gen; /* generation (same as zp_gen) */
|
||||
uint32_t z_sync_cnt; /* synchronous open count */
|
||||
kmutex_t z_acl_lock; /* acl data lock */
|
||||
list_node_t z_link_node; /* all znodes in fs link */
|
||||
/*
|
||||
* These are dmu managed fields.
|
||||
*/
|
||||
znode_phys_t *z_phys; /* pointer to persistent znode */
|
||||
dmu_buf_t *z_dbuf; /* buffer containing the z_phys */
|
||||
} znode_t;
|
||||
|
||||
|
||||
/*
|
||||
* Range locking rules
|
||||
* --------------------
|
||||
* 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
|
||||
* file range needs to be locked as RL_WRITER. Only then can the pages be
|
||||
* freed etc and zp_size reset. zp_size must be set within range lock.
|
||||
* 2. For writes and punching holes (zfs_write & zfs_space) just the range
|
||||
* being written or freed needs to be locked as RL_WRITER.
|
||||
* Multiple writes at the end of the file must coordinate zp_size updates
|
||||
* to ensure data isn't lost. A compare and swap loop is currently used
|
||||
* to ensure the file size is at least the offset last written.
|
||||
* 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
|
||||
* read needs to be locked as RL_READER. A check against zp_size can then
|
||||
* be made for reading beyond end of file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert between znode pointers and vnode pointers
|
||||
*/
|
||||
#define ZTOV(ZP) ((ZP)->z_vnode)
|
||||
#define VTOZ(VP) ((znode_t *)(VP)->v_data)
|
||||
|
||||
/*
|
||||
* ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
|
||||
* ZFS_EXIT() must be called before exiting the vop.
|
||||
* ZFS_VERIFY_ZP() verifies the znode is valid.
|
||||
*/
|
||||
#define ZFS_ENTER(zfsvfs) \
|
||||
{ \
|
||||
rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
|
||||
if ((zfsvfs)->z_unmounted) { \
|
||||
ZFS_EXIT(zfsvfs); \
|
||||
return (EIO); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
|
||||
|
||||
/*
 * If the znode has no dbuf (z_dbuf == NULL), call ZFS_EXIT() on its
 * z_zfsvfs and return EIO from the *calling* function.  Because the
 * expansion contains a `return', it is only usable inside functions that
 * return an int-compatible error code.
 *
 * The definition must not end in a line continuation: a trailing
 * backslash would splice the following source line into the macro.
 *
 * The expansion is intentionally left as a plain `if' block so existing
 * call sites, with or without a trailing semicolon, keep compiling; do
 * not use it as the unbraced body of an if/else.
 */
#define ZFS_VERIFY_ZP(zp) \
	if ((zp)->z_dbuf == NULL) { \
		ZFS_EXIT((zp)->z_zfsvfs); \
		return (EIO); \
	}
|
||||
|
||||
/*
|
||||
* Macros for dealing with dmu_buf_hold
|
||||
*/
|
||||
#define ZFS_OBJ_HASH(obj_num) ((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
|
||||
#define ZFS_OBJ_MUTEX(zp) \
|
||||
(&(zp)->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH((zp)->z_id)])
|
||||
/*
 * mutex_enter() the z_hold_mtx[] slot selected by ZFS_OBJ_HASH(obj_num).
 *
 * Expands to a single expression with no trailing semicolon (matching
 * ZFS_OBJ_HOLD_EXIT), so the caller's own `;' terminates the statement
 * and the macro can be used as the body of an if/else.
 */
#define ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
|
||||
#define ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
|
||||
mutex_exit(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
|
||||
|
||||
/*
|
||||
* Macros to encode/decode ZFS stored time values from/to struct timespec
|
||||
*/
|
||||
#define ZFS_TIME_ENCODE(tp, stmp) \
|
||||
{ \
|
||||
(stmp)[0] = (uint64_t)(tp)->tv_sec; \
|
||||
(stmp)[1] = (uint64_t)(tp)->tv_nsec; \
|
||||
}
|
||||
|
||||
#define ZFS_TIME_DECODE(tp, stmp) \
|
||||
{ \
|
||||
(tp)->tv_sec = (time_t)(stmp)[0]; \
|
||||
(tp)->tv_nsec = (long)(stmp)[1]; \
|
||||
}
|
||||
|
||||
/*
|
||||
* Timestamp defines
|
||||
*/
|
||||
#define ACCESSED (AT_ATIME)
|
||||
#define STATE_CHANGED (AT_CTIME)
|
||||
#define CONTENT_MODIFIED (AT_MTIME | AT_CTIME)
|
||||
|
||||
/*
 * Call zfs_time_stamper(zp, ACCESSED, NULL), but only when
 * zfsvfs->z_atime is non-zero and VFS_RDONLY is not set in
 * zfsvfs->z_vfs->vfs_flag.
 *
 * Wrapped in do { } while (0) so the expansion is exactly one statement
 * and the inner `if' cannot capture an `else' that follows the call site.
 * As before, the caller supplies the terminating semicolon.
 */
#define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
	do { \
		if ((zfsvfs)->z_atime && \
		    !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
			zfs_time_stamper(zp, ACCESSED, NULL); \
	} while (0)
|
||||
|
||||
extern int zfs_init_fs(zfsvfs_t *, znode_t **, cred_t *);
|
||||
extern void zfs_set_dataprop(objset_t *);
|
||||
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
|
||||
dmu_tx_t *tx);
|
||||
extern void zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
|
||||
extern void zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
|
||||
extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
|
||||
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
|
||||
extern void zfs_znode_init(void);
|
||||
extern void zfs_znode_fini(void);
|
||||
extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
|
||||
extern int zfs_rezget(znode_t *);
|
||||
extern void zfs_zinactive(znode_t *);
|
||||
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
|
||||
extern void zfs_znode_free(znode_t *);
|
||||
/* Declared with (void) so this is a true prototype and stray arguments are diagnosed. */
extern void zfs_remove_op_tables(void);
|
||||
/* Declared with (void) so this is a true prototype and stray arguments are diagnosed. */
extern int zfs_create_op_tables(void);
|
||||
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
|
||||
extern dev_t zfs_cmpldev(uint64_t);
|
||||
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
|
||||
extern int zfs_set_version(const char *name, uint64_t newvers);
|
||||
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern void zfs_znode_dmu_fini(znode_t *);
|
||||
|
||||
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
|
||||
vattr_t *vap);
|
||||
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
|
||||
vattr_t *vap);
|
||||
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, char *name);
|
||||
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name);
|
||||
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *dzp, znode_t *zp, char *name, char *link);
|
||||
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||
znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
|
||||
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, offset_t off, ssize_t len, int ioflag);
|
||||
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, uint64_t off, uint64_t len);
|
||||
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
|
||||
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
|
||||
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
|
||||
|
||||
extern zil_get_data_t zfs_get_data;
|
||||
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
|
||||
extern int zfsfstype;
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_FS_ZFS_ZNODE_H */
|
||||
@@ -0,0 +1,380 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIL_H
|
||||
#define _SYS_ZIL_H
|
||||
|
||||
#pragma ident "@(#)zil.h 1.15 08/02/22 SMI"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Intent log format:
|
||||
*
|
||||
* Each objset has its own intent log. The log header (zil_header_t)
|
||||
* for objset N's intent log is kept in the Nth object of the SPA's
|
||||
* intent_log objset. The log header points to a chain of log blocks,
|
||||
* each of which contains log records (i.e., transactions) followed by
|
||||
* a log block trailer (zil_trailer_t). The format of a log record
|
||||
* depends on the record (or transaction) type, but all records begin
|
||||
* with a common structure that defines the type, length, and txg.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Intent log header - this on disk structure holds fields to manage
|
||||
* the log. All fields are 64 bit to easily handle cross architectures.
|
||||
*/
|
||||
typedef struct zil_header {
|
||||
uint64_t zh_claim_txg; /* txg in which log blocks were claimed */
|
||||
uint64_t zh_replay_seq; /* highest replayed sequence number */
|
||||
blkptr_t zh_log; /* log chain */
|
||||
uint64_t zh_claim_seq; /* highest claimed sequence number */
|
||||
uint64_t zh_pad[5];
|
||||
} zil_header_t;
|
||||
|
||||
/*
|
||||
* Log block trailer - structure at the end of the header and each log block
|
||||
*
|
||||
* The zit_bt contains a zbt_cksum which for the intent log is
|
||||
* the sequence number of this log block. A seq of 0 is invalid.
|
||||
* The zbt_cksum is checked by the SPA against the sequence
|
||||
* number passed in the blk_cksum field of the blkptr_t
|
||||
*/
|
||||
typedef struct zil_trailer {
|
||||
uint64_t zit_pad;
|
||||
blkptr_t zit_next_blk; /* next block in chain */
|
||||
uint64_t zit_nused; /* bytes in log block used */
|
||||
zio_block_tail_t zit_bt; /* block trailer */
|
||||
} zil_trailer_t;
|
||||
|
||||
#define ZIL_MIN_BLKSZ 4096ULL
|
||||
#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
|
||||
#define ZIL_BLK_DATA_SZ(lwb) ((lwb)->lwb_sz - sizeof (zil_trailer_t))
|
||||
|
||||
/*
|
||||
* The words of a log block checksum.
|
||||
*/
|
||||
#define ZIL_ZC_GUID_0 0
|
||||
#define ZIL_ZC_GUID_1 1
|
||||
#define ZIL_ZC_OBJSET 2
|
||||
#define ZIL_ZC_SEQ 3
|
||||
|
||||
typedef enum zil_create {
|
||||
Z_FILE,
|
||||
Z_DIR,
|
||||
Z_XATTRDIR,
|
||||
} zil_create_t;
|
||||
|
||||
/*
|
||||
* size of xvattr log section.
|
||||
* it is composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps
|
||||
* for create time and a single 64 bit integer for all of the attributes,
|
||||
* and 4 64 bit integers (32 bytes) for the scanstamp.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * Size in bytes of the xvattr section of a log record whose attribute
 * bitmap has `mapsize' 32-bit words:
 *   - sizeof (lr_attr_t), which already contains the first bitmap word,
 *   - the remaining (mapsize - 1) bitmap words,
 *   - seven 64-bit words (2 for create time, 1 for the attributes,
 *     4 for the scanstamp).
 *
 * Both `mapsize' and the whole expansion are parenthesized so the macro
 * can be embedded in a larger expression (e.g. multiplied or compared)
 * without operator-precedence surprises.
 */
#define ZIL_XVAT_SIZE(mapsize) \
	(sizeof (lr_attr_t) + (sizeof (uint32_t) * ((mapsize) - 1)) + \
	(sizeof (uint64_t) * 7))
|
||||
|
||||
/*
|
||||
* Size of ACL in log. The ACE data is padded out to properly align
|
||||
* on 8 byte boundary.
|
||||
*/
|
||||
|
||||
#define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t)))
|
||||
|
||||
/*
|
||||
* Intent log transaction types and record structures
|
||||
*/
|
||||
#define TX_CREATE 1 /* Create file */
|
||||
#define TX_MKDIR 2 /* Make directory */
|
||||
#define TX_MKXATTR 3 /* Make XATTR directory */
|
||||
#define TX_SYMLINK 4 /* Create symbolic link to a file */
|
||||
#define TX_REMOVE 5 /* Remove file */
|
||||
#define TX_RMDIR 6 /* Remove directory */
|
||||
#define TX_LINK 7 /* Create hard link to a file */
|
||||
#define TX_RENAME 8 /* Rename a file */
|
||||
#define TX_WRITE 9 /* File write */
|
||||
#define TX_TRUNCATE 10 /* Truncate a file */
|
||||
#define TX_SETATTR 11 /* Set file attributes */
|
||||
#define TX_ACL_V0 12 /* Set old formatted ACL */
|
||||
#define TX_ACL 13 /* Set ACL */
|
||||
#define TX_CREATE_ACL 14 /* create with ACL */
|
||||
#define TX_CREATE_ATTR 15 /* create + attrs */
|
||||
#define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */
|
||||
#define TX_MKDIR_ACL 17 /* mkdir with ACL */
|
||||
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
|
||||
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
|
||||
#define TX_MAX_TYPE 20 /* Max transaction type */
|
||||
|
||||
/*
|
||||
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
|
||||
* may have the following bit set, indicating the original request
|
||||
* specified case-insensitive handling of names.
|
||||
*/
|
||||
#define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */
|
||||
|
||||
/*
|
||||
* Format of log records.
|
||||
* The fields are carefully defined to allow them to be aligned
|
||||
* and sized the same on sparc & intel architectures.
|
||||
* Each log record has a common structure at the beginning.
|
||||
*
|
||||
* Note, lrc_seq holds two different sequence numbers. Whilst in memory
|
||||
* it contains the transaction sequence number. The log record on
|
||||
* disk holds the sequence number of all log records which is used to
|
||||
* ensure we don't replay the same record. The two sequence numbers are
|
||||
* different because the transactions can now be pushed out of order.
|
||||
*/
|
||||
typedef struct { /* common log record header */
|
||||
uint64_t lrc_txtype; /* intent log transaction type */
|
||||
uint64_t lrc_reclen; /* transaction record length */
|
||||
uint64_t lrc_txg; /* dmu transaction group number */
|
||||
uint64_t lrc_seq; /* see comment above */
|
||||
} lr_t;
|
||||
|
||||
/*
|
||||
* Handle optional extended vattr attributes.
|
||||
*
|
||||
* Whenever new attributes are added the version number
|
||||
* will need to be updated as will code in
|
||||
* zfs_log.c and zfs_replay.c
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t lr_attr_masksize; /* number of elements in array */
|
||||
uint32_t lr_attr_bitmap; /* First entry of array */
|
||||
/* remainder of array and any additional fields */
|
||||
} lr_attr_t;
|
||||
|
||||
/*
|
||||
* log record for creates without optional ACL.
|
||||
* This log record does support optional xvattr_t attributes.
|
||||
*/
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* object id of directory */
|
||||
uint64_t lr_foid; /* object id of created file object */
|
||||
uint64_t lr_mode; /* mode of object */
|
||||
uint64_t lr_uid; /* uid of object */
|
||||
uint64_t lr_gid; /* gid of object */
|
||||
uint64_t lr_gen; /* generation (txg of creation) */
|
||||
uint64_t lr_crtime[2]; /* creation time */
|
||||
uint64_t lr_rdev; /* rdev of object to create */
|
||||
/* name of object to create follows this */
|
||||
/* for symlinks, link content follows name */
|
||||
/* for creates with xvattr data, the name follows the xvattr info */
|
||||
} lr_create_t;
|
||||
|
||||
/*
|
||||
* FUID ACL record will be an array of ACEs from the original ACL.
|
||||
* If this array includes ephemeral IDs, the record will also include
|
||||
* an array of log-specific FUIDs to replace the ephemeral IDs.
|
||||
* Only one copy of each unique domain will be present, so the log-specific
|
||||
* FUIDs will use an index into a compressed domain table. On replay this
|
||||
* information will be used to construct real FUIDs (and bypass idmap,
|
||||
* since it may not be available).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Log record for creates with optional ACL
|
||||
* This log record is also used for recording any FUID
|
||||
* information needed for replaying the create. If the
|
||||
* file doesn't have any actual ACEs then the lr_aclcnt
|
||||
* would be zero.
|
||||
*/
|
||||
typedef struct {
|
||||
lr_create_t lr_create; /* common create portion */
|
||||
uint64_t lr_aclcnt; /* number of ACEs in ACL */
|
||||
uint64_t lr_domcnt; /* number of unique domains */
|
||||
uint64_t lr_fuidcnt; /* number of real fuids */
|
||||
uint64_t lr_acl_bytes; /* number of bytes in ACL */
|
||||
uint64_t lr_acl_flags; /* ACL flags */
|
||||
/* lr_acl_bytes number of variable sized ace's follows */
|
||||
/* if create is also setting xvattr's, then acl data follows xvattr */
|
||||
/* if ACE FUIDs are needed then they will follow the xvattr_t */
|
||||
/* Following the FUIDs will be the domain table information. */
|
||||
/* The FUIDs for the owner and group will be in the lr_create */
|
||||
/* portion of the record. */
|
||||
/* name follows ACL data */
|
||||
} lr_acl_create_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* obj id of directory */
|
||||
/* name of object to remove follows this */
|
||||
} lr_remove_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_doid; /* obj id of directory */
|
||||
uint64_t lr_link_obj; /* obj id of link */
|
||||
/* name of object to link follows this */
|
||||
} lr_link_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_sdoid; /* obj id of source directory */
|
||||
uint64_t lr_tdoid; /* obj id of target directory */
|
||||
/* 2 strings: names of source and destination follow this */
|
||||
} lr_rename_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* file object to write */
|
||||
uint64_t lr_offset; /* offset to write to */
|
||||
uint64_t lr_length; /* user data length to write */
|
||||
uint64_t lr_blkoff; /* offset represented by lr_blkptr */
|
||||
blkptr_t lr_blkptr; /* spa block pointer for replay */
|
||||
/* write data will follow for small writes */
|
||||
} lr_write_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* object id of file to truncate */
|
||||
uint64_t lr_offset; /* offset to truncate from */
|
||||
uint64_t lr_length; /* length to truncate */
|
||||
} lr_truncate_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* file object to change attributes */
|
||||
uint64_t lr_mask; /* mask of attributes to set */
|
||||
uint64_t lr_mode; /* mode to set */
|
||||
uint64_t lr_uid; /* uid to set */
|
||||
uint64_t lr_gid; /* gid to set */
|
||||
uint64_t lr_size; /* size to set */
|
||||
uint64_t lr_atime[2]; /* access time */
|
||||
uint64_t lr_mtime[2]; /* modification time */
|
||||
/* optional attribute lr_attr_t may be here */
|
||||
} lr_setattr_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* obj id of file */
|
||||
uint64_t lr_aclcnt; /* number of acl entries */
|
||||
/* lr_aclcnt number of ace_t entries follow this */
|
||||
} lr_acl_v0_t;
|
||||
|
||||
typedef struct {
|
||||
lr_t lr_common; /* common portion of log record */
|
||||
uint64_t lr_foid; /* obj id of file */
|
||||
uint64_t lr_aclcnt; /* number of ACEs in ACL */
|
||||
uint64_t lr_domcnt; /* number of unique domains */
|
||||
uint64_t lr_fuidcnt; /* number of real fuids */
|
||||
uint64_t lr_acl_bytes; /* number of bytes in ACL */
|
||||
uint64_t lr_acl_flags; /* ACL flags */
|
||||
/* lr_acl_bytes number of variable sized ace's follows */
|
||||
} lr_acl_t;
|
||||
|
||||
/*
|
||||
* ZIL structure definitions, interface function prototype and globals.
|
||||
*/
|
||||
|
||||
/*
|
||||
* ZFS intent log transaction structure
|
||||
*/
|
||||
typedef enum {
|
||||
WR_INDIRECT, /* indirect - a large write (dmu_sync() data */
|
||||
/* and put blkptr in log, rather than actual data) */
|
||||
WR_COPIED, /* immediate - data is copied into lr_write_t */
|
||||
WR_NEED_COPY, /* immediate - data needs to be copied if pushed */
|
||||
} itx_wr_state_t;
|
||||
|
||||
typedef struct itx {
|
||||
list_node_t itx_node; /* linkage on zl_itx_list */
|
||||
void *itx_private; /* type-specific opaque data */
|
||||
itx_wr_state_t itx_wr_state; /* write state */
|
||||
uint8_t itx_sync; /* synchronous transaction */
|
||||
uint64_t itx_sod; /* record size on disk */
|
||||
lr_t itx_lr; /* common part of log record */
|
||||
/* followed by type-specific part of lr_xx_t and its immediate data */
|
||||
} itx_t;
|
||||
|
||||
|
||||
/*
|
||||
* zgd_t is passed through dmu_sync() to the callback routine zfs_get_done()
|
||||
* to handle the cleanup of the dmu_sync() buffer write
|
||||
*/
|
||||
typedef struct {
|
||||
zilog_t *zgd_zilog; /* zilog */
|
||||
blkptr_t *zgd_bp; /* block pointer */
|
||||
struct rl *zgd_rl; /* range lock */
|
||||
} zgd_t;
|
||||
|
||||
|
||||
typedef void zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
|
||||
uint64_t txg);
|
||||
typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
|
||||
uint64_t txg);
|
||||
/*
 * Replay callback type, returning int.  The empty parameter list is an
 * old-style declaration that leaves the arguments unspecified, so the
 * compiler does not check the arguments of calls made through it.
 * NOTE(review): presumably left unprototyped so that per-record replay
 * functions may differ in their record argument type -- confirm before
 * tightening this into a full prototype.
 */
typedef int zil_replay_func_t();
|
||||
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
|
||||
|
||||
extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
||||
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
|
||||
|
||||
extern void zil_init(void);
|
||||
extern void zil_fini(void);
|
||||
|
||||
extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys);
|
||||
extern void zil_free(zilog_t *zilog);
|
||||
|
||||
extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data);
|
||||
extern void zil_close(zilog_t *zilog);
|
||||
|
||||
extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp,
|
||||
zil_replay_func_t *replay_func[TX_MAX_TYPE]);
|
||||
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
|
||||
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
|
||||
|
||||
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);
|
||||
extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
|
||||
|
||||
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
|
||||
|
||||
extern int zil_claim(char *osname, void *txarg);
|
||||
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
|
||||
extern void zil_clean(zilog_t *zilog);
|
||||
extern int zil_is_committed(zilog_t *zilog);
|
||||
|
||||
extern int zil_suspend(zilog_t *zilog);
|
||||
extern void zil_resume(zilog_t *zilog);
|
||||
|
||||
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
|
||||
|
||||
extern int zil_disable;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIL_H */
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIL_IMPL_H
|
||||
#define _SYS_ZIL_IMPL_H
|
||||
|
||||
#pragma ident "@(#)zil_impl.h 1.7 07/12/12 SMI"
|
||||
|
||||
#include <sys/zil.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Log write buffer.
|
||||
*/
|
||||
typedef struct lwb {
|
||||
zilog_t *lwb_zilog; /* back pointer to log struct */
|
||||
blkptr_t lwb_blk; /* on disk address of this log blk */
|
||||
int lwb_nused; /* # used bytes in buffer */
|
||||
int lwb_sz; /* size of block and buffer */
|
||||
char *lwb_buf; /* log write buffer */
|
||||
zio_t *lwb_zio; /* zio for this buffer */
|
||||
uint64_t lwb_max_txg; /* highest txg in this lwb */
|
||||
txg_handle_t lwb_txgh; /* txg handle for txg_exit() */
|
||||
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
|
||||
} lwb_t;
|
||||
|
||||
/*
|
||||
* Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
|
||||
* we've touched so we know which ones need a write cache flush at the end.
|
||||
*/
|
||||
typedef struct zil_vdev_node {
|
||||
uint64_t zv_vdev; /* vdev to be flushed */
|
||||
avl_node_t zv_node; /* AVL tree linkage */
|
||||
} zil_vdev_node_t;
|
||||
|
||||
/*
|
||||
* Stable storage intent log management structure. One per dataset.
|
||||
*/
|
||||
struct zilog {
|
||||
kmutex_t zl_lock; /* protects most zilog_t fields */
|
||||
struct dsl_pool *zl_dmu_pool; /* DSL pool */
|
||||
spa_t *zl_spa; /* handle for read/write log */
|
||||
const zil_header_t *zl_header; /* log header buffer */
|
||||
objset_t *zl_os; /* object set we're logging */
|
||||
zil_get_data_t *zl_get_data; /* callback to get object content */
|
||||
zio_t *zl_root_zio; /* log writer root zio */
|
||||
uint64_t zl_itx_seq; /* next itx sequence number */
|
||||
uint64_t zl_commit_seq; /* committed upto this number */
|
||||
uint64_t zl_lr_seq; /* log record sequence number */
|
||||
uint64_t zl_destroy_txg; /* txg of last zil_destroy() */
|
||||
uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */
|
||||
uint32_t zl_suspend; /* log suspend count */
|
||||
kcondvar_t zl_cv_writer; /* log writer thread completion */
|
||||
kcondvar_t zl_cv_suspend; /* log suspend completion */
|
||||
uint8_t zl_suspending; /* log is currently suspending */
|
||||
uint8_t zl_keep_first; /* keep first log block in destroy */
|
||||
uint8_t zl_stop_replay; /* don't replay any further */
|
||||
uint8_t zl_stop_sync; /* for debugging */
|
||||
uint8_t zl_writer; /* boolean: write setup in progress */
|
||||
uint8_t zl_log_error; /* boolean: log write error */
|
||||
list_t zl_itx_list; /* in-memory itx list */
|
||||
uint64_t zl_itx_list_sz; /* total size of records on list */
|
||||
uint64_t zl_cur_used; /* current commit log size used */
|
||||
uint64_t zl_prev_used; /* previous commit log size used */
|
||||
list_t zl_lwb_list; /* in-flight log write list */
|
||||
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
|
||||
avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */
|
||||
taskq_t *zl_clean_taskq; /* runs lwb and itx clean tasks */
|
||||
avl_tree_t zl_dva_tree; /* track DVAs during log parse */
|
||||
clock_t zl_replay_time; /* lbolt of when replay started */
|
||||
uint64_t zl_replay_blks; /* number of log blocks replayed */
|
||||
};
|
||||
|
||||
typedef struct zil_dva_node {
|
||||
dva_t zn_dva;
|
||||
avl_node_t zn_node;
|
||||
} zil_dva_node_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIL_IMPL_H */
|
||||
@@ -0,0 +1,388 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_H
|
||||
#define _ZIO_H
|
||||
|
||||
#pragma ident "@(#)zio.h 1.20 08/04/01 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dkio.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio_impl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */
|
||||
|
||||
/*
 * Block trailer: a 64-bit magic word followed by a 256-bit checksum.
 * It is the final member of zio_gbh_phys_t, and its size is subtracted
 * when SPA_GBH_NBLKPTRS / SPA_GBH_FILLER are computed.
 * NOTE(review): zbt_magic is presumably expected to hold ZBT_MAGIC
 * (possibly byte-swapped) -- confirm against zio_checksum.c.
 */
typedef struct zio_block_tail {
|
||||
uint64_t zbt_magic; /* for validation, endianness */
|
||||
zio_cksum_t zbt_cksum; /* 256-bit checksum */
|
||||
} zio_block_tail_t;
|
||||
|
||||
/*
|
||||
* Gang block headers are self-checksumming and contain an array
|
||||
* of block pointers.
|
||||
*/
|
||||
/*
 * A gang block header occupies exactly one minimum-sized block.
 */
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
|
||||
/*
 * SPA_GBH_NBLKPTRS: number of whole blkptr_t entries that fit in the
 * header once room for the trailing zio_block_tail_t is reserved
 * (integer division, so any remainder is dropped).
 */
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
|
||||
sizeof (zio_block_tail_t)) / sizeof (blkptr_t))
|
||||
/*
 * SPA_GBH_FILLER: the bytes left over after the block pointers and the
 * tail, expressed as a count of uint64_t words.  It sizes the zg_filler
 * padding array of zio_gbh_phys_t.
 */
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
|
||||
sizeof (zio_block_tail_t) - \
|
||||
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
|
||||
sizeof (uint64_t))
|
||||
|
||||
/*
 * I/O size for a zio, derived from its block pointer (zio->io_bp):
 * SPA_GANGBLOCKSIZE when BP_IS_GANG() is true for the block pointer,
 * otherwise the value of BP_GET_PSIZE() for it.
 * The argument is expanded twice, so it must have no side effects.
 */
#define ZIO_GET_IOSIZE(zio) \
|
||||
(BP_IS_GANG((zio)->io_bp) ? \
|
||||
SPA_GANGBLOCKSIZE : BP_GET_PSIZE((zio)->io_bp))
|
||||
|
||||
/*
 * Gang block header: SPA_GBH_NBLKPTRS block pointers, then
 * SPA_GBH_FILLER words of padding, then the block tail.
 * The two counts are derived from SPA_GANGBLOCKSIZE, so the members are
 * meant to add up to exactly one gang block; member order must not be
 * changed.
 */
typedef struct zio_gbh {
|
||||
blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS];
|
||||
uint64_t zg_filler[SPA_GBH_FILLER];
|
||||
zio_block_tail_t zg_tail;
|
||||
} zio_gbh_phys_t;
|
||||
|
||||
/*
 * Checksum selectors, numbered consecutively from 0.
 *
 * ZIO_CHECKSUM_FUNCTIONS is not a checksum: it is the count of the
 * enumerators before it and is used as the dimension of
 * zio_checksum_table[].  ZIO_CHECKSUM_ON is mapped to a concrete
 * algorithm by the ZIO_CHECKSUM_ON_VALUE macro.
 *
 * NOTE(review): these values are presumably stored persistently, so new
 * entries should only be added immediately before
 * ZIO_CHECKSUM_FUNCTIONS -- confirm before reordering anything.
 */
enum zio_checksum {
|
||||
ZIO_CHECKSUM_INHERIT = 0,
|
||||
ZIO_CHECKSUM_ON,
|
||||
ZIO_CHECKSUM_OFF,
|
||||
ZIO_CHECKSUM_LABEL,
|
||||
ZIO_CHECKSUM_GANG_HEADER,
|
||||
ZIO_CHECKSUM_ZILOG,
|
||||
ZIO_CHECKSUM_FLETCHER_2,
|
||||
ZIO_CHECKSUM_FLETCHER_4,
|
||||
ZIO_CHECKSUM_SHA256,
|
||||
ZIO_CHECKSUM_FUNCTIONS
|
||||
};
|
||||
|
||||
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
|
||||
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
|
||||
|
||||
/*
 * Compression selectors, numbered consecutively from 0.
 *
 * ZIO_COMPRESS_FUNCTIONS is not an algorithm: it is the count of the
 * enumerators before it and is used as the dimension of
 * zio_compress_table[].  ZIO_COMPRESS_ON is mapped to a concrete
 * algorithm by the ZIO_COMPRESS_ON_VALUE macro.
 *
 * NOTE(review): GZIP_1 .. GZIP_9 presumably correspond to gzip levels
 * 1-9 (see ci_level in zio_compress_info_t), and the values are
 * presumably stored persistently -- confirm before reordering.
 */
enum zio_compress {
|
||||
ZIO_COMPRESS_INHERIT = 0,
|
||||
ZIO_COMPRESS_ON,
|
||||
ZIO_COMPRESS_OFF,
|
||||
ZIO_COMPRESS_LZJB,
|
||||
ZIO_COMPRESS_EMPTY,
|
||||
ZIO_COMPRESS_GZIP_1,
|
||||
ZIO_COMPRESS_GZIP_2,
|
||||
ZIO_COMPRESS_GZIP_3,
|
||||
ZIO_COMPRESS_GZIP_4,
|
||||
ZIO_COMPRESS_GZIP_5,
|
||||
ZIO_COMPRESS_GZIP_6,
|
||||
ZIO_COMPRESS_GZIP_7,
|
||||
ZIO_COMPRESS_GZIP_8,
|
||||
ZIO_COMPRESS_GZIP_9,
|
||||
ZIO_COMPRESS_FUNCTIONS
|
||||
};
|
||||
|
||||
#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
|
||||
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
|
||||
|
||||
#define ZIO_FAILURE_MODE_WAIT 0
|
||||
#define ZIO_FAILURE_MODE_CONTINUE 1
|
||||
#define ZIO_FAILURE_MODE_PANIC 2
|
||||
|
||||
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
|
||||
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
|
||||
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
|
||||
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
|
||||
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
|
||||
#define ZIO_PRIORITY_FREE (zio_priority_table[5])
|
||||
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
|
||||
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
|
||||
#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
|
||||
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
|
||||
#define ZIO_PRIORITY_TABLE_SIZE 10
|
||||
|
||||
/*
 * ZIO_FLAG_MUSTSUCCEED is zero: it sets no bit, so an expression such as
 * (flags & ZIO_FLAG_MUSTSUCCEED) is always 0 and cannot be used to test
 * for it.
 */
#define ZIO_FLAG_MUSTSUCCEED 0x00000
|
||||
#define ZIO_FLAG_CANFAIL 0x00001
|
||||
#define ZIO_FLAG_FAILFAST 0x00002
|
||||
#define ZIO_FLAG_CONFIG_HELD 0x00004
|
||||
#define ZIO_FLAG_CONFIG_GRABBED 0x00008
|
||||
|
||||
#define ZIO_FLAG_DONT_CACHE 0x00010
|
||||
#define ZIO_FLAG_DONT_QUEUE 0x00020
|
||||
#define ZIO_FLAG_DONT_PROPAGATE 0x00040
|
||||
#define ZIO_FLAG_DONT_RETRY 0x00080
|
||||
|
||||
#define ZIO_FLAG_PHYSICAL 0x00100
|
||||
#define ZIO_FLAG_IO_BYPASS 0x00200
|
||||
#define ZIO_FLAG_IO_REPAIR 0x00400
|
||||
#define ZIO_FLAG_SPECULATIVE 0x00800
|
||||
|
||||
#define ZIO_FLAG_RESILVER 0x01000
|
||||
#define ZIO_FLAG_SCRUB 0x02000
|
||||
#define ZIO_FLAG_SCRUB_THREAD 0x04000
|
||||
#define ZIO_FLAG_SUBBLOCK 0x08000
|
||||
|
||||
#define ZIO_FLAG_NOBOOKMARK 0x10000
|
||||
#define ZIO_FLAG_USER 0x20000
|
||||
#define ZIO_FLAG_METADATA 0x40000
|
||||
#define ZIO_FLAG_WRITE_RETRY 0x80000
|
||||
|
||||
#define ZIO_FLAG_GANG_INHERIT \
|
||||
(ZIO_FLAG_CANFAIL | \
|
||||
ZIO_FLAG_FAILFAST | \
|
||||
ZIO_FLAG_CONFIG_HELD | \
|
||||
ZIO_FLAG_DONT_CACHE | \
|
||||
ZIO_FLAG_DONT_RETRY | \
|
||||
ZIO_FLAG_IO_REPAIR | \
|
||||
ZIO_FLAG_SPECULATIVE | \
|
||||
ZIO_FLAG_RESILVER | \
|
||||
ZIO_FLAG_SCRUB | \
|
||||
ZIO_FLAG_SCRUB_THREAD | \
|
||||
ZIO_FLAG_USER | \
|
||||
ZIO_FLAG_METADATA)
|
||||
|
||||
#define ZIO_FLAG_VDEV_INHERIT \
|
||||
(ZIO_FLAG_GANG_INHERIT | \
|
||||
ZIO_FLAG_PHYSICAL)
|
||||
|
||||
#define ZIO_FLAG_RETRY_INHERIT \
|
||||
(ZIO_FLAG_VDEV_INHERIT | \
|
||||
ZIO_FLAG_CONFIG_GRABBED | \
|
||||
ZIO_FLAG_DONT_PROPAGATE | \
|
||||
ZIO_FLAG_NOBOOKMARK)
|
||||
|
||||
|
||||
#define ZIO_PIPELINE_CONTINUE 0x100
|
||||
#define ZIO_PIPELINE_STOP 0x101
|
||||
|
||||
/*
|
||||
* We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
|
||||
* graveyard) to indicate checksum errors and fragmentation.
|
||||
*/
|
||||
#define ECKSUM EBADE
|
||||
#define EFRAGS EBADR
|
||||
|
||||
typedef struct zio zio_t;
|
||||
typedef void zio_done_func_t(zio_t *zio);
|
||||
|
||||
extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
|
||||
extern char *zio_type_name[ZIO_TYPES];
|
||||
|
||||
/*
|
||||
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
|
||||
* identifies any block in the pool. By convention, the meta-objset (MOS)
|
||||
* is objset 0, the meta-dnode is object 0, the root block (osphys_t) is
|
||||
* level -1 of the meta-dnode, and intent log blocks (which are chained
|
||||
* off the root block) have blkid == sequence number. In summary:
|
||||
*
|
||||
* mos is objset 0
|
||||
* meta-dnode is object 0
|
||||
* root block is <objset, 0, -1, 0>
|
||||
* intent log is <objset, 0, -1, ZIL sequence number>
|
||||
*
|
||||
* Note: this structure is called a bookmark because its first purpose was
|
||||
* to remember where to resume a pool-wide traverse. The absolute ordering
|
||||
* for block visitation during traversal is defined in compare_bookmark().
|
||||
*
|
||||
* Note: this structure is passed between userland and the kernel.
|
||||
* Therefore it must not change size or alignment between 32/64 bit
|
||||
* compilation options.
|
||||
*/
|
||||
/*
 * The <objset, object, level, blkid> four-tuple.  Every member is a
 * fixed-width 64-bit integer; zb_level is the only signed one because
 * level -1 is used for the root block and for intent log blocks.
 */
typedef struct zbookmark {
|
||||
uint64_t zb_objset; /* objset; 0 is the MOS */
|
||||
uint64_t zb_object; /* object; 0 is the meta-dnode */
|
||||
int64_t zb_level; /* level; -1 for root / intent log blocks */
|
||||
uint64_t zb_blkid; /* block id; ZIL sequence number for log blocks */
|
||||
} zbookmark_t;
|
||||
|
||||
/*
 * State carried by a single I/O (zio_t).  The members are grouped by the
 * section comments inside the structure.
 *
 * NOTE(review): io_pipeline / io_orig_pipeline look like bitmasks of
 * (1U << zio_stage) values as built by the ZIO_*_PIPELINE macros in
 * zio_impl.h, and io_flags / io_orig_flags look like ZIO_FLAG_* masks --
 * confirm against zio.c.
 */
struct zio {
|
||||
/* Core information about this I/O */
|
||||
zio_t *io_parent;
|
||||
zio_t *io_root;
|
||||
spa_t *io_spa;
|
||||
zbookmark_t io_bookmark;
|
||||
enum zio_checksum io_checksum;
|
||||
enum zio_compress io_compress;
|
||||
int io_ndvas;
|
||||
uint64_t io_txg;
|
||||
blkptr_t *io_bp;
|
||||
blkptr_t io_bp_copy;
|
||||
zio_t *io_child;
|
||||
zio_t *io_sibling_prev;
|
||||
zio_t *io_sibling_next;
|
||||
zio_transform_t *io_transform_stack;
|
||||
zio_t *io_logical;
|
||||
list_node_t zio_link_node;
|
||||
|
||||
/* Callback info */
|
||||
zio_done_func_t *io_ready;
|
||||
zio_done_func_t *io_done;
|
||||
void *io_private; /* NOTE(review): presumably the callbacks' argument */
|
||||
blkptr_t io_bp_orig;
|
||||
|
||||
/* Data represented by this I/O */
|
||||
void *io_data;
|
||||
uint64_t io_size;
|
||||
|
||||
/* Stuff for the vdev stack */
|
||||
vdev_t *io_vd;
|
||||
void *io_vsd;
|
||||
uint64_t io_offset;
|
||||
uint64_t io_deadline;
|
||||
uint64_t io_timestamp;
|
||||
avl_node_t io_offset_node;
|
||||
avl_node_t io_deadline_node;
|
||||
avl_tree_t *io_vdev_tree;
|
||||
zio_t *io_delegate_list;
|
||||
zio_t *io_delegate_next;
|
||||
|
||||
/* Internal pipeline state */
|
||||
int io_flags;
|
||||
int io_orig_flags;
|
||||
enum zio_type io_type;
|
||||
enum zio_stage io_stage;
|
||||
enum zio_stage io_orig_stage;
|
||||
uint8_t io_stalled;
|
||||
uint8_t io_priority; /* same width as zio_priority_table[] entries */
|
||||
struct dk_callback io_dk_callback;
|
||||
int io_cmd;
|
||||
int io_retries;
|
||||
int io_error;
|
||||
uint32_t io_numerrors;
|
||||
uint32_t io_pipeline;
|
||||
uint32_t io_orig_pipeline;
|
||||
uint64_t io_children_notready;
|
||||
uint64_t io_children_notdone;
|
||||
void *io_waiter;
|
||||
kmutex_t io_lock;
|
||||
kcondvar_t io_cv;
|
||||
|
||||
/* FMA state */
|
||||
uint64_t io_ena;
|
||||
};
|
||||
|
||||
extern zio_t *zio_null(zio_t *pio, spa_t *spa,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_root(spa_t *spa,
|
||||
zio_done_func_t *done, void *private, int flags);
|
||||
|
||||
extern zio_t *zio_read(zio_t *pio, spa_t *spa, blkptr_t *bp, void *data,
|
||||
uint64_t size, zio_done_func_t *done, void *private,
|
||||
int priority, int flags, zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_write(zio_t *pio, spa_t *spa, int checksum, int compress,
|
||||
int ncopies, uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
|
||||
zio_done_func_t *ready, zio_done_func_t *done, void *private, int priority,
|
||||
int flags, zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, int checksum,
|
||||
uint64_t txg, blkptr_t *bp, void *data, uint64_t size,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
zbookmark_t *zb);
|
||||
|
||||
extern zio_t *zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private);
|
||||
|
||||
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio_done_func_t *done, void *private);
|
||||
|
||||
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
|
||||
zio_done_func_t *done, void *private, int priority, int flags);
|
||||
|
||||
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *data, int checksum,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
boolean_t labels);
|
||||
|
||||
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *data, int checksum,
|
||||
zio_done_func_t *done, void *private, int priority, int flags,
|
||||
boolean_t labels);
|
||||
|
||||
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
|
||||
blkptr_t *old_bp, uint64_t txg);
|
||||
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
|
||||
extern void zio_flush(zio_t *zio, vdev_t *vd);
|
||||
|
||||
extern int zio_wait(zio_t *zio);
|
||||
extern void zio_nowait(zio_t *zio);
|
||||
extern void zio_execute(zio_t *zio);
|
||||
extern void zio_interrupt(zio_t *zio);
|
||||
|
||||
extern int zio_wait_for_children_ready(zio_t *zio);
|
||||
extern int zio_wait_for_children_done(zio_t *zio);
|
||||
|
||||
extern void *zio_buf_alloc(size_t size);
|
||||
extern void zio_buf_free(void *buf, size_t size);
|
||||
extern void *zio_data_buf_alloc(size_t size);
|
||||
extern void zio_data_buf_free(void *buf, size_t size);
|
||||
|
||||
extern void zio_resubmit_stage_async(void *);
|
||||
|
||||
/*
|
||||
* Delegate I/O to a child vdev.
|
||||
*/
|
||||
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
|
||||
uint64_t offset, void *data, uint64_t size, int type, int priority,
|
||||
int flags, zio_done_func_t *done, void *private);
|
||||
|
||||
extern void zio_vdev_io_bypass(zio_t *zio);
|
||||
extern void zio_vdev_io_reissue(zio_t *zio);
|
||||
extern void zio_vdev_io_redone(zio_t *zio);
|
||||
|
||||
extern void zio_checksum_verified(zio_t *zio);
|
||||
extern void zio_set_gang_verifier(zio_t *zio, zio_cksum_t *zcp);
|
||||
|
||||
extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
|
||||
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
|
||||
|
||||
extern boolean_t zio_should_retry(zio_t *zio);
|
||||
extern int zio_vdev_resume_io(spa_t *);
|
||||
|
||||
/*
|
||||
* Initial setup and teardown.
|
||||
*/
|
||||
extern void zio_init(void);
|
||||
extern void zio_fini(void);
|
||||
|
||||
/*
|
||||
* Fault injection
|
||||
*/
|
||||
struct zinject_record;
|
||||
extern uint32_t zio_injection_enabled;
|
||||
extern int zio_inject_fault(char *name, int flags, int *id,
|
||||
struct zinject_record *record);
|
||||
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
|
||||
struct zinject_record *record);
|
||||
extern int zio_clear_fault(int id);
|
||||
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, int error);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZIO_H */
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_CHECKSUM_H
|
||||
#define _SYS_ZIO_CHECKSUM_H
|
||||
|
||||
#pragma ident "@(#)zio_checksum.h 1.2 06/03/03 SMI"
|
||||
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Signature for checksum functions.
|
||||
*/
|
||||
typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
|
||||
|
||||
/*
|
||||
* Information about each checksum function.
|
||||
*/
|
||||
/*
 * Descriptor for one checksum algorithm.  zio_checksum_table[] holds
 * ZIO_CHECKSUM_FUNCTIONS of these.
 * NOTE(review): the table is presumably indexed by enum zio_checksum,
 * and ci_func[] presumably holds the native-byteorder routine followed
 * by the byteswapped one (cf. the *_native / *_byteswap externs) --
 * confirm against zio_checksum.c.
 */
typedef struct zio_checksum_info {
|
||||
zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
|
||||
int ci_correctable; /* number of correctable bits */
|
||||
int ci_zbt; /* uses zio block tail? */
|
||||
char *ci_name; /* descriptive name */
|
||||
} zio_checksum_info_t;
|
||||
|
||||
extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
|
||||
|
||||
/*
|
||||
* Checksum routines.
|
||||
*/
|
||||
extern zio_checksum_t fletcher_2_native;
|
||||
extern zio_checksum_t fletcher_4_native;
|
||||
extern zio_checksum_t fletcher_4_incremental_native;
|
||||
|
||||
extern zio_checksum_t fletcher_2_byteswap;
|
||||
extern zio_checksum_t fletcher_4_byteswap;
|
||||
extern zio_checksum_t fletcher_4_incremental_byteswap;
|
||||
|
||||
extern zio_checksum_t zio_checksum_SHA256;
|
||||
|
||||
extern void zio_checksum(uint_t checksum, zio_cksum_t *zcp,
|
||||
void *data, uint64_t size);
|
||||
extern int zio_checksum_error(zio_t *zio);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIO_CHECKSUM_H */
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_COMPRESS_H
|
||||
#define _SYS_ZIO_COMPRESS_H
|
||||
|
||||
#pragma ident "@(#)zio_compress.h 1.2 07/03/22 SMI"
|
||||
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Common signature for all zio compress/decompress functions.
|
||||
*/
|
||||
typedef size_t zio_compress_func_t(void *src, void *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
typedef int zio_decompress_func_t(void *src, void *dst,
|
||||
size_t s_len, size_t d_len, int);
|
||||
|
||||
/*
|
||||
* Information about each compression function.
|
||||
*/
|
||||
/*
 * Descriptor for one compression algorithm.  zio_compress_table[] holds
 * ZIO_COMPRESS_FUNCTIONS of these.
 * NOTE(review): the table is presumably indexed by enum zio_compress,
 * and ci_level is presumably what gets passed as the trailing int
 * argument of ci_compress / ci_decompress -- confirm against
 * zio_compress.c.
 */
typedef struct zio_compress_info {
|
||||
zio_compress_func_t *ci_compress; /* compression function */
|
||||
zio_decompress_func_t *ci_decompress; /* decompression function */
|
||||
int ci_level; /* level parameter */
|
||||
char *ci_name; /* algorithm name */
|
||||
} zio_compress_info_t;
|
||||
|
||||
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
|
||||
|
||||
/*
|
||||
* Compression routines.
|
||||
*/
|
||||
extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
|
||||
int level);
|
||||
|
||||
/*
|
||||
* Compress and decompress data if necessary.
|
||||
*/
|
||||
extern int zio_compress_data(int cpfunc, void *src, uint64_t srcsize,
|
||||
void **destp, uint64_t *destsizep, uint64_t *destbufsizep);
|
||||
extern int zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
|
||||
void *dest, uint64_t destsize);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZIO_COMPRESS_H */
|
||||
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_IMPL_H
|
||||
#define _ZIO_IMPL_H
|
||||
|
||||
#pragma ident "@(#)zio_impl.h 1.6 07/12/12 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* I/O Groups: pipeline stage definitions.
|
||||
*/
|
||||
/*
 * Pipeline stages, numbered consecutively from 0.
 *
 * The numeric value of each stage is used as a bit position: the
 * pipeline macros below build masks with (1U << ZIO_STAGE_xxx).  The
 * enum must therefore never grow beyond 32 enumerators; it currently
 * has 24.
 *
 * The five-character tag after each stage is a per-I/O-type legend.
 * NOTE(review): the columns appear to stand for Read, Write, Free,
 * Claim and Ioctl, in that order, with '-' meaning "not used by this
 * type" -- this matches the ZIO_*_PIPELINE macros but is not stated in
 * this header.
 */
typedef enum zio_stage {
|
||||
ZIO_STAGE_OPEN = 0, /* RWFCI */
|
||||
ZIO_STAGE_WAIT_FOR_CHILDREN_READY, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_READ_INIT, /* R---- */
|
||||
ZIO_STAGE_ISSUE_ASYNC, /* -W--- */
|
||||
ZIO_STAGE_WRITE_COMPRESS, /* -W--- */
|
||||
ZIO_STAGE_CHECKSUM_GENERATE, /* -W--- */
|
||||
|
||||
ZIO_STAGE_GET_GANG_HEADER, /* -WFC- */
|
||||
ZIO_STAGE_REWRITE_GANG_MEMBERS, /* -W--- */
|
||||
ZIO_STAGE_FREE_GANG_MEMBERS, /* --F-- */
|
||||
ZIO_STAGE_CLAIM_GANG_MEMBERS, /* ---C- */
|
||||
|
||||
ZIO_STAGE_DVA_ALLOCATE, /* -W--- */
|
||||
ZIO_STAGE_DVA_FREE, /* --F-- */
|
||||
ZIO_STAGE_DVA_CLAIM, /* ---C- */
|
||||
|
||||
ZIO_STAGE_GANG_CHECKSUM_GENERATE, /* -W--- */
|
||||
|
||||
ZIO_STAGE_READY, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_VDEV_IO_START, /* RW--I */
|
||||
ZIO_STAGE_VDEV_IO_DONE, /* RW--I */
|
||||
ZIO_STAGE_VDEV_IO_ASSESS, /* RW--I */
|
||||
|
||||
ZIO_STAGE_WAIT_FOR_CHILDREN_DONE, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_CHECKSUM_VERIFY, /* R---- */
|
||||
ZIO_STAGE_READ_GANG_MEMBERS, /* R---- */
|
||||
ZIO_STAGE_READ_DECOMPRESS, /* R---- */
|
||||
|
||||
ZIO_STAGE_ASSESS, /* RWFCI */
|
||||
ZIO_STAGE_DONE /* RWFCI */
|
||||
} zio_stage_t;
|
||||
|
||||
#define ZIO_INTERLOCK_STAGES \
|
||||
((1U << ZIO_STAGE_WAIT_FOR_CHILDREN_READY) | \
|
||||
(1U << ZIO_STAGE_READY) | \
|
||||
(1U << ZIO_STAGE_WAIT_FOR_CHILDREN_DONE) | \
|
||||
(1U << ZIO_STAGE_ASSESS) | \
|
||||
(1U << ZIO_STAGE_DONE))
|
||||
|
||||
#define ZIO_VDEV_IO_STAGES \
|
||||
((1U << ZIO_STAGE_VDEV_IO_START) | \
|
||||
(1U << ZIO_STAGE_VDEV_IO_DONE) | \
|
||||
(1U << ZIO_STAGE_VDEV_IO_ASSESS))
|
||||
|
||||
#define ZIO_READ_PHYS_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_VDEV_IO_STAGES | \
|
||||
(1U << ZIO_STAGE_CHECKSUM_VERIFY))
|
||||
|
||||
#define ZIO_READ_GANG_PIPELINE \
|
||||
ZIO_READ_PHYS_PIPELINE
|
||||
|
||||
/*
 * Pipeline mask for a logical read: the physical-read pipeline plus the
 * READ_INIT stage.
 *
 * The replacement list is wrapped in its own parentheses, like every
 * other pipeline macro in this header, so that the macro behaves as a
 * single operand inside a larger expression.  Without them,
 * `ZIO_READ_PIPELINE & mask' would apply the mask to
 * ZIO_READ_PHYS_PIPELINE only, and `~ZIO_READ_PIPELINE' would invert
 * only the READ_INIT bit.
 */
#define ZIO_READ_PIPELINE \
	((1U << ZIO_STAGE_READ_INIT) | \
	ZIO_READ_PHYS_PIPELINE)
|
||||
|
||||
#define ZIO_WRITE_COMMON_STAGES \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_VDEV_IO_STAGES | \
|
||||
(1U << ZIO_STAGE_ISSUE_ASYNC) | \
|
||||
(1U << ZIO_STAGE_CHECKSUM_GENERATE))
|
||||
|
||||
#define ZIO_WRITE_PHYS_PIPELINE \
|
||||
ZIO_WRITE_COMMON_STAGES
|
||||
|
||||
#define ZIO_WRITE_PIPELINE \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
(1U << ZIO_STAGE_WRITE_COMPRESS) | \
|
||||
(1U << ZIO_STAGE_DVA_ALLOCATE))
|
||||
|
||||
#define ZIO_GANG_REWRITE_STAGES \
|
||||
((1U << ZIO_STAGE_GET_GANG_HEADER) | \
|
||||
(1U << ZIO_STAGE_REWRITE_GANG_MEMBERS) | \
|
||||
(1U << ZIO_STAGE_GANG_CHECKSUM_GENERATE))
|
||||
|
||||
#define ZIO_GANG_FREE_STAGES \
|
||||
((1U << ZIO_STAGE_GET_GANG_HEADER) | \
|
||||
(1U << ZIO_STAGE_FREE_GANG_MEMBERS))
|
||||
|
||||
#define ZIO_GANG_CLAIM_STAGES \
|
||||
((1U << ZIO_STAGE_GET_GANG_HEADER) | \
|
||||
(1U << ZIO_STAGE_CLAIM_GANG_MEMBERS))
|
||||
|
||||
#define ZIO_REWRITE_PIPELINE(bp) \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
(BP_IS_GANG(bp) ? ZIO_GANG_REWRITE_STAGES : 0))
|
||||
|
||||
#define ZIO_WRITE_ALLOCATE_PIPELINE \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
(1U << ZIO_STAGE_DVA_ALLOCATE))
|
||||
|
||||
#define ZIO_FREE_PIPELINE(bp) \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
(1U << ZIO_STAGE_DVA_FREE) | \
|
||||
(BP_IS_GANG(bp) ? ZIO_GANG_FREE_STAGES : 0))
|
||||
|
||||
#define ZIO_CLAIM_PIPELINE(bp) \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
(1U << ZIO_STAGE_DVA_CLAIM) | \
|
||||
(BP_IS_GANG(bp) ? ZIO_GANG_CLAIM_STAGES : 0))
|
||||
|
||||
#define ZIO_IOCTL_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_VDEV_IO_STAGES)
|
||||
|
||||
|
||||
#define ZIO_WAIT_FOR_CHILDREN_PIPELINE \
|
||||
ZIO_INTERLOCK_STAGES
|
||||
|
||||
#define ZIO_VDEV_CHILD_PIPELINE \
|
||||
(ZIO_VDEV_IO_STAGES | \
|
||||
(1U << ZIO_STAGE_ASSESS) | \
|
||||
(1U << ZIO_STAGE_WAIT_FOR_CHILDREN_DONE) | \
|
||||
(1U << ZIO_STAGE_DONE))
|
||||
|
||||
#define ZIO_ERROR_PIPELINE_MASK \
|
||||
ZIO_INTERLOCK_STAGES
|
||||
|
||||
/*
 * Node of a singly linked list chained through zt_next; struct zio
 * refers to one through its io_transform_stack member.
 * NOTE(review): zt_data / zt_size / zt_bufsize presumably describe a
 * data buffer, its logical size and its allocated size -- confirm
 * against zio.c.
 */
typedef struct zio_transform zio_transform_t;
|
||||
struct zio_transform {
|
||||
void *zt_data;
|
||||
uint64_t zt_size;
|
||||
uint64_t zt_bufsize;
|
||||
zio_transform_t *zt_next; /* next list entry */
|
||||
};
|
||||
|
||||
extern void zio_inject_init(void);
|
||||
extern void zio_inject_fini(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZIO_IMPL_H */
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZVOL_H
|
||||
#define _SYS_ZVOL_H
|
||||
|
||||
#pragma ident "@(#)zvol.h 1.5 08/04/01 SMI"
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZVOL_OBJ 1ULL
|
||||
#define ZVOL_ZAP_OBJ 2ULL
|
||||
|
||||
#ifdef _KERNEL
|
||||
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
|
||||
extern int zvol_check_volblocksize(uint64_t volblocksize);
|
||||
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
|
||||
extern int zvol_create_minor(const char *, major_t);
|
||||
extern int zvol_remove_minor(const char *);
|
||||
extern int zvol_set_volsize(const char *, major_t, uint64_t);
|
||||
extern int zvol_set_volblocksize(const char *, uint64_t);
|
||||
|
||||
extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr);
|
||||
extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks);
|
||||
extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr);
|
||||
extern int zvol_strategy(buf_t *bp);
|
||||
extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr);
|
||||
extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr);
|
||||
extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr);
|
||||
extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr);
|
||||
extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
|
||||
int *rvalp);
|
||||
extern int zvol_busy(void);
|
||||
extern void zvol_init(void);
|
||||
extern void zvol_fini(void);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZVOL_H */
|
||||
Reference in New Issue
Block a user